In [8]:
!pip install transformers datasets torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
!pip install accelerate peft

Looking in indexes: https://download.pytorch.org/whl/cpu


In [9]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from datasets import Dataset
import json
from peft import LoraConfig, get_peft_model, TaskType
import os

In [11]:
# Report the PyTorch build and the device the rest of the notebook will run on.
has_cuda = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {has_cuda}")
print(f"Device: {'cuda' if has_cuda else 'cpu'}")

PyTorch version: 2.7.0+cpu
CUDA available: False
Device: cpu


In [12]:
# Load the raw training and validation records from JSON files in the working directory.
# The encoding is given explicitly: without it, open() falls back to the locale
# codec (e.g. cp1252 on Windows), which can fail or mis-decode UTF-8 JSON text.
with open('train_data.json', 'r', encoding='utf-8') as f:
    train_data = json.load(f)

with open('val_data.json', 'r', encoding='utf-8') as f:
    val_data = json.load(f)

In [15]:
def format_instruction(example):
    """Render one record as a single instruction/response prompt string.

    Args:
        example: Mapping with the keys ``'instruction'`` and ``'response'``.

    Returns:
        A dict ``{"text": ...}`` whose value is the instruction under a
        ``### Instruction:`` header, a blank line, then the response under a
        ``### Response:`` header.

    Raises:
        KeyError: If either key is missing from ``example``.
    """
    instruction = example['instruction']
    response = example['response']
    prompt_lines = [
        "### Instruction:",
        f"{instruction}",
        "",
        "### Response:",
        f"{response}",
    ]
    return {"text": "\n".join(prompt_lines)}

# Turn every raw record into its prompt text and wrap each split in a Dataset.
train_dataset = Dataset.from_list(list(map(format_instruction, train_data)))
val_dataset = Dataset.from_list(list(map(format_instruction, val_data)))

In [16]:
# Load a small causal-LM checkpoint so that fine-tuning stays feasible on CPU.
# DialoGPT-small reports ~124M parameters when loaded (see the printout below);
# "distilgpt2" is an even smaller alternative.
model_name = "microsoft/DialoGPT-small"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Use the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Plain float32 weights placed on the CPU; no quantization is applied here.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    device_map="cpu",
)

param_count = sum(p.numel() for p in model.parameters())
print(f"Model loaded on: {model.device}")
print(f"Model size: {param_count / 1e6:.2f}M parameters")


tokenizer_config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/641 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/351M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Model loaded on: cpu
Model size: 124.44M parameters


In [17]:
# LoRA adapter configuration: low rank (r=4) applied only to the `c_attn`
# modules, so just a small fraction of the weights is trained.
peft_config = LoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["c_attn"],
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
)

# NOTE(review): this rebinds `model` to the wrapped model, so re-running the cell
# presumably wraps an already-wrapped model; reload the base model first.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 147,456 || all params: 124,587,264 || trainable%: 0.1184




In [18]:
def tokenize_function(examples):
    """Tokenize a batch of prompt strings to a fixed length of 128 tokens.

    Args:
        examples: Batch mapping whose ``"text"`` entry holds the prompt strings.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the batch, with
        truncation enabled and padding to ``max_length``.
    """
    # Short fixed-length sequences keep CPU training tractable.
    tokenizer_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 128,
    }
    return tokenizer(examples["text"], **tokenizer_options)

# Tokenize both splits in batched mode with the same function.
tokenized_train, tokenized_val = (
    split.map(tokenize_function, batched=True)
    for split in (train_dataset, val_dataset)
)


Map:   0%|          | 0/31200 [00:00<?, ? examples/s]

Map:   0%|          | 0/7800 [00:00<?, ? examples/s]

In [20]:
# Trainer configuration tuned for a CPU-only run.
training_args = TrainingArguments(
    # Where checkpoints and logs are written.
    output_dir="./insurance-chatbot-model-cpu",
    logging_dir="./logs",
    report_to="none",  # no wandb/tensorboard reporting
    # Optimisation schedule: one pass over the data, batches of 1 accumulated
    # over 16 steps (16 sequences per optimizer update on a single device).
    num_train_epochs=1,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=16,
    warmup_steps=50,
    # Logging / evaluation / checkpoint cadence, all counted in optimizer steps.
    logging_steps=10,
    eval_strategy="steps",  # current name of the former `evaluation_strategy` argument
    eval_steps=100,
    save_steps=200,  # kept a multiple of eval_steps for load_best_model_at_end
    save_total_limit=2,
    load_best_model_at_end=True,
    # CPU-specific settings.
    fp16=False,  # no mixed precision on CPU
    dataloader_num_workers=0,  # load data in the main process
)

In [23]:
# Batch collation for causal language modelling (masked-LM objective disabled).
data_collator = DataCollatorForLanguageModeling(
    mlm=False,
    tokenizer=tokenizer,
)

# Tie together the LoRA-wrapped model, the run configuration and both tokenized splits.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
)

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [25]:
# Announce the run, then start fine-tuning; the recorded run of this cell took
# about 62,000 s (see `train_runtime` in the output below).
print(
    "Starting training on CPU... This will take a while!",
    "Consider using a subset of data for testing:",
    f"Training on {len(tokenized_train)} examples",
    sep="\n",
)

trainer.train()

Starting training on CPU... This will take a while!
Consider using a subset of data for testing:
Training on 31200 examples


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss,Validation Loss
100,13.8553,12.5532
200,7.3277,6.04097
300,5.046,3.997415
400,3.965,3.19575
500,3.5263,2.825998
600,3.1752,2.601627
700,2.9841,2.464112
800,2.858,2.350265
900,2.7257,2.27656
1000,2.7152,2.214334




TrainOutput(global_step=1950, training_loss=3.933395485511193, metrics={'train_runtime': 62361.8593, 'train_samples_per_second': 0.5, 'train_steps_per_second': 0.031, 'total_flos': 2041611131289600.0, 'train_loss': 3.933395485511193, 'epoch': 1.0})

In [26]:
# Write the fine-tuned model and its tokenizer side by side into one directory.
# NOTE(review): `model` is the PEFT-wrapped model, so this presumably stores only
# the LoRA adapter rather than the full base weights — confirm before deployment.
export_dir = "./insurance-chatbot-cpu"
model.save_pretrained(export_dir)
tokenizer.save_pretrained(export_dir)

('./insurance-chatbot-cpu\\tokenizer_config.json',
 './insurance-chatbot-cpu\\special_tokens_map.json',
 './insurance-chatbot-cpu\\chat_template.jinja',
 './insurance-chatbot-cpu\\vocab.json',
 './insurance-chatbot-cpu\\merges.txt',
 './insurance-chatbot-cpu\\added_tokens.json',
 './insurance-chatbot-cpu\\tokenizer.json')