In [1]:
# Install necessary libraries
!pip install transformers datasets peft accelerate bitsandbytes torch --quiet

# Import libraries
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from transformers import TrainingArguments, Trainer
import os

In [2]:
from transformers import BitsAndBytesConfig

# ====== Step 1: Load Model and Tokenizer ======
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
device = "cpu"  # Keep the model on the CPU

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# NOTE(review): this 8-bit config is constructed but never handed to
# `from_pretrained` below, so it has no effect on how the model is loaded here.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

# Classification model with six output classes, moved onto `device`.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=6)
model = model.to(device)

print(f"Model loaded on {device}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of Qwen2ForSequenceClassification were not initialized from the model checkpoint at deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded on cpu


In [3]:
# ====== Step 2: Sample Data ======
# A single hand-written conversation used to smoke-test the classifier.
sample_conversations = [
    {
        "conversation": (
            "Caller: I need you to send money immediately, I am in big trouble!\n"
            "Receiver: What happened? Why do you need money so urgently?\n"
            "Caller: I got into a legal issue, and I need bail money right now!"
        ),
        "label": "Legal/Authority Urgency"
    }
]

# Class names indexed by class id. The list is kept in sorted order because the
# training labels are produced by sklearn's LabelEncoder, which numbers classes
# in sorted order; any other order makes `labels[prediction]` report the wrong
# name (previously "Romantic Urgency" and "Social/Peer Pressure Urgency" were
# swapped relative to the encoder's ids).
labels = sorted([
    "Emotional Urgency",
    "Financial Urgency",
    "Legal/Authority Urgency",
    "No Urgency",
    "Romantic Urgency",
    "Social/Peer Pressure Urgency",
])

# Inverse lookup: class name -> class id.
label_to_id = {label: i for i, label in enumerate(labels)}

In [4]:
# ====== Step 3: Tokenization and Prediction ======
# Use the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token

separator = "=" * 50

for conv in sample_conversations:
    text = conv["conversation"]

    # Encode the conversation as PyTorch tensors and move them to `device`.
    inputs = tokenizer(text, truncation=True, padding=True, return_tensors="pt")
    inputs = inputs.to(device)

    # Forward pass without gradient tracking; the arg-max logit is the class id.
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        prediction = logits.argmax(dim=-1).item()

    # Show the first 100 characters of the text next to the true and predicted labels.
    print(f"Conversation: {text[:100]}...")
    print(f"Actual Label: {conv['label']}")
    print(f"Predicted Label: {labels[prediction]}")
    print(separator)

Conversation: Caller: I need you to send money immediately, I am in big trouble!
Receiver: What happened? Why do y...
Actual Label: Legal/Authority Urgency
Predicted Label: Social/Peer Pressure Urgency


In [5]:
# ====== Step 4: Load and Prepare Dataset ======
df = pd.read_csv("./urgency_data.csv")
print(df.head())

# Turn the string labels into integer class ids, stored in a new column.
label_encoder = LabelEncoder()
df["label_encoded"] = label_encoder.fit_transform(df["label"])

# Show which id each class name received.
class_names = label_encoder.classes_
class_ids = label_encoder.transform(class_names)
label_mapping = dict(zip(class_names, class_ids))
print("Label Mapping:", label_mapping)

# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable.
conversations = df["conversation"].tolist()
encoded_labels = df["label_encoded"].tolist()
train_texts, val_texts, train_labels, val_labels = train_test_split(
    conversations, encoded_labels, test_size=0.2, random_state=42
)

print(f"Training Samples: {len(train_texts)}, Validation Samples: {len(val_texts)}")

                                        conversation              label
0  Caller: Hello, I'm so sorry to call you with t...  Emotional Urgency
1  Caller: Hi, I'm so sorry to inform you, but yo...  Emotional Urgency
2  Caller: This is terrible news, but I'm afraid ...  Emotional Urgency
3  Caller: I'm so sorry to call you with this, bu...  Emotional Urgency
4  Caller: Hello, I'm calling about your cousin. ...  Emotional Urgency
Label Mapping: {'Emotional Urgency': 0, 'Financial Urgency': 1, 'Legal/Authority Urgency': 2, 'No Urgency': 3, 'Romantic Urgency': 4, 'Social/Peer Pressure Urgency': 5}
Training Samples: 450, Validation Samples: 113


In [6]:
# ====== Step 5: Dataset and Dataloaders ======
class UrgencyDataset(Dataset):
    """Dataset that pairs tokenizer encodings with their class labels."""

    def __init__(self, encodings, labels):
        # `encodings` maps field name -> per-example sequences; `labels` is
        # indexed in parallel with those sequences.
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # One example per label.
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example `idx` as a dict of tensors, with the label under "labels"."""
        item = {}
        for key, values in self.encodings.items():
            item[key] = torch.tensor(values[idx])
        item["labels"] = torch.tensor(self.labels[idx])
        return item

# Tokenize both splits with identical settings: truncation and padding enabled,
# with a 512-token maximum length.
tokenize_kwargs = dict(truncation=True, padding=True, max_length=512)
train_encodings = tokenizer(train_texts, **tokenize_kwargs)
val_encodings = tokenizer(val_texts, **tokenize_kwargs)

# Wrap the encodings and their labels as datasets.
train_dataset = UrgencyDataset(train_encodings, train_labels)
val_dataset = UrgencyDataset(val_encodings, val_labels)

BATCH_SIZE = 4

# Only the training loader shuffles; validation keeps its original order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

print("Dataloaders Ready! Batch Size:", BATCH_SIZE)

Dataloaders Ready! Batch Size: 4


In [7]:
# ====== Step 6: Training Arguments ======
# Enable gradient checkpointing on the model before training.
model.gradient_checkpointing_enable()

training_args = TrainingArguments(
    output_dir="./deepseek-finetuned",
    # Evaluate and checkpoint once per epoch.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    # The previous value of 0.01 is orders of magnitude above what is normally
    # used to fine-tune a pretrained transformer with AdamW (the library default
    # is 5e-5) and is very likely to make training diverge.
    learning_rate=2e-5,
    # One example per step, with gradients accumulated over two steps.
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    push_to_hub=False,
    optim="adamw_torch",
    # Keep at most two checkpoints on disk.
    save_total_limit=2,
    report_to="none",
    run_name="deepseek-cpu-finetune",
    fp16=False
)

print("Training arguments set for CPU!")

Training arguments set for CPU!




In [None]:
# ====== Step 7: Train Model ======

# Pad with the end-of-sequence token, on both the tokenizer and the model config.
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.eos_token_id

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
)
trainer.train()

# Write the fine-tuned model and then the tokenizer to the same directory.
save_dir = "./deepseek-finetuned"
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_dir)

print("Training complete! Model saved successfully.")

  trainer = Trainer(
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
