In [None]:
!pip install -U bitsandbytes
!pip install -U transformers
!pip install -U accelerate datasets peft bitsandbytes trl --quiet
!pip install -U huggingface_hub
!pip install -q evaluate transformers bert-score
!pip install rouge_score

In [None]:
import pandas as pd

# Columns the rest of the notebook relies on; everything else in the raw CSV is discarded.
wanted_columns = ["utterance", "context", "prompt"]

# Read -> keep the wanted columns -> discard rows with a missing value in any of them.
df = (
    pd.read_csv("datasets/train-conv.csv")
    .loc[:, wanted_columns]
    .dropna(subset=wanted_columns)
    .reset_index(drop=True)
)

# Persist the cleaned table for the later cells.
df.to_csv("datasets/train-conv-cleaned.csv", index=False)

print(f"Cleaned file saved with {len(df)} rows.")

Conversation with chatbot

In [None]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Cleaned conversation table; the chat loop below draws its "sample" examples from it.
df = pd.read_csv("datasets/train-conv-cleaned.csv")

# Instruct checkpoint loaded with float16 weights; device_map="auto" leaves
# device placement to the library.
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Response generator function
def generate_response(prompt):
    """Sample a reply for ``prompt`` and return only the newly generated text.

    Args:
        prompt: Fully formatted prompt string, e.g. ``"<s>[INST] ... [/INST]"``.

    Returns:
        The decoded continuation with special tokens removed and surrounding
        whitespace stripped. The prompt itself is not echoed back.
    """
    # Prompts built in this notebook already begin with the literal BOS text.
    # Letting the tokenizer add its own BOS as well would produce two of them,
    # so automatic special tokens are only enabled when the prompt has none.
    bos_text = tokenizer.bos_token
    prompt_has_bos = bool(bos_text) and prompt.startswith(bos_text)
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=not prompt_has_bos,
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            eos_token_id=tokenizer.eos_token_id,
            # Explicit pad id: the tokenizer loaded above has no pad token configured.
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; slice the prompt tokens off so
    # the caller gets the reply alone instead of the instruction echoed back.
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

# Start interactive chatbot session
print("\nWelcome to the Mistral 7B Empathetic Chatbot")
print("Type your message or 'sample' to use a dataset example.")
print("Type 'exit' to quit.\n")

while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin closed or the kernel was interrupted: leave the loop cleanly
        # instead of ending the cell with a traceback.
        print("\nGoodbye!")
        break

    # Nothing typed: ask again rather than sending an empty message to the model.
    if not user_input:
        continue

    command = user_input.lower()

    if command in ["exit", "quit"]:
        print("Goodbye!")
        break

    if command == "sample":
        # Pull one random row from the cleaned dataset and show it to the user.
        sample = df.sample(1).iloc[0]
        situation = sample["prompt"]
        emotion = sample["context"]
        user_msg = sample["utterance"]

        print(f"\nPrompt: {situation}")
        print(f"Emotion: {emotion}")
        print(f"User Message: {user_msg}")

        formatted_prompt = f"<s>[INST] You are a supportive mental health assistant.\nEmotion: {emotion}\nSituation: {situation}\nMessage: {user_msg} [/INST]"
    else:
        formatted_prompt = f"<s>[INST] You are a supportive mental health assistant.\n{user_input} [/INST]"

    reply = generate_response(formatted_prompt)
    print(f"\nMistral: {reply}\n")


JSONL conversion of the dataset

In [None]:
import pandas as pd
import json
import shutil

SOURCE_CSV = "datasets/train-conv-cleaned.csv"
# The fine-tuning and evaluation cells read the file from datasets/, so it is
# written there directly.
OUTPUT_JSONL = "datasets/empdiag_1000.jsonl"
MAX_PAIRS = 500   # 1000 utterances -> 500 instruction/output pairs
SEED = 42         # fixed seed so the selected subset is reproducible

df = pd.read_csv(SOURCE_CSV).dropna().reset_index(drop=True)

# Rows must NOT be shuffled before pairing: an instruction/output pair is two
# adjacent turns of one conversation, and shuffling rows first would pair
# unrelated utterances.
# NOTE(review): assumes the CSV is stored in conversation order and that all
# turns of one conversation share the same `prompt` value - confirm against the data.
conversation_id = df["prompt"].ne(df["prompt"].shift()).cumsum().rename("conversation_id")

records = []
for _, turns in df.groupby(conversation_id, sort=False):
    utterances = turns["utterance"].astype(str).str.strip().tolist()
    # Pair turns (0, 1), (2, 3), ... inside a conversation; a trailing unpaired
    # turn is dropped so a pair never straddles two conversations.
    for i in range(0, len(utterances) - 1, 2):
        records.append({
            "instruction": utterances[i],
            "output": utterances[i + 1]
        })

# Randomness is applied to whole pairs, after pairing.
pairs = pd.DataFrame(records, columns=["instruction", "output"])
n_pairs = min(MAX_PAIRS, len(pairs))
pairs = pairs.sample(n=n_pairs, random_state=SEED)

with open(OUTPUT_JSONL, "w", encoding="utf-8") as f:
    for r in pairs.to_dict(orient="records"):
        json.dump(r, f)
        f.write("\n")

print(f"Saved: {OUTPUT_JSONL} ({n_pairs} pairs)")


Fine Tuning

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType

# Instruction/output pairs used for fine-tuning.
dataset = load_dataset("json", data_files="datasets/empdiag_1000.jsonl", split="train")

model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# tokenize() below pads every example to a fixed length, which requires a pad
# token. None is configured after loading, so reuse EOS, exactly as the trial
# cells further down do.
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")

def tokenize(example, max_length=512):
    """Convert one instruction/output record into causal-LM training features.

    Args:
        example: Mapping with ``"instruction"`` and ``"output"`` strings.
        max_length: Fixed sequence length to truncate/pad to.

    Returns:
        The tokenizer output for the full ``prompt + output`` text, plus a
        ``"labels"`` list of the same length as ``"input_ids"``. Prompt and
        padding positions are set to -100 (ignored by the loss), so only the
        response tokens are learned.
    """
    prompt = f"<s>[INST] {example['instruction']} [/INST]"
    output = f" {example['output']}</s>"

    # The template already spells out <s> and </s>; disabling the automatic
    # special tokens avoids a second BOS at the start of every example.
    result = tokenizer(
        prompt + output,
        truncation=True,
        padding="max_length",
        max_length=max_length,
        add_special_tokens=False,
    )
    prompt_len = len(tokenizer(prompt, add_special_tokens=False)["input_ids"])

    # Labels must line up position-by-position with input_ids. Tokenizing the
    # output on its own would give a sequence with different offsets.
    # Padding is detected through attention_mask, not the token id, because the
    # pad token is the EOS token and the real EOS must stay a training target.
    # Counting real tokens makes this correct for left and right padding alike.
    labels = []
    real_seen = 0
    for token_id, is_real in zip(result["input_ids"], result["attention_mask"]):
        if not is_real:
            labels.append(-100)
            continue
        labels.append(token_id if real_seen >= prompt_len else -100)
        real_seen += 1
    result["labels"] = labels
    return result

# Turn every record into model inputs; the raw text columns are dropped afterwards.
tokenized_dataset = dataset.map(tokenize, remove_columns=["instruction", "output"])

# Attach LoRA adapters to the base model and report the trainable parameters.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# Optimisation settings: 4 samples per device, gradients accumulated over 2 steps.
trainer_options = {
    "output_dir": "mistral-lora-chatbot",
    "per_device_train_batch_size": 4,
    "gradient_accumulation_steps": 2,
    "num_train_epochs": 2,
    "learning_rate": 2e-5,
    "fp16": True,
    "logging_steps": 10,
    "save_strategy": "epoch",
    "report_to": "none",
}
training_args = TrainingArguments(**trainer_options)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
)
trainer.train()

# Store the adapter and the tokenizer side by side.
final_dir = "checkpoints/mistral-lora-chatbot"
for artifact in (model, tokenizer):
    artifact.save_pretrained(final_dir)

print("Fine-tuning complete")


Trial 1 – Baseline LoRA Fine-Tuning Script

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType


model_name = "mistralai/Mistral-7B-Instruct-v0.2"

# Instruction/output pairs used for fine-tuning.
dataset = load_dataset("json", split="train", data_files="datasets/empdiag_1000.jsonl")

# EOS doubles as the padding token for the fixed-length padding in tokenize().
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# float16 weights; device placement is left to device_map="auto".
base_load_options = {"torch_dtype": torch.float16, "device_map": "auto"}
model = AutoModelForCausalLM.from_pretrained(model_name, **base_load_options)

def tokenize(example, max_length=512):
    """Convert one instruction/output record into causal-LM training features.

    Args:
        example: Mapping with ``"instruction"`` and ``"output"`` strings.
        max_length: Fixed sequence length to truncate/pad to.

    Returns:
        The tokenizer output for the full ``prompt + output`` text, plus a
        ``"labels"`` list of the same length as ``"input_ids"``. Prompt and
        padding positions are set to -100 (ignored by the loss), so only the
        response tokens are learned.
    """
    prompt = f"<s>[INST] {example['instruction']} [/INST]"
    output = f" {example['output']}</s>"

    # The template already spells out <s> and </s>; disabling the automatic
    # special tokens avoids a second BOS at the start of every example.
    result = tokenizer(
        prompt + output,
        truncation=True,
        padding="max_length",
        max_length=max_length,
        add_special_tokens=False,
    )
    prompt_len = len(tokenizer(prompt, add_special_tokens=False)["input_ids"])

    # Labels must line up position-by-position with input_ids. Tokenizing the
    # output on its own would give a sequence with different offsets.
    # Padding is detected through attention_mask, not the token id, because the
    # pad token is the EOS token and the real EOS must stay a training target.
    # Counting real tokens makes this correct for left and right padding alike.
    labels = []
    real_seen = 0
    for token_id, is_real in zip(result["input_ids"], result["attention_mask"]):
        if not is_real:
            labels.append(-100)
            continue
        labels.append(token_id if real_seen >= prompt_len else -100)
        real_seen += 1
    result["labels"] = labels
    return result

# Turn every record into model inputs; the raw text columns are dropped afterwards.
tokenized_dataset = dataset.map(tokenize, remove_columns=["instruction", "output"])

# LoRA adapter settings for Trial 1.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)
model = get_peft_model(model, lora_config)

# Trial 1 writes its epoch checkpoints and its final adapter to the same directory.
trial_dir = "checkpoints/mistral-lora-baseline"

# Optimisation settings for Trial 1: learning rate 2e-5, 2 epochs.
trainer_options = {
    "output_dir": trial_dir,
    "per_device_train_batch_size": 4,
    "gradient_accumulation_steps": 2,
    "num_train_epochs": 2,
    "learning_rate": 2e-5,
    "fp16": True,
    "logging_steps": 10,
    "save_strategy": "epoch",
    "report_to": "none",
}
training_args = TrainingArguments(**trainer_options)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
)
trainer.train()

# Store the adapter and the tokenizer side by side.
for artifact in (model, tokenizer):
    artifact.save_pretrained(trial_dir)

print("Trial 1 (Baseline) complete! Model saved to Drive.")


Trial 2: Lower Learning Rate + More Epochs

In [None]:
import torch
import gc
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, BitsAndBytesConfig
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType

model_name = "mistralai/Mistral-7B-Instruct-v0.2"

# Instruction/output pairs used for fine-tuning.
dataset = load_dataset("json", split="train", data_files="datasets/empdiag_1000.jsonl")

# Quantization settings: 8-bit weights, no fp32 CPU offload.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_threshold=6.0,
    llm_int8_enable_fp32_cpu_offload=False,
)

# EOS doubles as the padding token for the fixed-length padding in tokenize().
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# device_map={"": 0} maps the whole model (the "" key) to device index 0.
quantized_load_options = {
    "quantization_config": bnb_config,
    "device_map": {"": 0},
    "torch_dtype": torch.float16,
    "trust_remote_code": True,
}
model = AutoModelForCausalLM.from_pretrained(model_name, **quantized_load_options)

def tokenize(example, max_length=512):
    """Convert one instruction/output record into causal-LM training features.

    Args:
        example: Mapping with ``"instruction"`` and ``"output"`` strings.
        max_length: Fixed sequence length to truncate/pad to.

    Returns:
        The tokenizer output for the full ``prompt + output`` text, plus a
        ``"labels"`` list of the same length as ``"input_ids"``. Prompt and
        padding positions are set to -100 (ignored by the loss), so only the
        response tokens are learned.
    """
    prompt = f"<s>[INST] {example['instruction']} [/INST]"
    output = f" {example['output']}</s>"

    # The template already spells out <s> and </s>; disabling the automatic
    # special tokens avoids a second BOS at the start of every example.
    result = tokenizer(
        prompt + output,
        truncation=True,
        padding="max_length",
        max_length=max_length,
        add_special_tokens=False,
    )
    prompt_len = len(tokenizer(prompt, add_special_tokens=False)["input_ids"])

    # Labels must line up position-by-position with input_ids. Tokenizing the
    # output on its own would give a sequence with different offsets.
    # Padding is detected through attention_mask, not the token id, because the
    # pad token is the EOS token and the real EOS must stay a training target.
    # Counting real tokens makes this correct for left and right padding alike.
    labels = []
    real_seen = 0
    for token_id, is_real in zip(result["input_ids"], result["attention_mask"]):
        if not is_real:
            labels.append(-100)
            continue
        labels.append(token_id if real_seen >= prompt_len else -100)
        real_seen += 1
    result["labels"] = labels
    return result

# Turn every record into model inputs; the raw text columns are dropped afterwards.
tokenized_dataset = dataset.map(tokenize, remove_columns=["instruction", "output"])

# LoRA adapter settings for Trial 2.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# Trial 2 writes its epoch checkpoints and its final adapter to the same directory.
trial_dir = "checkpoints/mistral-lora-lr1e5_ep3"

# Optimisation settings for Trial 2: learning rate 1e-5, 3 epochs.
trainer_options = {
    "output_dir": trial_dir,
    "per_device_train_batch_size": 4,
    "gradient_accumulation_steps": 2,
    "num_train_epochs": 3,
    "learning_rate": 1e-5,
    "fp16": True,
    "logging_steps": 10,
    "save_strategy": "epoch",
    "report_to": "none",
}
training_args = TrainingArguments(**trainer_options)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
)
trainer.train()

# Store the adapter and the tokenizer side by side.
for artifact in (model, tokenizer):
    artifact.save_pretrained(trial_dir)

print("Trial 2 complete! Model saved to Drive.")

Trial 3: Fast Learning with Higher LR

In [None]:
import torch
import gc
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, BitsAndBytesConfig
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType

model_name = "mistralai/Mistral-7B-Instruct-v0.2"

# Instruction/output pairs used for fine-tuning.
dataset = load_dataset("json", split="train", data_files="datasets/empdiag_1000.jsonl")

# Quantization settings: 8-bit weights, no fp32 CPU offload.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_threshold=6.0,
    llm_int8_enable_fp32_cpu_offload=False,
)

# EOS doubles as the padding token for the fixed-length padding in tokenize().
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# device_map={"": 0} maps the whole model (the "" key) to device index 0.
quantized_load_options = {
    "quantization_config": bnb_config,
    "device_map": {"": 0},
    "torch_dtype": torch.float16,
    "trust_remote_code": True,
}
model = AutoModelForCausalLM.from_pretrained(model_name, **quantized_load_options)

def tokenize(example, max_length=512):
    """Convert one instruction/output record into causal-LM training features.

    Args:
        example: Mapping with ``"instruction"`` and ``"output"`` strings.
        max_length: Fixed sequence length to truncate/pad to.

    Returns:
        The tokenizer output for the full ``prompt + output`` text, plus a
        ``"labels"`` list of the same length as ``"input_ids"``. Prompt and
        padding positions are set to -100 (ignored by the loss), so only the
        response tokens are learned.
    """
    prompt = f"<s>[INST] {example['instruction']} [/INST]"
    output = f" {example['output']}</s>"

    # The template already spells out <s> and </s>; disabling the automatic
    # special tokens avoids a second BOS at the start of every example.
    result = tokenizer(
        prompt + output,
        truncation=True,
        padding="max_length",
        max_length=max_length,
        add_special_tokens=False,
    )
    prompt_len = len(tokenizer(prompt, add_special_tokens=False)["input_ids"])

    # Labels must line up position-by-position with input_ids. Tokenizing the
    # output on its own would give a sequence with different offsets.
    # Padding is detected through attention_mask, not the token id, because the
    # pad token is the EOS token and the real EOS must stay a training target.
    # Counting real tokens makes this correct for left and right padding alike.
    labels = []
    real_seen = 0
    for token_id, is_real in zip(result["input_ids"], result["attention_mask"]):
        if not is_real:
            labels.append(-100)
            continue
        labels.append(token_id if real_seen >= prompt_len else -100)
        real_seen += 1
    result["labels"] = labels
    return result

# Turn every record into model inputs; the raw text columns are dropped afterwards.
tokenized_dataset = dataset.map(tokenize, remove_columns=["instruction", "output"])

# LoRA adapter settings for Trial 3.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)
model = get_peft_model(model, lora_config)

# Trial 3 writes its epoch checkpoints and its final adapter to the same directory.
trial_dir = "checkpoints/mistral-lora-lr5e5_ep2"

# Optimisation settings for Trial 3: learning rate 5e-5, 2 epochs.
trainer_options = {
    "output_dir": trial_dir,
    "per_device_train_batch_size": 4,
    "gradient_accumulation_steps": 2,
    "num_train_epochs": 2,
    "learning_rate": 5e-5,
    "fp16": True,
    "logging_steps": 10,
    "save_strategy": "epoch",
    "report_to": "none",
}
training_args = TrainingArguments(**trainer_options)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
)
trainer.train()

# Store the adapter and the tokenizer side by side.
for artifact in (model, tokenizer):
    artifact.save_pretrained(trial_dir)

print("Trial 3 complete! Model saved to Drive.")


Evaluation for trial 1 model

In [None]:

import json
import torch
from tqdm import tqdm
import evaluate
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig

model_path = "checkpoints/mistral-lora-baseline"
# NOTE(review): this is the same file the training cells load, so the scores
# below are measured on training examples, not on held-out data.
eval_data_path = "datasets/empdiag_1000.jsonl"

# One (instruction, expected output) tuple per JSONL line.
with open(eval_data_path, "r") as f:
    eval_data = [(row["instruction"], row["output"]) for row in map(json.loads, f)]

# Rebuild the fine-tuned model: base checkpoint named in the adapter config,
# with the saved LoRA adapter applied on top.
peft_config = PeftConfig.from_pretrained(model_path)
base_name = peft_config.base_model_name_or_path
base_model = AutoModelForCausalLM.from_pretrained(
    base_name,
    device_map="auto",
    torch_dtype=torch.float16,
)
model = PeftModel.from_pretrained(base_model, model_path)
tokenizer = AutoTokenizer.from_pretrained(base_name)
tokenizer.pad_token = tokenizer.eos_token

# Metric objects, loaded in the order BLEU, ROUGE, BERTScore.
bleu, rouge, bertscore = (evaluate.load(metric_id) for metric_id in ("bleu", "rouge", "bertscore"))

predictions = []
references = []

print(f"Evaluating: Trial 1 — Baseline")

# Generate one response per evaluation example (first 250 examples).
for instruction, expected in tqdm(eval_data[:250]):
    prompt = f"<s>[INST] {instruction} [/INST]"
    # The prompt text already contains <s>, so the tokenizer must not add a
    # second BOS token of its own.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        add_special_tokens=False,
    ).to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id)

    # Keep only the newly generated tokens. Removing the prompt with a string
    # replace does not work: decoding with skip_special_tokens drops "<s>", so
    # the prompt string never matches and the instruction would stay inside
    # every prediction, skewing all metrics.
    prompt_len = inputs["input_ids"].shape[1]
    pred = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

    predictions.append(pred)
    references.append(expected)

# BLEU takes a list of reference lists; ROUGE and BERTScore take flat lists.
bleu_score = bleu.compute(predictions=predictions, references=[[r] for r in references])
rouge_score = rouge.compute(predictions=predictions, references=references)
bert_score = bertscore.compute(predictions=predictions, references=references, lang="en")


# BERTScore returns one F1 value per example; report the mean.
results = {
    "BLEU": round(bleu_score["bleu"], 4),
    "ROUGE-1": round(rouge_score["rouge1"], 4),
    "ROUGE-2": round(rouge_score["rouge2"], 4),
    "ROUGE-L": round(rouge_score["rougeL"], 4),
    "BERTScore (F1)": round(sum(bert_score["f1"]) / len(bert_score["f1"]), 4)
}

print("\nTrial 1 Results:")
print(pd.DataFrame([results]))


Evaluation for trial 2 model

In [None]:
import json
import torch
from tqdm import tqdm
import evaluate
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig


model_path = "checkpoints/mistral-lora-lr1e5_ep3"
# NOTE(review): this is the same file the training cells load, so the scores
# below are measured on training examples, not on held-out data.
eval_data_path = "datasets/empdiag_1000.jsonl"

# One (instruction, expected output) tuple per JSONL line.
with open(eval_data_path, "r") as f:
    eval_data = [(row["instruction"], row["output"]) for row in map(json.loads, f)]

# Rebuild the fine-tuned model: base checkpoint named in the adapter config,
# with the saved LoRA adapter applied on top.
peft_config = PeftConfig.from_pretrained(model_path)
base_name = peft_config.base_model_name_or_path
base_model = AutoModelForCausalLM.from_pretrained(
    base_name,
    device_map="auto",
    torch_dtype=torch.float16,
)
model = PeftModel.from_pretrained(base_model, model_path)
tokenizer = AutoTokenizer.from_pretrained(base_name)
tokenizer.pad_token = tokenizer.eos_token

# Metric objects, loaded in the order BLEU, ROUGE, BERTScore.
bleu, rouge, bertscore = (evaluate.load(metric_id) for metric_id in ("bleu", "rouge", "bertscore"))

predictions = []
references = []

print(f"Evaluating: Trial 2 — LR 1e-5, Epochs 3")

# Generate one response per evaluation example (first 250 examples).
for instruction, expected in tqdm(eval_data[:250]):
    prompt = f"<s>[INST] {instruction} [/INST]"
    # The prompt text already contains <s>, so the tokenizer must not add a
    # second BOS token of its own.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        add_special_tokens=False,
    ).to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id)

    # Keep only the newly generated tokens. Removing the prompt with a string
    # replace does not work: decoding with skip_special_tokens drops "<s>", so
    # the prompt string never matches and the instruction would stay inside
    # every prediction, skewing all metrics.
    prompt_len = inputs["input_ids"].shape[1]
    pred = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

    predictions.append(pred)
    references.append(expected)


# BLEU takes a list of reference lists; ROUGE and BERTScore take flat lists.
bleu_score = bleu.compute(predictions=predictions, references=[[r] for r in references])
rouge_score = rouge.compute(predictions=predictions, references=references)
bert_score = bertscore.compute(predictions=predictions, references=references, lang="en")

# BERTScore returns one F1 value per example; report the mean.
results = {
    "BLEU": round(bleu_score["bleu"], 4),
    "ROUGE-1": round(rouge_score["rouge1"], 4),
    "ROUGE-2": round(rouge_score["rouge2"], 4),
    "ROUGE-L": round(rouge_score["rougeL"], 4),
    "BERTScore (F1)": round(sum(bert_score["f1"]) / len(bert_score["f1"]), 4)
}

print("\nTrial 2 Results:")
print(pd.DataFrame([results]))


Evaluation for trial 3 model

In [None]:
import json
import torch
from tqdm import tqdm
import evaluate
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig

model_path = "checkpoints/mistral-lora-lr5e5_ep2"
# NOTE(review): this is the same file the training cells load, so the scores
# below are measured on training examples, not on held-out data.
eval_data_path = "datasets/empdiag_1000.jsonl"

# One (instruction, expected output) tuple per JSONL line.
with open(eval_data_path, "r") as f:
    eval_data = [(row["instruction"], row["output"]) for row in map(json.loads, f)]

# Rebuild the fine-tuned model: base checkpoint named in the adapter config,
# with the saved LoRA adapter applied on top.
peft_config = PeftConfig.from_pretrained(model_path)
base_name = peft_config.base_model_name_or_path
base_model = AutoModelForCausalLM.from_pretrained(
    base_name,
    device_map="auto",
    torch_dtype=torch.float16,
)
model = PeftModel.from_pretrained(base_model, model_path)
tokenizer = AutoTokenizer.from_pretrained(base_name)
tokenizer.pad_token = tokenizer.eos_token

# Metric objects, loaded in the order BLEU, ROUGE, BERTScore.
bleu, rouge, bertscore = (evaluate.load(metric_id) for metric_id in ("bleu", "rouge", "bertscore"))

predictions = []
references = []

print(f"Evaluating: Trial 3 — LR 5e-5, Epochs 2")

# Generate one response per evaluation example (first 250 examples).
for instruction, expected in tqdm(eval_data[:250]):
    prompt = f"<s>[INST] {instruction} [/INST]"
    # The prompt text already contains <s>, so the tokenizer must not add a
    # second BOS token of its own.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        add_special_tokens=False,
    ).to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id)

    # Keep only the newly generated tokens. Removing the prompt with a string
    # replace does not work: decoding with skip_special_tokens drops "<s>", so
    # the prompt string never matches and the instruction would stay inside
    # every prediction, skewing all metrics.
    prompt_len = inputs["input_ids"].shape[1]
    pred = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

    predictions.append(pred)
    references.append(expected)


# BLEU takes a list of reference lists; ROUGE and BERTScore take flat lists.
bleu_score = bleu.compute(predictions=predictions, references=[[r] for r in references])
rouge_score = rouge.compute(predictions=predictions, references=references)
bert_score = bertscore.compute(predictions=predictions, references=references, lang="en")

# BERTScore returns one F1 value per example; report the mean.
results = {
    "BLEU": round(bleu_score["bleu"], 4),
    "ROUGE-1": round(rouge_score["rouge1"], 4),
    "ROUGE-2": round(rouge_score["rouge2"], 4),
    "ROUGE-L": round(rouge_score["rougeL"], 4),
    "BERTScore (F1)": round(sum(bert_score["f1"]) / len(bert_score["f1"]), 4)
}

print("\nTrial 3 Results:")
print(pd.DataFrame([results]))
