# Fine-Tuning a Conversational LLM for Psychological Dialogue Support

This project fine-tunes a Large Language Model (LLM) to simulate natural, empathetic, and context-aware conversations between a psychologist and a patient. The model learns from a curated dataset of real or synthetic therapy-style question–answer exchanges, allowing it to generate emotionally intelligent, coherent, and supportive responses to mental-health-related queries.

Unlike general chatbots trained on open-domain text, this model specializes in therapeutic conversation patterns — focusing on reflective listening, validating emotions, and suggesting healthy thought reframing.

The final output is an AI-driven conversational agent that can engage in mental-wellness dialogue, provide psychoeducation, and guide users toward constructive self-reflection — without offering clinical diagnosis or treatment.

# Import Libraries

In [29]:
import pandas as pd
import os
# Widen pandas' display limits so long text columns and wide frames are
# shown in full instead of being elided in notebook output.
pd.options.display.max_columns = 500
pd.options.display.max_rows = 500
pd.options.display.width = 1000
pd.options.display.max_colwidth = 1000

In [30]:
import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments

from transformers import pipeline
from peft import LoraConfig, get_peft_model
import datasets

from transformers.utils import logging as hf_logging


# Import Dataset and Preprocess it

In [31]:
# Load and preprocess the data
SAMPLE_SIZE = 20  # number of rows kept for this small experiment

# Rows with any missing value are discarded once, right after loading.
df_fine_tuning = pd.read_csv('train.csv').dropna()

# Trim surrounding whitespace on text columns only. The `.str` accessor raises
# AttributeError on non-string columns, so those are passed through untouched.
df_fine_tuning = df_fine_tuning.apply(
    lambda col: col.str.strip() if pd.api.types.is_string_dtype(col) else col
)

# Never request more rows than exist; `sample(n=...)` raises ValueError when
# n exceeds the frame length (replace=False).
df_fine_tuning = df_fine_tuning.sample(
    n=min(SAMPLE_SIZE, len(df_fine_tuning)),
    random_state=42,
)

# Shuffle and split the dataset: 80% for training, the remaining rows for evaluation
train_df = df_fine_tuning.sample(frac=0.8, random_state=42)
eval_df = df_fine_tuning.drop(train_df.index)

# Length of each split
print(f"Length of training dataset: {len(train_df)}")
print(f"Length of evaluation dataset: {len(eval_df)}")


Length of training dataset: 16
Length of evaluation dataset: 4


In [32]:
# Display the first training row to inspect the loaded Context/Response text.
train_df.head(1)

Unnamed: 0,Context,Response
3155,We've been in a long distance relationship for two and a half years. I recently saw his phone and saw the people he texts the most and one of them was a female coworker. I don't know how to approach this situation. How do I ask him about it?.,"If you'd like to ask a question, then go ahead and ask!Boyfriend/girlfriend is a close relationship and it is usually understood as an exclusive relationship. ¬†You're definitely entitled to know if your wishes to not have him texting another woman, are being respected.Often people are afraid to ask because they fear the truth will hurt them.In the short term this is definitely true.In the long term, knowing you are getting what you want and at the very least stating your expectations to your boyfriend, will clarify for him, what is meaningful in your relationship."


## Download and test the base model without any fine-tuning


model_name = "microsoft/Phi-3-mini-4k-instruct"

# Load model and tokenizer (CPU)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)
model.to("cpu")

# Ask a simple question
question = "What are 3 healthy ways to deal with anxiety?"

messages = [
    {"role": "system", "content": "You are a calm, empathetic assistant that offers short, clear mental wellness advice."},
    {"role": "user", "content": question}
]

# Render the chat as a single prompt string that ends with the assistant
# header, so generation starts at the beginning of the reply.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt")

# Inference only: no gradients are needed.
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=150,
        # temperature/top_p only take effect when sampling is enabled; without
        # do_sample=True decoding is greedy and both settings are ignored.
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
        # NOTE(review): this overrides any stop-token list shipped in the
        # model's generation config - confirm the chat end-of-turn token is
        # still honoured.
        eos_token_id=tokenizer.eos_token_id
    )

# Slice off the prompt tokens so that only the newly generated reply is decoded.
reply = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
print("Bot:", reply.strip())


## Now fine-tune the model and see the results

In [33]:
# Re-import the Hugging Face Dataset class: the earlier
# `from torch.utils.data import Dataset` rebinds the same name.
from datasets import Dataset

# Keep only the two text columns and renumber rows from 0 before converting
# each pandas split into a Hugging Face Dataset.
train_ds, eval_ds = (
    Dataset.from_pandas(split_df[["Context", "Response"]].reset_index(drop=True))
    for split_df in (train_df, eval_df)
)

## Download the Model and its tokenizer


In [34]:
# Hugging Face Hub identifier of the checkpoint that will be fine-tuned.
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"   # new base
# Sequence length in tokens: used as the tokenizer's model_max_length and as
# the truncation/padding length when encoding training rows.
MAX_LEN = 1024


In [35]:
# --- Tokenizer ---------------------------------------------------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
if tokenizer.pad_token is None:
    # No dedicated pad token: reuse EOS so sequences can still be padded.
    tokenizer.pad_token = tokenizer.eos_token
# Pad and truncate on the right-hand side, capped at MAX_LEN tokens.
tokenizer.padding_side = tokenizer.truncation_side = "right"
tokenizer.model_max_length = MAX_LEN

# --- Base model (float32) ----------------------------------------------------
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float32)
model.config.pad_token_id = tokenizer.pad_token_id
model.config.use_cache = False  # needed for training

# --- LoRA adapter ------------------------------------------------------------
# Only "qkv_proj" is adapted; "o_proj", "gate_up_proj" and "down_proj" are
# other Phi-3 module names kept here for reference but not targeted.
target_modules = ["qkv_proj"]

lora_config = LoraConfig(
    task_type="CAUSAL_LM",           # causal language modeling
    target_modules=target_modules,   # modules that receive LoRA weights
    r=1,                             # rank: higher r ⇒ more capacity, more parameters
    lora_alpha=2,                    # scaling factor
    lora_dropout=0.1,                # dropout for regularization
    bias="none",                     # bias terms are left unmodified
)

# Wrap the base model with the adapter and report the trainable parameter count.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

loading file tokenizer.model from cache at C:\Users\TashfeenAhmed\.cache\huggingface\hub\models--microsoft--Phi-3-mini-4k-instruct\snapshots\0a67737cc96d2554230f90338b163bc6380a2a85\tokenizer.model
loading file tokenizer.json from cache at C:\Users\TashfeenAhmed\.cache\huggingface\hub\models--microsoft--Phi-3-mini-4k-instruct\snapshots\0a67737cc96d2554230f90338b163bc6380a2a85\tokenizer.json
loading file added_tokens.json from cache at C:\Users\TashfeenAhmed\.cache\huggingface\hub\models--microsoft--Phi-3-mini-4k-instruct\snapshots\0a67737cc96d2554230f90338b163bc6380a2a85\added_tokens.json
loading file special_tokens_map.json from cache at C:\Users\TashfeenAhmed\.cache\huggingface\hub\models--microsoft--Phi-3-mini-4k-instruct\snapshots\0a67737cc96d2554230f90338b163bc6380a2a85\special_tokens_map.json
loading file tokenizer_config.json from cache at C:\Users\TashfeenAhmed\.cache\huggingface\hub\models--microsoft--Phi-3-mini-4k-instruct\snapshots\0a67737cc96d2554230f90338b163bc6380a2a85\to

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

OSError: The paging file is too small for this operation to complete. (os error 1455)

## Tokenize the dataset for training

In [None]:
# --- System prompt: one shared instruction so every sample uses the same tone ---
SYSTEM_PROMPT = " ".join(
    [
        "You are a calm, empathetic assistant for mental wellbeing.",
        "Validate feelings, be non-judgmental, suggest one small next step.",
        "Do not diagnose. If crisis is indicated, advise contacting local emergency services.",
    ]
)


def encode_row(example):
    """
    Convert one (Context → Response) pair from the dataset into tokenized,
    model-ready tensors for fine-tuning a chat model.

    Args:
        example: a dictionary-like object with keys:
                 "Context"  - what the user said (the question)
                 "Response" - what the assistant (therapist) replied

    Returns:
        A dictionary containing:
          - input_ids: token IDs of the full conversation, truncated/padded
                       to MAX_LEN
          - attention_mask: 1 for real tokens, 0 for padding
          - labels: copy of input_ids in which the prompt tokens AND the
                    padding positions are set to -100, so the loss is only
                    computed on the assistant's reply

    Note:
        Prompt masking uses ``labels[:prompt_len]`` and therefore assumes the
        tokenizer pads on the right.
    """
    # -------------------------------------------------------------------------
    # 1. Build the conversation turns
    # -------------------------------------------------------------------------
    # SYSTEM_PROMPT provides a consistent tone; the user and assistant turns
    # come from the dataset row.
    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    user_turn = {"role": "user", "content": example["Context"]}
    assistant_turn = {"role": "assistant", "content": example["Response"]}

    # -------------------------------------------------------------------------
    # 2. Render both the full conversation and the prompt-only prefix as text
    # -------------------------------------------------------------------------
    # Full conversation, e.g. for Phi-3:
    #   <|system|>You are calm...
    #   <|user|>I feel anxious
    #   <|assistant|>That's understandable...
    text_full = tokenizer.apply_chat_template(
        [system_turn, user_turn, assistant_turn],
        tokenize=False,              # return a string, not token IDs yet
        add_generation_prompt=False  # don't append an empty assistant header
    )

    # Prompt only (system + user). add_generation_prompt=True appends the
    # assistant header, i.e. the position where the reply begins.
    prompt_only = tokenizer.apply_chat_template(
        [system_turn, user_turn],
        tokenize=False,
        add_generation_prompt=True
    )

    # -------------------------------------------------------------------------
    # 3. Tokenize
    # -------------------------------------------------------------------------
    # The full text is truncated/padded to MAX_LEN and returned as tensors.
    enc_full = tokenizer(
        text_full,
        truncation=True,
        max_length=MAX_LEN,
        padding="max_length",
        return_tensors="pt"
    )

    # The prompt is tokenized only to learn its length, so it is not padded.
    enc_prompt = tokenizer(
        prompt_only,
        truncation=True,
        max_length=MAX_LEN
    )
    prompt_len = len(enc_prompt["input_ids"])

    # -------------------------------------------------------------------------
    # 4. Build labels
    # -------------------------------------------------------------------------
    input_ids = enc_full["input_ids"][0]       # token IDs of the conversation
    attn_mask = enc_full["attention_mask"][0]  # 1 = real token, 0 = padding

    labels = input_ids.clone()

    # Ignore the system + user part: the model is only trained on the reply.
    labels[:prompt_len] = -100

    # Ignore padding as well. Without this, every pad position up to MAX_LEN
    # would count as a prediction target and dominate the loss. Masking by
    # attention mask (not by token id) keeps a real end-of-sequence token in
    # the reply even when the pad token is the same as EOS.
    labels[attn_mask == 0] = -100

    # -------------------------------------------------------------------------
    # 5. Return the fields consumed during training
    # -------------------------------------------------------------------------
    return {
        "input_ids": input_ids,       # tokenized full conversation
        "attention_mask": attn_mask,  # real tokens vs padding
        "labels": labels              # input_ids with prompt and padding masked
    }


In [None]:
# Encode every row of both splits with encode_row (training split first).
train_tokenized, eval_tokenized = (
    split.map(encode_row) for split in (train_ds, eval_ds)
)

Map:   0%|          | 0/16 [00:00<?, ? examples/s]

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

In [None]:
# Return the three model-input columns as PyTorch tensors for both splits.
cols = ["input_ids", "attention_mask", "labels"]
for tokenized_split in (train_tokenized, eval_tokenized):
    tokenized_split.set_format(type="torch", columns=cols)

In [None]:
# --- Training args (CPU-friendly defaults) ---
# Emit INFO-level messages from both transformers and datasets.
hf_logging.set_verbosity_info()
datasets.logging.set_verbosity_info()

args = TrainingArguments(
    # Output locations
    output_dir="./results",
    logging_dir="./logs",
    # Schedule and batch sizing
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,
    # Optimizer
    learning_rate=2e-4,
    weight_decay=0.01,
    # Logging: every step, including the first one
    logging_strategy="steps",
    logging_steps=1,
    logging_first_step=True,
    disable_tqdm=False,
    report_to="none",
    # Evaluate and checkpoint once per epoch
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    dataloader_pin_memory=False,
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_tokenized,
    eval_dataset=eval_tokenized,
)

# --- Print step counts before training ---
num_samples = len(train_tokenized)
batch_size = args.per_device_train_batch_size
grad_accum = args.gradient_accumulation_steps
epochs = args.num_train_epochs

# Ceiling division: a partially filled effective batch still counts as a step.
steps_per_epoch = -(-num_samples // (batch_size * grad_accum))
total_steps = steps_per_epoch * epochs

for summary_line in (
    f"üìä Dataset size: {num_samples} samples",
    f"üß© Effective batch size: {batch_size * grad_accum}",
    f"üîÅ Steps per epoch: {steps_per_epoch}",
    f"‚è±Ô∏è Total training steps: {total_steps}\n",
):
    print(summary_line)

# --- Train ---
trainer.train()

PyTorch: setting up devices
The following columns in the Training set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: Response, Context. If Response, Context are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 16
  Num Epochs = 3
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 4
  Total optimization steps = 3
  Number of trainable parameters = 1,572,864


üìä Dataset size: 16 samples
üß© Effective batch size: 16
üîÅ Steps per epoch: 1
‚è±Ô∏è Total training steps: 3



KeyboardInterrupt: 

In [None]:
# --- Save LoRA adapter (small) ---
# The trained model and its tokenizer are written to the same directory.
adapter_dir = "./lora_finetuned_model"
os.makedirs(adapter_dir, exist_ok=True)
for artifact in (trainer.model, tokenizer):
    artifact.save_pretrained(adapter_dir)
print(f"‚úÖ LoRA adapter saved to {adapter_dir}")