# Mental Health Chatbot with Phi-3 Mini
This notebook implements a chatbot fine-tuned on mental health dialogue datasets using Phi-3 Mini (3.8B). It includes data loading, model training, sentiment analysis, and a Streamlit app for interaction.

In [None]:
# Clean corrupted transformers and pyarrow installations
# NOTE(review): these paths hard-code the Python 3.11 dist-packages directory;
# on a runtime with a different Python version `rm -rf` matches nothing and
# exits quietly — confirm against the active runtime.
# `~ransformers` looks like a leftover directory from an interrupted pip
# operation — TODO confirm.
!rm -rf /usr/local/lib/python3.11/dist-packages/~ransformers
!rm -rf /usr/local/lib/python3.11/dist-packages/transformers
!rm -rf /usr/local/lib/python3.11/dist-packages/pyarrow
# Remove the packages that the install cell later re-installs at pinned versions.
!pip uninstall -y transformers sentence-transformers torch torchvision torchaudio datasets gcsfs fsspec pyarrow

# Clear pip cache to avoid reusing corrupted packages
!pip cache purge

In [None]:
# Mount Google Drive at /content/drive. The dataset files and the fine-tuned
# model directory read by later cells all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Install dependencies with compatible versions
# triton has to match the build torch was released against: the torch 2.6.0
# Linux wheels declare a hard requirement on triton==3.2.0, so pinning any
# other triton version makes pip's resolver reject the whole install.
# NOTE: --force-reinstall replaces packages the runtime may already have
# imported; restart the runtime after this cell if later imports complain
# about mismatched versions.
%pip install --no-cache-dir --force-reinstall \
    torch==2.6.0 \
    torchvision==0.21.0 \
    torchaudio==2.6.0 \
    transformers==4.44.2 \
    datasets==3.6.0 \
    sentence-transformers==4.1.0 \
    pandas==2.2.3 \
    numpy==1.26.4 \
    tqdm==4.66.5 \
    nltk==3.9.1 \
    rouge_score==0.1.2 \
    bitsandbytes==0.43.3 \
    peft==0.9.0 \
    triton==3.2.0 \
    accelerate==1.0.1 \
    streamlit==1.45.1 \
    pyngrok==7.2.0 \
    serpapi==0.1.5 \
    fsspec==2025.3.0 \
    gcsfs==2025.3.0 \
    pyarrow==17.0.0 -q

# Import rouge_scorer
from rouge_score import rouge_scorer

In [None]:
# Show the GPU attached to this runtime (driver-level view).
!nvidia-smi
import torch
# Report what PyTorch itself can see of the CUDA setup.
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device count: {torch.cuda.device_count()}")
print(f"CUDA version: {torch.version.cuda}")
# Later cells move tensors to 'cuda' unconditionally, so stop here if there is
# no GPU rather than failing midway through model loading or training.
if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available. Please select T4 GPU runtime.")

In [None]:
# Verify installed versions
import torch
import transformers
import datasets
import fsspec
import gcsfs
import sentence_transformers
import bitsandbytes
import peft
import pyarrow
import triton
# Report the installed version of every library imported above, one per line.
# The display label is kept separate from the module object because the
# distribution name can differ from the import name (sentence-transformers).
_version_report = (
    ("torch", torch),
    ("transformers", transformers),
    ("datasets", datasets),
    ("fsspec", fsspec),
    ("gcsfs", gcsfs),
    ("sentence-transformers", sentence_transformers),
    ("bitsandbytes", bitsandbytes),
    ("peft", peft),
    ("pyarrow", pyarrow),
    ("triton", triton),
)
for _label, _module in _version_report:
    print(f"{_label}: {_module.__version__}")

In [None]:
import pandas as pd
import json
import nltk
from datasets import Dataset, load_dataset
from sklearn.model_selection import train_test_split
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    DataCollatorForLanguageModeling,
    BertTokenizer,
    AutoModelForSequenceClassification
)
from peft import LoraConfig, get_peft_model
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import os
from google.colab import userdata
from tqdm import tqdm
nltk.download('punkt')

In [None]:
# Resolve the Hugging Face token: prefer the Colab secret, otherwise prompt.
from getpass import getpass

try:
    HF_TOKEN = userdata.get('HF_TOKEN')
    if not HF_TOKEN:
        raise ValueError("HF_TOKEN not found in Colab Secrets.")
except Exception as e:
    print(f"Error loading HF_TOKEN: {e}")
    print("Please set HF_TOKEN in Colab Secrets or input manually.")
    # getpass instead of input(): the typed token is not echoed, so it never
    # lands in the cell output that gets saved and shared with the notebook.
    HF_TOKEN = getpass("Enter your Hugging Face token: ").strip()

In [None]:
def _is_missing(value):
    """Return True for None and float NaN (pandas' marker for an empty cell)."""
    return value is None or (isinstance(value, float) and value != value)


def _records_to_pairs(records, prompt_key, response_key):
    """Convert raw rows into Client/Therapist training pairs.

    Args:
        records: iterable of dict-like rows (JSON objects or DataFrame records).
        prompt_key: key holding the client utterance.
        response_key: key holding the therapist reply.

    Returns:
        List of dicts with string fields 'prompt', 'response' and 'emotion'.
        Rows whose prompt or response is missing are skipped, because
        formatting them would produce literal "Client: nan" training text.

    Raises:
        KeyError: if a row lacks `prompt_key` or `response_key`.
    """
    pairs = []
    for record in records:
        client_text = record[prompt_key]
        therapist_text = record[response_key]
        if _is_missing(client_text) or _is_missing(therapist_text):
            continue
        emotion = record.get('emotion', 'unknown')
        pairs.append({
            "prompt": f"Client: {client_text}",
            "response": f"Therapist: {therapist_text}",
            # Always a string: a NaN here would mix floats into a string
            # column when the merged list is converted to a Dataset.
            "emotion": 'unknown' if _is_missing(emotion) else str(emotion),
        })
    return pairs


def load_and_merge_datasets():
    """Load every available dialogue corpus from Drive and merge them.

    Each source is loaded best-effort: a missing file or unexpected schema is
    reported with a printed message and that source is skipped.

    Returns:
        datasets.Dataset with columns 'prompt', 'response' and 'emotion'.

    Raises:
        RuntimeError: if no source yielded any usable pair, since an empty
            dataset cannot be split or trained on.
    """
    drive_dir = '/content/drive/MyDrive'
    # (source name, file name, prompt key, response key, success verb, error label)
    sources = [
        ("HOPE", "hope.json", "client", "therapist", "Loaded", "HOPE"),
        ("EmpatheticDialogues", "empatheticdialogues.csv", "prompt", "response",
         "Loaded", "EmpatheticDialogues"),
        ("CounselChat", "counselchat.csv", "questionText", "answerText",
         "Loaded", "CounselChat"),
        # Placeholder slot for continuous development.
        ("NewDataset", "new_therapy_data.csv", "client_text", "therapist_text",
         "Added", "new dataset"),
    ]

    dialogue_data = []
    for name, file_name, prompt_key, response_key, verb, error_label in sources:
        path = f"{drive_dir}/{file_name}"
        try:
            if file_name.endswith('.json'):
                with open(path, 'r', encoding='utf-8') as f:
                    records = json.load(f)
            else:
                records = pd.read_csv(path).to_dict('records')
            pairs = _records_to_pairs(records, prompt_key, response_key)
        except Exception as e:
            print(f"Error loading {error_label}: {e}")
            continue
        dialogue_data.extend(pairs)
        print(f"{verb} {len(pairs)} pairs from {name}")

    if not dialogue_data:
        raise RuntimeError(
            "No dialogue data could be loaded; check that Google Drive is mounted "
            f"and the dataset files exist under {drive_dir}."
        )

    # Convert to Dataset
    return Dataset.from_list(dialogue_data)

# Load and split dataset
dataset = load_and_merge_datasets()
# 80/20 train/validation split done with scikit-learn on the pandas view of
# the merged data; random_state fixes the shuffle so the split is repeatable.
train_data, val_data = train_test_split(dataset.to_pandas(), test_size=0.2, random_state=42)
# reset_index(drop=True) discards the shuffled pandas index before converting
# back — presumably so it is not carried into the Dataset; TODO confirm.
train_dataset = Dataset.from_pandas(train_data.reset_index(drop=True))
val_dataset = Dataset.from_pandas(val_data.reset_index(drop=True))
print(f"Training samples: {len(train_dataset)}, Validation samples: {len(val_dataset)}")

In [None]:
# 4-bit NF4 quantization with fp16 compute, shared by every Phi-3 load below.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

base_model_id = "microsoft/Phi-3-mini-4k-instruct"
model_path = "/content/drive/MyDrive/phi-finetuned"


def _load_phi(source):
    """Load the quantized Phi-3 causal LM and its tokenizer from `source`.

    Args:
        source: Hugging Face hub id or local directory.

    Returns:
        (model, tokenizer) tuple.

    Raises:
        Whatever `from_pretrained` raises; the caller decides on a fallback.
    """
    model = AutoModelForCausalLM.from_pretrained(
        source,
        quantization_config=quantization_config,
        device_map="auto",
        trust_remote_code=True
    )
    tokenizer = AutoTokenizer.from_pretrained(
        source,
        trust_remote_code=True
    )
    return model, tokenizer


# The training cell saves a PEFT-wrapped model to model_path. PEFT checkpoints
# are identified by adapter_config.json rather than config.json, so checking
# for config.json alone never detects this notebook's own output. Accept
# either marker file.
checkpoint_markers = ("config.json", "adapter_config.json")
has_finetuned = any(
    os.path.isfile(os.path.join(model_path, marker)) for marker in checkpoint_markers
)

if has_finetuned:
    try:
        phi_model, phi_tokenizer = _load_phi(model_path)
        print(f"Loaded fine-tuned Phi-3 Mini from {model_path}")
    except Exception as e:
        # Best-effort: a broken checkpoint must not block the notebook.
        print(f"Error loading fine-tuned model: {e}")
        phi_model, phi_tokenizer = _load_phi(base_model_id)
else:
    print(f"Fine-tuned model not found at {model_path}. Loading pre-trained Phi-3 Mini.")
    phi_model, phi_tokenizer = _load_phi(base_model_id)

# Reuse EOS as the padding token for batching.
phi_tokenizer.pad_token = phi_tokenizer.eos_token

# Load BERT for sentiment analysis
# No device is requested here, so the classifier stays wherever
# from_pretrained puts it by default.
try:
    bert_model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/bert-base-uncased-emotion", token=HF_TOKEN)
    bert_tokenizer = BertTokenizer.from_pretrained("bhadresh-savani/bert-base-uncased-emotion", token=HF_TOKEN)
    print("Loaded BERT model for sentiment analysis.")
except Exception as e:
    print(f"Error loading BERT model: {e}")
    raise

In [None]:
from transformers import DataCollatorForLanguageModeling

def tokenize_function(examples):
    """Tokenize a batch of prompt/response pairs for causal-LM training.

    Args:
        examples: batch dict with parallel lists under 'prompt' and 'response'.

    Returns:
        The tokenizer output (padded/truncated to 128 tokens) with an added
        'labels' entry: a copy of input_ids in which padded positions are
        replaced by -100, the index the loss ignores.
    """
    texts = [prompt + " " + response for prompt, response in zip(examples['prompt'], examples['response'])]
    tokenized = phi_tokenizer(texts, truncation=True, max_length=128, padding='max_length')
    # Labels mirror input_ids for causal LM, except on padding: leaving pad
    # ids in the labels makes any loss computed straight from this dataset
    # (e.g. a sanity forward pass) dominated by "predict the pad token".
    tokenized['labels'] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(tokenized['input_ids'], tokenized['attention_mask'])
    ]
    return tokenized

# Limit dataset for memory
# Caps: at most 10000 training and 2500 validation examples (the first rows
# of each split; min() keeps smaller splits unchanged).
# NOTE(review): this cell reassigns train_dataset/val_dataset in place, so it
# is not re-runnable — after the column removal below, a second run would map
# tokenize_function over data that no longer has 'prompt'/'response'.
train_dataset = train_dataset.select(range(min(10000, len(train_dataset))))
val_dataset = val_dataset.select(range(min(2500, len(val_dataset))))
train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)

# Remove extra columns
# Keep only the three fields the model forward pass consumes.
train_dataset = train_dataset.remove_columns([col for col in train_dataset.column_names if col not in ['input_ids', 'attention_mask', 'labels']])
val_dataset = val_dataset.remove_columns([col for col in val_dataset.column_names if col not in ['input_ids', 'attention_mask', 'labels']])
# Return torch tensors (instead of Python lists) when the datasets are indexed.
train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
val_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])

# Debug dataset
print("Train dataset columns:", train_dataset.column_names)
print("Sample train item:", train_dataset[0].keys())

# Apply LoRA
# Rank-16 adapters (alpha 32, dropout 0.05) on the modules named "qkv_proj";
# no bias terms are trained.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["qkv_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
# NOTE(review): phi_model is rebound to the PEFT wrapper, so re-running this
# cell would wrap an already-wrapped model — reload the base model first.
phi_model = get_peft_model(phi_model, lora_config)

# Ensure LoRA parameters require gradients
# Every parameter whose name contains "lora" is trainable; all others are frozen.
phi_model.train()
for name, param in phi_model.named_parameters():
    if "lora" in name:
        param.requires_grad = True
    else:
        param.requires_grad = False

# Disable cache for gradient checkpointing
# NOTE(review): no call that enables gradient checkpointing is visible in this
# notebook; as written this only turns off the KV cache flag in the model config.
phi_model.config.use_cache = False

# Debug: Test forward pass with a single batch
# Takes the first training example straight from the dataset (bypassing the
# DataLoader/collator) and runs one forward pass with labels to confirm the
# model produces a loss before the long training loop starts.
print("Testing model forward pass...")
sample_batch = train_dataset[:1]
input_ids = sample_batch['input_ids'].to('cuda')
attention_mask = sample_batch['attention_mask'].to('cuda')
labels = sample_batch['labels'].to('cuda')
try:
    outputs = phi_model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
    print("Forward pass successful. Loss:", outputs.loss.item())
except Exception as e:
    # Report and re-raise: training cannot proceed if this fails.
    print(f"Forward pass failed: {e}")
    raise

# Manual training loop
# mlm=False selects causal-LM collation.
# NOTE(review): this collator is documented to rebuild `labels` from
# input_ids and mask pad-token positions; pad_token is set to eos_token
# earlier in the notebook, so an EOS inside the text would be masked as well —
# confirm that is intended.
data_collator = DataCollatorForLanguageModeling(tokenizer=phi_tokenizer, mlm=False)
train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True, collate_fn=data_collator)

# Optimizer
# Only the parameters that actually train are handed to AdamW and to gradient
# clipping; frozen base weights have no gradients and were only dead weight.
trainable_params = [p for p in phi_model.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(trainable_params, lr=2e-5)
num_epochs = 3
gradient_accumulation_steps = 4  # effective batch size = 1 * 4
max_grad_norm = 0.5
device = torch.device('cuda')
phi_model.to(device)

# Debug first batch
print("Inspecting first batch from DataLoader...")
first_batch = next(iter(train_dataloader))
print("Batch keys:", first_batch.keys())
print("Batch shapes:", {k: v.shape for k, v in first_batch.items()})

# Training loop
phi_model.train()
num_batches = len(train_dataloader)
for epoch in range(num_epochs):
    total_loss = 0
    # Start every epoch from clean gradients.
    optimizer.zero_grad()
    for step, batch in enumerate(tqdm(train_dataloader, desc=f"Epoch {epoch+1}")):
        # Filter batch to only expected keys
        valid_batch = {k: v.to(device) for k, v in batch.items() if k in ['input_ids', 'attention_mask', 'labels']}

        # Forward pass; the loss is scaled so the accumulated gradient is the
        # mean over the accumulation window.
        outputs = phi_model(**valid_batch)
        loss = outputs.loss / gradient_accumulation_steps
        loss.backward()
        total_loss += outputs.loss.item()

        # Gradient accumulation
        # Also step on the final batch: otherwise a trailing partial window is
        # never applied and its stale gradients leak into the next epoch.
        window_complete = (step + 1) % gradient_accumulation_steps == 0
        last_batch = (step + 1) == num_batches
        if window_complete or last_batch:
            torch.nn.utils.clip_grad_norm_(trainable_params, max_grad_norm)
            optimizer.step()
            optimizer.zero_grad()

        # Log every 100 steps
        if (step + 1) % 100 == 0:
            print(f"Epoch {epoch+1}, Step {step+1}, Loss: {total_loss / (step + 1):.4f}")

    print(f"Epoch {epoch+1} Average Loss: {total_loss / len(train_dataloader):.4f}")

# Save model
# phi_model is the PEFT wrapper at this point, so this presumably writes the
# LoRA adapter files rather than full model weights — TODO confirm.
phi_model.save_pretrained("/content/drive/MyDrive/phi-finetuned")
phi_tokenizer.save_pretrained("/content/drive/MyDrive/phi-finetuned")

In [None]:
class MentalHealthChatbot:
    """Therapist-style chatbot: a causal LM plus a BERT emotion classifier.

    Attributes are plain references to the objects passed in; nothing is
    copied or moved between devices at construction time.
    """

    def __init__(self, model, tokenizer, bert_model, bert_tokenizer):
        """Store the generation model/tokenizer and the emotion model/tokenizer."""
        self.model = model
        self.tokenizer = tokenizer
        self.bert_model = bert_model
        self.bert_tokenizer = bert_tokenizer
        # When True, the detected emotion is appended to the prompt.
        self.enable_sentiment = True
        # Position i names the class for classifier logit i.
        self.emotion_labels = ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']

    def analyze_sentiment(self, text):
        """Classify the emotion of `text`.

        Returns:
            (label, probability) for the most likely emotion, or
            ("unknown", 0.0) if classification fails for any reason.
        """
        try:
            inputs = self.bert_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
            # Send inputs to the classifier's own device. Using the chat
            # model's device puts inputs and classifier on different devices
            # whenever the two models live apart (chat model on GPU,
            # classifier on CPU), so every call fell through to "unknown".
            bert_device = self.bert_model.device
            inputs = {k: v.to(bert_device) for k, v in inputs.items()}
            with torch.no_grad():
                outputs = self.bert_model(**inputs)
            probs = torch.softmax(outputs.logits, dim=1)
            emotion_idx = probs[0].argmax().item()
            return self.emotion_labels[emotion_idx], probs[0][emotion_idx].item()
        except Exception as e:
            print(f"Error in sentiment analysis: {e}")
            return "unknown", 0.0

    def generate_response(self, user_input, max_length=100):
        """Generate a therapist reply to `user_input`.

        Args:
            user_input: the client's message.
            max_length: maximum number of tokens to generate for the reply
                (the prompt does not count against this budget).

        Returns:
            The reply text, or a fixed apology string if generation fails.
        """
        try:
            prompt = f"Client: {user_input}"
            if self.enable_sentiment:
                emotion, confidence = self.analyze_sentiment(user_input)
                prompt += f" [Emotion: {emotion}, Confidence: {confidence:.2f}]"
            prompt += " Therapist:"

            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
            prompt_len = inputs["input_ids"].shape[1]

            # The model may still be in train mode after fine-tuning; switch
            # to eval so dropout does not perturb generation.
            self.model.eval()
            outputs = self.model.generate(
                **inputs,
                # max_new_tokens, not max_length: max_length counts the prompt
                # too, so a prompt of >= max_length tokens left no room for a
                # reply and produced an empty answer.
                max_new_tokens=max_length,
                num_return_sequences=1,
                do_sample=True,
                top_p=0.9,
                temperature=0.7,
                pad_token_id=self.tokenizer.pad_token_id
            )

            # Decode only the newly generated tokens, then stop at the next
            # "Client:" marker so an invented follow-up turn is not returned.
            generated = self.tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
            therapist_response = generated.split("Client:")[0].strip()
            return therapist_response
        except Exception as e:
            print(f"Error generating response: {e}")
            return "I'm sorry, I couldn't process that. Can you try again?"

# Initialize chatbot
# Wires together the Phi-3 model/tokenizer and the BERT emotion
# model/tokenizer created in the earlier cells of this notebook.
chatbot = MentalHealthChatbot(phi_model, phi_tokenizer, bert_model, bert_tokenizer)

In [None]:
%%writefile app.py
import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, BertTokenizer, AutoModelForSequenceClassification

# Load models
@st.cache_resource
def load_models():
    """Load the chat model, its tokenizer, and the emotion classifier.

    Wrapped in st.cache_resource so Streamlit can reuse the returned objects
    instead of reloading them on each script run.

    Returns:
        (model, tokenizer, bert_model, bert_tokenizer) on success, or a tuple
        of four Nones after reporting the error in the UI.
    """
    finetuned_dir = "/content/drive/MyDrive/phi-finetuned"
    emotion_checkpoint = "bhadresh-savani/bert-base-uncased-emotion"
    four_bit_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )

    try:
        chat_model = AutoModelForCausalLM.from_pretrained(
            finetuned_dir,
            quantization_config=four_bit_config,
            device_map="auto",
            trust_remote_code=True,
        )
        chat_tokenizer = AutoTokenizer.from_pretrained(finetuned_dir, trust_remote_code=True)
        # Pad with the EOS token.
        chat_tokenizer.pad_token = chat_tokenizer.eos_token
        emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_checkpoint)
        emotion_tokenizer = BertTokenizer.from_pretrained(emotion_checkpoint)
    except Exception as e:
        st.error(f"Error loading models: {e}")
        return None, None, None, None

    return chat_model, chat_tokenizer, emotion_model, emotion_tokenizer


model, tokenizer, bert_model, bert_tokenizer = load_models()

# Chatbot class
class MentalHealthChatbot:
    """Therapist-style chatbot: a causal LM plus a BERT emotion classifier.

    Attributes are plain references to the objects passed in; nothing is
    copied or moved between devices at construction time.
    """

    def __init__(self, model, tokenizer, bert_model, bert_tokenizer):
        """Store the generation model/tokenizer and the emotion model/tokenizer."""
        self.model = model
        self.tokenizer = tokenizer
        self.bert_model = bert_model
        self.bert_tokenizer = bert_tokenizer
        # When True, the detected emotion is appended to the prompt.
        self.enable_sentiment = True
        # Position i names the class for classifier logit i.
        self.emotion_labels = ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']

    def analyze_sentiment(self, text):
        """Classify the emotion of `text`.

        Returns:
            (label, probability) for the most likely emotion, or
            ("unknown", 0.0) if classification fails for any reason.
        """
        try:
            inputs = self.bert_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
            # Send inputs to the classifier's own device. Using the chat
            # model's device puts inputs and classifier on different devices
            # whenever the two models live apart (chat model on GPU,
            # classifier on CPU), so every call fell through to "unknown".
            bert_device = self.bert_model.device
            inputs = {k: v.to(bert_device) for k, v in inputs.items()}
            with torch.no_grad():
                outputs = self.bert_model(**inputs)
            probs = torch.softmax(outputs.logits, dim=1)
            emotion_idx = probs[0].argmax().item()
            return self.emotion_labels[emotion_idx], probs[0][emotion_idx].item()
        except Exception as e:
            # Goes to the Streamlit server log instead of vanishing silently.
            print(f"Error in sentiment analysis: {e}")
            return "unknown", 0.0

    def generate_response(self, user_input, max_length=100):
        """Generate a therapist reply to `user_input`.

        Args:
            user_input: the client's message.
            max_length: maximum number of tokens to generate for the reply
                (the prompt does not count against this budget).

        Returns:
            The reply text, or a fixed apology string if generation fails.
        """
        try:
            prompt = f"Client: {user_input}"
            if self.enable_sentiment:
                emotion, confidence = self.analyze_sentiment(user_input)
                prompt += f" [Emotion: {emotion}, Confidence: {confidence:.2f}]"
            prompt += " Therapist:"

            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
            prompt_len = inputs["input_ids"].shape[1]

            # Make sure dropout is off while generating.
            self.model.eval()
            outputs = self.model.generate(
                **inputs,
                # max_new_tokens, not max_length: max_length counts the prompt
                # too, so a prompt of >= max_length tokens left no room for a
                # reply and produced an empty answer.
                max_new_tokens=max_length,
                num_return_sequences=1,
                do_sample=True,
                top_p=0.9,
                temperature=0.7,
                pad_token_id=self.tokenizer.pad_token_id
            )

            # Decode only the newly generated tokens, then stop at the next
            # "Client:" marker so an invented follow-up turn is not returned.
            generated = self.tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
            therapist_response = generated.split("Client:")[0].strip()
            return therapist_response
        except Exception as e:
            # Goes to the Streamlit server log instead of vanishing silently.
            print(f"Error generating response: {e}")
            return "I'm sorry, I couldn't process that. Can you try again?"

# Streamlit UI
st.title("Mental Health Chatbot")
st.write("Talk to our AI therapist trained to provide empathetic responses.")

# load_models() returns Nones on failure; show an error instead of the chat UI.
if model is None or tokenizer is None:
    st.error("Failed to load models. Please check the notebook logs.")
else:
    chatbot = MentalHealthChatbot(model, tokenizer, bert_model, bert_tokenizer)
    
    # Chat history
    # Kept in st.session_state as a list of {"role", "content"} dicts.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # User input
    # The body runs only when the user has submitted a non-empty message.
    if user_input := st.chat_input("How are you feeling today?"):
        # Add user message
        st.session_state.messages.append({"role": "user", "content": user_input})
        with st.chat_message("user"):
            st.markdown(user_input)

        # Generate response
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                response = chatbot.generate_response(user_input)
                st.markdown(response)
        # Record the reply so it is re-rendered from history on later runs.
        st.session_state.messages.append({"role": "assistant", "content": response})

In [None]:
import subprocess
from getpass import getpass

from pyngrok import ngrok

# Set ngrok authtoken
# Read the token from Colab Secrets (same pattern as HF_TOKEN) and fall back
# to a hidden prompt, so the secret is never typed into the notebook source
# or echoed into saved cell output.
try:
    NGROK_AUTH_TOKEN = userdata.get('NGROK_AUTH_TOKEN')
    if not NGROK_AUTH_TOKEN:
        raise ValueError("NGROK_AUTH_TOKEN not found in Colab Secrets.")
except Exception as e:
    print(f"Error loading NGROK_AUTH_TOKEN: {e}")
    NGROK_AUTH_TOKEN = getpass("Enter your ngrok auth token: ").strip()
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Start Streamlit server
# Launched with subprocess instead of `!... &`: the cell returns immediately,
# the process handle stays available (streamlit_process.terminate() stops the
# server), and the server output is kept in streamlit.log for debugging.
streamlit_log = open("streamlit.log", "w")
streamlit_process = subprocess.Popen(
    ["streamlit", "run", "app.py", "--server.port", "8501"],
    stdout=streamlit_log,
    stderr=subprocess.STDOUT,
)

# Create ngrok tunnel
public_url = ngrok.connect(8501)
print(f"Streamlit app running at: {public_url}")

In [None]:
# Smoke test: send one message through the in-notebook chatbot and print the
# exchange. Generation samples (do_sample=True), so the reply varies per run.
test_input = "I'm feeling really anxious today."
response = chatbot.generate_response(test_input)
print(f"User: {test_input}")
print(f"Therapist: {response}")