<a href="https://www.kaggle.com/code/gouthamvarmaindukuri/mind-companion?scriptVersionId=208259590" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# Install dependencies
!pip install -q transformers accelerate bitsandbytes peft
!pip install -q huggingface_hub

In [None]:
# Download dataset directly
!wget https://huggingface.co/datasets/Amod/mental_health_counseling_conversations/resolve/main/combined_dataset.json

In [None]:
# Add at the start of your code
import os
import warnings
warnings.filterwarnings("ignore")

# Set environment variables
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["JAX_DISABLE_FORK"] = "1"

# Update torch amp settings
import torch
torch.amp.GradScaler = lambda *args, **kwargs: torch.amp.GradScaler("cuda", *args, **kwargs)

In [None]:
import os
import json
import pandas as pd
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import torch
from huggingface_hub import login

In [None]:
# Print GPU info: show the nvidia-smi report so the available GPU can be
# checked before any model is loaded.
!nvidia-smi

In [None]:
# Clear any existing memory: run Python's garbage collector first so unreachable
# objects are dropped, then ask PyTorch to empty its CUDA cache.
import gc
import torch
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Load JSONL file (one JSON document per line)
data = []
skipped_lines = 0
# Explicit encoding so the result does not depend on the platform default.
with open('combined_dataset.json', 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Blank lines are not records; do not count them as malformed.
            continue
        try:
            data.append(json.loads(line))
        except json.JSONDecodeError:
            # Best-effort load: keep going, but remember how many records were lost.
            skipped_lines += 1

if skipped_lines:
    print(f"Warning: skipped {skipped_lines} malformed line(s) in combined_dataset.json")

# Convert to pandas DataFrame
df = pd.DataFrame(data)

In [None]:
# Sanity-check the loaded frame: first rows, then the list of column names
print("Sample data:", df.head(), sep="\n")
print()
print("Columns:", df.columns.tolist())

In [None]:
# Draw 80% of the rows (fixed seed) for training; every row that was not drawn
# becomes validation data.
train_df = df.sample(random_state=42, frac=0.8)
val_df = df.drop(index=train_df.index)

# Wrap both splits as Hugging Face datasets
train_dataset, val_dataset = (
    Dataset.from_pandas(split_frame) for split_frame in (train_df, val_df)
)

In [None]:
# Report how many examples ended up in each split
print()
print("Training examples:", len(train_dataset))
print("Validation examples:", len(val_dataset))

In [None]:
# Hugging Face access token.
# SECURITY: a real token used to be hard-coded on this line. Anything committed to a
# notebook must be treated as leaked: revoke that token in the Hugging Face account
# settings and provide a fresh one through the HF_TOKEN environment variable.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    # Interactive fallback: the secret is typed at run time and never stored in the file.
    from getpass import getpass
    HF_TOKEN = getpass("Hugging Face token: ").strip()
if not HF_TOKEN:
    raise RuntimeError("No Hugging Face token provided; set the HF_TOKEN environment variable.")
login(token=HF_TOKEN)

In [None]:
# Format conversations
def format_conversation(example):
    """Render one dataset row as a single "User: ... / Assistant: ..." string.

    Reads the row's 'Context' and 'Response' entries and returns a dict with a
    single key, 'text', holding the two-line exchange.
    """
    user_turn = example['Context']
    assistant_turn = example['Response']
    text = "User: {}\nAssistant: {}".format(user_turn, assistant_turn)
    return {'text': text}

# Add the formatted 'text' column to the training split, then the validation split
train_dataset, val_dataset = (
    split.map(format_conversation) for split in (train_dataset, val_dataset)
)

# Show the first formatted training example
print()
print("Sample formatted conversation:")
print(train_dataset[0]['text'])

In [None]:
# Get current device as reported by torch.cuda.current_device().
# NOTE(review): `device` is not referenced by any other cell visible in this
# notebook (later code uses `model.device`) — confirm whether it is still needed.
device = torch.cuda.current_device()

In [None]:
# 4-bit quantization settings handed to the model loader:
# NF4 quantization type, float16 compute dtype, double quantization enabled.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

In [None]:
# Choose the base checkpoint and load its tokenizer with the Hugging Face token
print()
print("Initializing model and tokenizer...")
model_name = "google/gemma-2b-it"
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
    token=HF_TOKEN,
)

In [None]:
# Load the base checkpoint with the 4-bit quantization config, float16 dtype,
# automatic device placement and the generation cache switched off.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map="auto",
    use_cache=False,
    trust_remote_code=True,
    token=HF_TOKEN,
)

In [None]:
# Run PEFT's k-bit preparation helper on the quantized model
model = prepare_model_for_kbit_training(model)

# LoRA settings: rank 16, alpha 32, applied to the q/k/v/o projection modules
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the model with the adapters and report the trainable parameter count
model = get_peft_model(model, lora_config)
print()
print("Trainable parameters:")
model.print_trainable_parameters()

In [None]:
# Tokenize datasets
def tokenize(examples):
    """Tokenize formatted conversations for causal-LM fine-tuning.

    Args:
        examples: Mapping with a "text" entry. When mapped with batched=True
            this is a list of strings; a single string is accepted as well.

    Returns:
        The tokenizer output for the text(s), truncated and padded to 512 tokens.
    """
    # Close every conversation with the end-of-sequence token so the fine-tuned
    # model gets a training signal for where an answer stops. Without it the
    # sequences just run into padding and generation tends to ramble on to the
    # length limit.
    eos = tokenizer.eos_token or ""

    def _with_eos(text):
        # Avoid doubling the marker if the text is already terminated.
        if eos and not text.endswith(eos):
            return text + eos
        return text

    raw = examples["text"]
    texts = _with_eos(raw) if isinstance(raw, str) else [_with_eos(text) for text in raw]
    return tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=512
    )

# Tokenize both splits in batches; the original columns are dropped so only the
# tokenizer's output columns remain.
print()
print("Tokenizing datasets...")
tokenized_train = train_dataset.map(
    tokenize,
    remove_columns=train_dataset.column_names,
    batched=True,
)
tokenized_val = val_dataset.map(
    tokenize,
    remove_columns=val_dataset.column_names,
    batched=True,
)

In [None]:
# Memory optimization environment variables
# NOTE(review): the model has already been loaded onto the GPU by an earlier cell.
# If PyTorch reads this variable only when its CUDA allocator first starts up,
# setting it here comes too late to have any effect — consider moving it to the
# first cell, before `import torch`. TODO confirm.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"  # Simplified memory config

In [None]:
# Training configuration, grouped by concern
training_args = TrainingArguments(
    # Where results and logs go
    output_dir="./results",
    logging_dir='./logs',
    logging_steps=20,
    report_to="tensorboard",
    # Schedule: three epochs; evaluate and checkpoint once per epoch and
    # reload the best checkpoint when training ends
    num_train_epochs=3,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # Batching: one example per step, accumulated over 32 steps
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=32,
    dataloader_num_workers=0,
    remove_unused_columns=False,
    # Optimizer and learning-rate schedule
    optim="paged_adamw_32bit",
    learning_rate=3e-4,
    lr_scheduler_type="cosine",
    warmup_steps=50,
    weight_decay=0.01,
    max_grad_norm=0.3,
    # Precision and memory
    fp16=True,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
)
# Extra model-loading options.
# NOTE(review): `model_kwargs` is not referenced by any other cell visible in this notebook.
model_kwargs = dict(
    device_map="auto",
    max_memory={0: "10GB"},  # Limit memory usage
    torch_dtype=torch.float16,
)

In [None]:
# Build the trainer around the LoRA-wrapped model and the tokenized splits
print()
print("Initializing trainer...")
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer),
    eval_dataset=tokenized_val,
    train_dataset=tokenized_train,
)

# Run training; on failure report the error, empty the CUDA cache, then re-raise
print()
print("Starting training...")
try:
    trainer.train()
except Exception as training_error:
    print(f"Error during training: {str(training_error)}")
    torch.cuda.empty_cache()
    raise training_error

# Persist the model held by the trainer
print()
print("Saving model...")
trainer.model.save_pretrained("./final_model_lora")

In [None]:
def _truncate_at_patterns(text, patterns):
    """Cut `text` at the first case-insensitive occurrence of each pattern, in list order."""
    import re

    for pattern in patterns:
        match = re.search(re.escape(pattern), text, flags=re.IGNORECASE)
        if match:
            text = text[:match.start()]
    return text


def generate_response(prompt, max_length=256):
    """Generate a supportive reply to `prompt` with the module-level model and tokenizer.

    The prompt is wrapped in the "User: ... / Assistant: ..." template followed by a
    fixed empathetic lead-in. The model's continuation is then cut at the first
    occurrence of any unwanted pattern (sign-offs, credentials, contact details, ...).

    Args:
        prompt: The user's message.
        max_length: Token budget. The templated prompt is truncated to at most this
            many tokens, and at most this many NEW tokens are generated.

    Returns:
        The cleaned continuation (the template and lead-in are not included), or an
        "Error generating response: ..." string if anything fails.
    """
    try:
        formatted_prompt = (
            f"User: {prompt}\n"
            "Assistant: I hear you, and what you're feeling is valid. You're not alone in this, and there are ways to help. "
            "Let me share some supportive suggestions that might help you feel better. "
        )

        inputs = tokenizer(formatted_prompt, return_tensors="pt", truncation=True, max_length=max_length).to(model.device)
        prompt_token_count = inputs["input_ids"].shape[1]

        outputs = model.generate(
            **inputs,
            # Budget for generated tokens only. A total-length limit would leave
            # nothing to generate once the prompt itself reaches the limit.
            max_new_tokens=max_length,
            num_return_sequences=1,
            temperature=0.6,
            do_sample=True,
            top_p=0.85,
            top_k=40,
            no_repeat_ngram_size=3,
            repetition_penalty=1.3,
            length_penalty=1.1
        )

        # Decode only the tokens after the prompt. Removing the prompt by string
        # replacement fails whenever the decoded text differs from the prompt
        # by even one character.
        generated_tokens = outputs[0][prompt_token_count:]
        response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

        # Combined list of patterns to remove
        patterns_to_remove = [
            # Endings/Signatures
            "Please contact", "Best,", "Best regards", "Sincerely",
            "Dr.", "Licensed", "Certified", "Therapist", "Counselor",
            "I hope this helps", "Remember,", "reach out", ":)", "💫",
            "Best wishes", "Take care", "Warm regards", "Contact me",
            "For more information", "Feel free to",

            # Assumptions/References
            "you mentioned", "you said", "already", "as we discussed",
            "years in", "my suggestion", "I am", "my experience",
            "If you are in", "please contact", "call", "website",
            "helpline", "1-800", "1-", "800-", "www.", "http"
        ]

        # Matching and cutting are both case-insensitive. A case-sensitive split
        # after a case-insensitive check would let e.g. "Call" slip through uncut.
        return _truncate_at_patterns(response, patterns_to_remove).strip()

    except Exception as e:
        # Deliberate best-effort: report the failure as text instead of raising.
        return f"Error generating response: {str(e)}"
        
# Smoke-test the fine-tuned model on a few example prompts
test_prompts = [
    "I've been feeling really anxious lately about work.",
    "I can't sleep at night because of stress.",
    "I feel lonely and isolated.",
]

print()
print("Testing model with example prompts:")
for prompt in test_prompts:
    response = generate_response(prompt)
    print(f"\nUser: {prompt}", f"Assistant: {response}", sep="\n")

In [None]:
# Write the model and its tokenizer side by side into one directory
output_dir = "./supportive-ai-model"

model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

print("Model and tokenizer saved to", output_dir)

In [None]:
!pip install gradio --quiet

In [None]:
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Reload the tokenizer and the model from the directory written above
model_path = "./supportive-ai-model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

def chat_response(message, history):
    """Generate the assistant's reply to `message` for the Gradio chat interface.

    Args:
        message: The latest user message.
        history: Earlier turns supplied by the chat interface. Not used: every
            reply is generated from `message` alone.

    Returns:
        The generated reply with surrounding whitespace removed.
    """
    # Format the prompt similar to training data
    formatted_prompt = f"User: {message}\nAssistant: "

    inputs = tokenizer(formatted_prompt, return_tensors="pt", truncation=True, max_length=512)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    prompt_token_count = inputs["input_ids"].shape[1]

    # Generate response
    outputs = model.generate(
        **inputs,
        # Budget for generated tokens only. A total-length limit of 512 would leave
        # no room to answer once the (truncated) prompt reaches 512 tokens.
        max_new_tokens=512,
        num_return_sequences=1,
        temperature=0.7,
        do_sample=True,
        top_p=0.85,
        top_k=40,
        no_repeat_ngram_size=3,
        repetition_penalty=1.3,
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode only the tokens after the prompt. Splitting the full text on
    # "Assistant: " would cut a reply short whenever it contains that text.
    generated_tokens = outputs[0][prompt_token_count:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    # If the model starts writing a further "User:" turn, keep only the first reply.
    response = response.split("\nUser:")[0]

    return response.strip()

# Chat UI: wires chat_response into a Gradio ChatInterface with three starter prompts
demo = gr.ChatInterface(
    fn=chat_response,
    theme="soft",
    title="Mental Health Support Assistant",
    description="A supportive AI assistant trained to provide empathetic responses to mental health concerns. Please note: This is not a replacement for professional mental health support.",
    examples=[
        "I've been feeling really anxious lately about work.",
        "I can't sleep at night because of stress.",
        "I feel lonely and isolated.",
    ],
)

# Start the app (share=True is passed through to Gradio's launch)
if __name__ == "__main__":
    demo.launch(share=True)