# Fine-tuning Llama2 for Mental Health Counseling

The trained model is accessible on Huggingface under langecod/CounselLlama7B.

# 1.) Import & Install Necessary libraries (Colab requires installs with each run time)

In [None]:
pip install transformers datasets peft trl accelerate bitsandbytes packaging ninja sentencepiece

In [None]:
!nvcc --version

In [None]:
pip install flash-attn --no-build-isolation

In [None]:
import random
import gc
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
import numpy as np
import pandas as pd
import transformers
import accelerate
import bitsandbytes as bnb
from datasets import load_dataset, concatenate_datasets
import torch

# 2. Loading the Mental Health Conversations Dataset:
The Amod/mental_health_counseling_conversations dataset is a collection of 3512 question-and-answer pairs sourced from counselchat.com, an online counseling and therapy platform. It covers a wide range of mental health topics, with responses written by certified psychologists, and is tailored for refining language models to generate cogent advice on mental health inquiries. All entries are in English, and each entry is structured with a 'Context' (user's question) and a 'Response' (psychologist's answer). Note: the code below loads the `nbertagnolli/counsel-chat` dataset instead; its `questionText`, `topic`, and `answerText` columns are renamed to 'Context', 'topic', and 'Response'.

In [None]:
# Load the "train" split of the counsel-chat dataset from the Hugging Face Hub.
dataset = load_dataset("nbertagnolli/counsel-chat", split="train")
# Bare expression: shows the dataset summary as the cell output.
dataset

In [None]:
import pandas as pd
# Convert the loaded dataset to a pandas DataFrame for filtering and splitting
df = pd.DataFrame(dataset)

# Display the first two rows of the DataFrame
df.head(2)



In [None]:

# Keep one row per (question, topic) pair so the test set holds unique questions;
# drop_duplicates retains the first answer seen for each pair as the reference.
filtered_df = df[['questionText', 'topic', 'answerText']].drop_duplicates(subset=['questionText', 'topic'])
# Rename the columns to the Context/Response naming used by the prompt template
filtered_df.columns = ['Context', 'topic', 'Response']

# Number of unique questions per topic
topic_counts = filtered_df['topic'].value_counts()

# Hold out roughly 20% of each topic's unique questions for the test set
target_test_size_per_topic = (topic_counts * 0.2).round().astype(int)

# Stratified test sample: draw each topic's quota from the de-duplicated questions.
# Parts are collected in a list and concatenated once instead of growing a
# DataFrame inside the loop.
test_parts = []
for topic, target_size in target_test_size_per_topic.items():
    topic_questions = filtered_df[filtered_df['topic'] == topic]
    test_parts.append(
        topic_questions.sample(n=min(target_size, len(topic_questions)), random_state=42)
    )
test_set_balanced = (
    pd.concat(test_parts) if test_parts else pd.DataFrame(columns=filtered_df.columns)
)

# Build the train set from the full (un-deduplicated) frame so every answer is
# kept, but exclude EVERY row whose question was held out for testing.
# Dropping only the sampled row index would leave other answers to the same
# test question in the training data (train/test leakage).
held_out_questions = set(test_set_balanced['Context'])
train_parts = []
for topic in target_test_size_per_topic.index:
    topic_rows = df[df['topic'] == topic]
    train_parts.append(topic_rows[~topic_rows['questionText'].isin(held_out_questions)])
# The train frame keeps the original column names (questionText, answerText, ...).
train_set_balanced = (
    pd.concat(train_parts) if train_parts else pd.DataFrame(columns=df.columns)
)

print("train data shape", train_set_balanced.shape)
print("test data shape", test_set_balanced.shape)

In [None]:

# Persist both splits so later cells can reload them from disk.
# Save the balanced test set
test_set_balanced.to_csv('counsel_chat_test_balanced.csv', index=False)

# Save the balanced train set
train_set_balanced.to_csv('counsel_chat_train_balanced.csv', index=False)


# Check the final distribution of topics in the balanced test set
balanced_test_distribution = test_set_balanced['topic'].value_counts()

print(balanced_test_distribution)
# Label corrected: this prints the first rows (head), not the shape.
print("test data head: \n", test_set_balanced.head())

In [None]:
# Reload the training split that was written to disk earlier.
train_df = pd.read_csv("counsel_chat_train_balanced.csv")

# Keep only the question/answer text and expose it under the
# Context/Response names expected by the prompt formatter.
column_names = {'questionText': 'Context', 'answerText': 'Response'}
df = train_df[list(column_names)].rename(columns=column_names)

df.head()

In [None]:
from sklearn.model_selection import train_test_split
import pandas as pd

# NOTE(review): train_test_split is imported but never called in this cell; the
# per-topic split was already written to the balanced CSV files.

# Shuffle the training rows; the fixed seed keeps the order reproducible.
train_df = df.sample(frac=1, random_state=42)

# Reload the held-out test split and keep only the prompt/reference columns.
test_df = pd.read_csv("counsel_chat_test_balanced.csv")
test_df1 = test_df[['Context', 'Response']]

# Report the shapes of the full test frame and the shuffled training frame.
print(test_df.shape)
print(train_df.shape)


In [None]:
# NOTE(review): despite "train" in the file name, this CSV holds the TEST split
# (test_df1); it is read back later to build the `test` dataset.
test_df1.to_csv('counsel_chat_train_balanced_one.csv', index=False)


In [None]:

# Overwrite the training CSV with the shuffled Context/Response frame so it can
# be reloaded through load_dataset("csv", ...).

train_df.to_csv('counsel_chat_train_balanced.csv', index=False)


In [None]:

# Load the shuffled training CSV as a dataset object (requesting the "train" split).
train = load_dataset("csv", data_files="counsel_chat_train_balanced.csv", split="train")
# Bare expression: shows the dataset summary as the cell output.
train

In [None]:
# Load the held-out test rows as a dataset object.
# NOTE(review): the file name says "train", but this CSV was written from test_df1.
test = load_dataset("csv", data_files="counsel_chat_train_balanced_one.csv" , split="train")
# Bare expression: shows the dataset summary as the cell output.
test

# 3. Importing, Quantizing, and Preparing the Llama2 Chat Model:
To safeguard private health information and intellectual property, utilizing an open-source model is imperative. Meta's Llama 2 stands out in this regard, offering a collection of pretrained and fine-tuned large language models (LLMs) that span from 7 billion to 70 billion parameters. The Llama 2-Chat variant is especially tailored for dialogue applications, demonstrating superior performance over other open-source chat models in various benchmarks and human evaluations for both helpfulness and safety. This made the 7 billion parameter Llama 2-Chat model an ideal choice for our prototype. Additionally, to address memory constraints, expedite training, and ensure cost-effective operations, we load the model with its weights quantized to 4 bits (NF4 with double quantization), while computation runs in bfloat16.

In [None]:
# 4-bit NF4 quantization with double quantization; matmuls are computed in bfloat16.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,  # A100
)

# Load Tokenizer
# SECURITY: the access token must never be committed with the notebook. It is
# read from the HF_TOKEN environment variable; when unset, None is passed and
# the library falls back to any locally stored login. Any token that was
# previously hard-coded here should be revoked.
tokenizer = AutoTokenizer.from_pretrained(
    'meta-llama/Llama-2-7b-chat-hf',
    token=os.environ.get("HF_TOKEN"),
)
# Add Padding Token: reuse the EOS token for padding and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"




In [None]:
# Load the LLaMA model in 4-bit using the NF4 quantization config.
# SECURITY: the access token is read from the HF_TOKEN environment variable
# instead of being hard-coded; revoke any token that was committed earlier.
model = transformers.AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    token=os.environ.get("HF_TOKEN"),
    quantization_config=nf4_config,
    use_flash_attention_2=False,  # FlashAttention-2 is explicitly disabled here
)

Using a 4-bit quantized model offers advantages in terms of memory usage, training speed, and inference performance. However, such quantization makes the model incompatible with conventional training approaches. To address this challenge, the "Quantized Low-Rank Adaptation" (QLoRA) method is employed. In QLoRA, the original pre-trained model weights remain frozen in 4-bit format, but an "adapter" with 16-bit model weights is created, allowing for fine-tuning on a specific task.

In [None]:

import re
def _numeric_components(name):
    """Yield the integer value of every purely numeric dotted component of *name*."""
    for part in name.split("."):
        if part.isascii() and part.isdigit():
            yield int(part)


def get_num_layers(model):
    """Return the highest numeric index found in the model's parameter names.

    Only whole dotted components count (``layers.31.mlp`` -> 31); digits that
    are merely part of an identifier (``fc2``, ``conv1``) are ignored. Note
    that this is the index of the last block, not the number of blocks.

    Args:
        model: a ``torch.nn.Module``.

    Returns:
        The largest integer component across all parameter names.

    Raises:
        ValueError: if no parameter name contains a numeric component.
    """
    indices = {
        index
        for name, _ in model.named_parameters()
        for index in _numeric_components(name)
    }
    if not indices:
        raise ValueError(
            "model has no numerically indexed parameters; cannot determine the last layer"
        )
    return max(indices)


def get_last_layer_linears(model):
    """Return the names of the ``torch.nn.Linear`` modules in the last indexed layer.

    A module belongs to the last layer when one of its dotted name components
    equals the index returned by ``get_num_layers``. Matching whole components
    (rather than substrings) prevents e.g. index ``2`` from matching ``fc2`` or
    ``layers.12``. Modules whose name contains "encoder" are skipped.

    Args:
        model: a ``torch.nn.Module``.

    Returns:
        A list of qualified module names, in ``named_modules()`` order.

    Raises:
        ValueError: propagated from ``get_num_layers`` when no indexed layer exists.
    """
    last_index = str(get_num_layers(model))
    return [
        name
        for name, module in model.named_modules()
        if last_index in name.split(".")
        and "encoder" not in name
        and isinstance(module, torch.nn.Linear)
    ]

# LoRA adapter configuration for causal-LM fine-tuning. Adapters are attached
# only to the modules returned by get_last_layer_linears(model), i.e. the
# linear layers of the highest-indexed layer, not to every layer.
peft_config = LoraConfig(
    r=8,
    lora_alpha=64,
    target_modules=get_last_layer_linears(model),
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)


In [None]:
# Prepare the quantized model for k-bit training, then wrap it with the LoRA
# adapters described by peft_config.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

# 4.) Setting up the Trainer & Prompting:


In [None]:
#pip install --upgrade torch transformers


In [None]:
#dataset

In [None]:
# System message to better instruct chatbot
# The triple-quoted block directly below is a bare string literal (never
# assigned or used): it keeps a longer, disabled variant of the system message.
'''
system_message = """You are a helpful and and truthful psychology and psychotherapy assistant. Your primary role is to provide empathetic, understanding, and non-judgmental responses to users seeking emotional and psychological support.
                  Always respond with empathy and demonstrate active listening; try to focus on the user. Your responses should reflect that you understand the user's feelings and concerns. If a user expresses thoughts of self-harm, suicide, or harm to others, prioritize their safety.
                  Encourage them to seek immediate professional help and provide emergency contact numbers when appropriate.  You are not a licensed medical professional. Do not diagnose or prescribe treatments.
                  Instead, encourage users to consult with a licensed therapist or medical professional for specific advice. Avoid taking sides or expressing personal opinions. Your role is to provide a safe space for users to share and reflect.
                  Remember, your goal is to provide a supportive and understanding environment for users to share their feelings and concerns. Always prioritize their well-being and safety."""
'''
# Short system message that is actually in effect; format_llama embeds it in every prompt.
system_message = """You are supportive psychology and psychotherapy assistant, provide empathetic, non-judgmental responses, reflecting active listening and understanding of the user's emotions. Safety is paramount; You prioritize users' well-being, especially if they mention thoughts of self-harm, suicide, or harm to others."""



def format_llama(entry):
  formatted = f"<s>[INST] <<SYS>>{system_message}<</SYS>>{entry['Context']} [/INST]  {entry['Response']}  </s>"

  return formatted

In [None]:
#dataset['train'][0]
train

In [None]:
# Training hyper-parameters for the supervised fine-tuning run.
args = TrainingArguments(
    output_dir="CounselLlama7B",
    logging_dir='./results',
    num_train_epochs=2,
    per_device_train_batch_size=1,
    gradient_checkpointing=True,
    optim="paged_adamw_8bit",
    logging_steps=10,
    #save_strategy="epoch",
    learning_rate=1e-4,
    tf32=False, #A100
    max_grad_norm=0.3,
    # NOTE(review): warmup_ratio is set together with lr_scheduler_type="constant";
    # presumably the warmup only takes effect with "constant_with_warmup" - confirm.
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    # Evaluation during training is disabled (no eval dataset is passed to the trainer).
    #load_best_model_at_end=True,
    #evaluation_strategy='epoch',

)


In [None]:


max_seq_length = 1024  # max sequence length for model and packing of the dataset

# Supervised fine-tuning trainer: each row of `train` is rendered with
# format_llama, and packing=True is requested with max_seq_length as the length limit.
# NOTE(review): `model` has already been wrapped by get_peft_model, yet
# peft_config is passed again here - confirm the installed trl version does not
# try to wrap the model a second time.
trainer = SFTTrainer(
    model=model,
    train_dataset=train,
    #eval_dataset=val,
    peft_config=peft_config,
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    packing=True,
    formatting_func=format_llama,
    args=args,

)


# 5.) Training

In [None]:
# Run garbage collection and release cached CUDA memory before training starts.
gc.collect()
torch.cuda.empty_cache()


In [None]:
# Run the fine-tuning loop configured above in `trainer`.
trainer.train()


In [None]:

# Save the model twice: once through the trainer (presumably into
# args.output_dir, "CounselLlama7B" - confirm) and once directly from the
# PEFT-wrapped model into "llama2-finetuned".
trainer.save_model()
model.save_pretrained("llama2-finetuned")

In [None]:
from tqdm import tqdm

# Switch the fine-tuned model to inference: re-enable the KV cache and eval mode.
model.config.use_cache = True
model.eval()

# Reload the tokenizer.
# SECURITY: the access token is read from the HF_TOKEN environment variable
# instead of being hard-coded; revoke any token that was committed earlier.
tokenizer = AutoTokenizer.from_pretrained(
    'meta-llama/Llama-2-7b-chat-hf',
    token=os.environ.get("HF_TOKEN"),
)
# Add Padding Token
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer,
                max_length=2048)

# Placeholder for the generated responses
generated_responses = []

test_df = pd.read_csv("counsel_chat_test_balanced.csv")
# Missing questions are replaced with an empty string so the prompt stays a valid str.
test_df['Context'] = test_df['Context'].fillna('')

for n, (index, row) in enumerate(tqdm(test_df.iterrows(), total=len(test_df))):
    # The prompt must end at [/INST], matching the chat UI prompt: a trailing
    # </s> (EOS) would mark the sequence as finished before the model answers.
    prompt = f"<s>[INST] <<SYS>>{system_message}<</SYS>>{row['Context']} [/INST]"
    reference = row['Response']

    # return_full_text=False keeps only the newly generated answer, so the saved
    # 'Response' column no longer contains the prompt and system message.
    result = pipe(prompt, return_full_text=False)
    generated_text = result[0]['generated_text']

    generated_responses.append({
        'Context': row['Context'],
        'topic': row['topic'],
        'Response': generated_text,
    })

    # Print the first five examples as a quick sanity check.
    if n < 5:
        print("Context:", row['Context'])
        print("generated_text:", generated_text)
        print("reference:", reference)

# Create a DataFrame from the generated responses
generated_df = pd.DataFrame(generated_responses)

# Write the DataFrame to an Excel file
generated_df.to_excel('Llama2_preds.xlsx', index=False)


In [None]:
import torch

# Additionally dump the state_dict of the (PEFT-wrapped) model to a .pth file.
torch.save(model.state_dict(), "llama2-finetuned.pth")

# Convert Test Dataset to Test DataFrame

In [None]:
# Convert the `test` dataset object to a pandas DataFrame (rebinds test_df).
test_df = pd.DataFrame(test)

# Display the first two rows of the DataFrame
test_df.head(2)

# 6.) Chatbot User Interface:

A simple chatbot user interface is set up so that the user can interact with the model, ask mental-health-related questions, and sample the responses.

In [None]:
# display/HTML are imported from the public IPython.display module; importing
# them from IPython.core.display is deprecated and fails on newer IPython.
from IPython.display import HTML, clear_output, display
from ipywidgets import widgets, Layout, Box

# Multi-line input box for the user's message.
text_input = widgets.Textarea(
    value='',
    placeholder='Type your message here...',
    description='Input:',
    disabled=False,
    layout=Layout(width='38.2%')
)

# Button that submits the current message.
button = widgets.Button(description="Submit")

In [None]:
# Output widget that collects the conversation transcript.
output_area = widgets.Output(layout=Layout(width='61.8%'))

# Add a processing indication label below your text_input
processing_label = widgets.Label(value='')  # Initialize with an empty value

# System message to better instruct chatbot
# NOTE(review): this rebinds the global system_message to a longer prompt than
# the one in effect when format_llama built the training prompts - confirm
# that the mismatch is intended.
system_message = """You are a helpful and and truthful psychology and psychotherapy assistant. Your primary role is to provide empathetic, understanding, and non-judgmental responses to users seeking emotional and psychological support.
                  Always respond with empathy and demonstrate active listening; try to focus on the user. Your responses should reflect that you understand the user's feelings and concerns. If a user expresses thoughts of self-harm, suicide, or harm to others, prioritize their safety.
                  Encourage them to seek immediate professional help and provide emergency contact numbers when appropriate.  You are not a licensed medical professional. Do not diagnose or prescribe treatments.
                  Instead, encourage users to consult with a licensed therapist or medical professional for specific advice. Avoid taking sides or expressing personal opinions. Your role is to provide a safe space for users to share and reflect.
                  Remember, your goal is to provide a supportive and understanding environment for users to share their feelings and concerns. Always prioritize their well-being and safety."""

# Display Greeting Message
with output_area:
  display(HTML(f'<strong>Assistant: </strong>Hi there! How are you today?'))
  display(HTML('<br/><br/>'))

def on_submit_button_clicked(b):
    """Handle a click on Submit: echo the user's message and display the model's reply.

    Reads the text from ``text_input``, wraps it in the Llama-2 chat template
    together with the global ``system_message``, samples a reply from ``model``
    and appends both turns to ``output_area``.

    Args:
        b: the button instance passed by ipywidgets (unused).
    """
    import html  # stdlib; local import so the handler does not depend on other cells

    with output_area:
        # Get user input
        user_input = text_input.value
        formatted = f"<s>[INST] <<SYS>>{system_message}<</SYS>>{user_input} [/INST]"
        # Display input. The text is escaped so that characters such as < or &
        # typed by the user are shown literally instead of being parsed as HTML.
        display(HTML(f'<strong>User:</strong> {html.escape(user_input)}'))
        display(HTML('<br/><br/>'))

        # Show processing indication
        processing_label.value = 'Processing...'
        try:
            # Use your chatbot model to get a response
            input_ids = tokenizer(formatted, return_tensors="pt", truncation=True, max_length=2048).input_ids.cuda()
            outputs = model.generate(input_ids=input_ids, do_sample=True, top_p=0.9, temperature=0.95)
            # Decode only the tokens generated after the prompt. Slicing the
            # decoded text by len(formatted) is unreliable because special
            # tokens such as <s> are dropped during decoding.
            new_tokens = outputs[0][input_ids.shape[1]:]
            translated_output = tokenizer.decode(new_tokens, skip_special_tokens=True)

            # Display response (escaped for the same reason as the user text)
            display(HTML(f'<strong>Assistant:</strong> {html.escape(translated_output)}'))
            display(HTML('<br/><br/>'))
        finally:
            # Clear the processing indication even when generation fails
            processing_label.value = ''

        # Clear the text input (only reached after a successful reply)
        text_input.value = ''

# Register the handler so that each click on Submit runs on_submit_button_clicked.
button.on_click(on_submit_button_clicked)

In [None]:
# Display widgets
display(text_input, button, processing_label, output_area)

In [None]:
clear_output()

In [None]:
# Push model to hub since Google colab empties out directory
from huggingface_hub import notebook_login

notebook_login()

In [None]:
trainer.push_to_hub('')


In [None]:
pip freeze > requirements.txt
