In [4]:
import pandas as pd
from transformers import GPT2Tokenizer, GPT2LMHeadModel, TrainingArguments, Trainer, pipeline, DataCollatorForLanguageModeling
import torch
from torch.utils.data import Dataset
import os

# Ensure the save directory exists
model_save_dir = "./model"
os.makedirs(model_save_dir, exist_ok=True)

# Load and preprocess the dataset.
# The CSV location can be overridden through the MENTAL_HEALTH_DATASET_PATH
# environment variable so the notebook is not tied to a single machine; the
# original absolute path stays as the default, so existing setups keep working.
data_path = os.environ.get(
    "MENTAL_HEALTH_DATASET_PATH",
    r"C:\Users\aasth\OneDrive\Desktop\LLM_Mental_Health_Support_Chatbot\Dataset\mental_health_conversational_dataset_train.csv",
)
data = pd.read_csv(data_path)

def preprocess_text(row):
    """Flatten one raw conversation string into lower-cased "question answer" text.

    The raw value is split on the ``<<<ASSISTANT>>>:`` marker. The part before
    it, with the ``<<<HUMAN>>>:`` marker removed, is the question; the part
    right after it is the answer. Both are stripped and lower-cased.

    Args:
        row: Raw cell value from the ``text`` column. Anything that is not a
            string (for example a missing value) is treated as an empty
            conversation instead of raising ``AttributeError``.

    Returns:
        The question and answer joined by a single space. Empty pieces are
        left out, so the result never has a dangling leading/trailing space.
        Returns ``""`` when there is nothing usable in ``row``.
    """
    if not isinstance(row, str):
        # Missing cells are not strings and have no .split().
        return ""
    parts = row.split("<<<ASSISTANT>>>:")
    question = parts[0].replace("<<<HUMAN>>>:", "").strip().lower()
    answer = parts[1].strip().lower() if len(parts) > 1 else ""
    # Join only the non-empty pieces to avoid "question " / " answer" results.
    return " ".join(piece for piece in (question, answer) if piece)

# Build the flattened training text for every conversation row
data['processed_text'] = data['text'].map(preprocess_text)

# Tokenizer setup: load the GPT-2 tokenizer and register its EOS token as the pad token
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})

# Encode all texts at once: truncate to 512 tokens, pad every row to that
# length, and return PyTorch tensors
inputs = tokenizer(
    list(data['processed_text']),
    truncation=True,
    max_length=512,
    padding="max_length",
    return_tensors="pt",
)

# Adjusted Dataset preparation
class MentalHealthDataset(Dataset):
    """Index-able view over a mapping of encoded tensors.

    ``encodings`` is any mapping with an ``'input_ids'`` entry whose values can
    be indexed by example. Each item is a dict holding the ``idx``-th slice of
    every entry plus a ``'labels'`` entry that is an independent copy of that
    example's ``'input_ids'``.
    """

    def __init__(self, encodings):
        self.encodings = encodings

    def __len__(self):
        # One example per row of input_ids.
        return len(self.encodings['input_ids'])

    def __getitem__(self, idx):
        sample = {}
        for name, values in self.encodings.items():
            sample[name] = values[idx]
        # Clone so the labels do not share storage with the inputs.
        sample['labels'] = sample['input_ids'].clone()
        return sample

# Adjusted dataset initialization with the corrected inputs:
# wrap the tokenizer output so individual examples can be fetched by index.
dataset = MentalHealthDataset(inputs)

# Model preparation: start from the pretrained GPT-2 language-model head and
# size its token embeddings to the tokenizer's current vocabulary length.
# NOTE(review): the pad token registered on the tokenizer reuses the existing
# EOS token, so this resize is presumably a no-op — confirm.
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.resize_token_embeddings(len(tokenizer))

# Hold out 10% of the examples for validation. The arguments below request
# step-based evaluation and load_best_model_at_end, both of which need an
# eval_dataset; without one the Trainer has nothing to compute a validation
# loss on.
eval_size = max(1, int(0.1 * len(dataset)))
train_dataset, eval_dataset = torch.utils.data.random_split(
    dataset,
    [len(dataset) - eval_size, eval_size],
    generator=torch.Generator().manual_seed(42),  # fixed seed => reproducible split
)

training_args = TrainingArguments(
    output_dir=model_save_dir,
    num_train_epochs=4,
    per_device_train_batch_size=2,  # Adjust based on your GPU/CPU memory
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    save_steps=1000,
    eval_steps=1000,  # Ensure this aligns with save_steps or as per your evaluation frequency requirement
    evaluation_strategy="steps",  # Align evaluation strategy with save strategy
    load_best_model_at_end=True,
)

# Causal language modelling (mlm=False): the collator builds the batches.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,  # required by evaluation_strategy="steps"
)

# Model training
trainer.train()

# Save the model
trainer.save_model(model_save_dir)
# Persist the tokenizer next to the weights as well, so that
# GPT2Tokenizer.from_pretrained(<this directory>) works when the model is
# reloaded for interaction later.
tokenizer.save_pretrained(model_save_dir)

# Evaluation function refined for clarity and usability
def evaluate_advice(prompt):
    """Generate one reply for ``prompt`` with the in-memory model.

    A text-generation pipeline is built from the module-level ``model`` and
    ``tokenizer`` on every call.

    Args:
        prompt: Text to continue.

    Returns:
        The first generated text, with surrounding whitespace removed.
    """
    generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
    outputs = generator(
        prompt,
        max_length=150,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
    )
    first_text = outputs[0]['generated_text']
    return first_text.strip()

Step,Training Loss,Validation Loss


In [2]:
# Specifying the model to use for sentiment analysis.
# The chat loop below reads the first result's 'label' and compares it to 'NEGATIVE'.
# Requires `pipeline` to be imported already; running this cell on a fresh
# kernel before the import cell raises NameError.
sentiment_model = pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english')

def collect_feedback():
    """Prompt the user for feedback on the advice and return what they typed."""
    return input("Feedback on the advice: ")

def chat_with_sentiment_analysis():
    """Run the interactive chat loop until the user types 'quit'.

    For every message: classify it with ``sentiment_model``, print a framing
    line that depends on whether the label is 'NEGATIVE', print the reply from
    ``evaluate_advice``, then ask for feedback and echo it back.
    """
    print("Welcome to the Mental Health Support Chatbot. Type 'quit' to exit.")
    user_input = input("You: ")
    while user_input.lower() != 'quit':
        label = sentiment_model(user_input)[0]['label']
        if label != 'NEGATIVE':
            framing = "That's good to hear! How can I assist you today?"
        else:
            framing = "It sounds like you're going through a tough time. Let's see if I can help."
        print(framing)

        advice = evaluate_advice(user_input)
        print(f"Assistant: {advice}\n")

        user_feedback = collect_feedback()
        print(f"Thank you for your feedback: {user_feedback}")

        # Ask for the next message (or 'quit').
        user_input = input("You: ")

NameError: name 'pipeline' is not defined

# Start the interactive session; replies come from evaluate_advice, which uses
# the model and tokenizer objects that are already in memory.
if __name__ == "__main__":
    chat_with_sentiment_analysis()


# Emotional response for the current trained model, if necessary

In [3]:


# Part 5: Loading Model and Tokenizer for Interaction
def load_model_and_tokenizer(model_dir):
    """Build a text-generation pipeline from a saved model directory.

    Args:
        model_dir: Directory passed to ``from_pretrained`` for both the GPT-2
            model and its tokenizer.

    Returns:
        The text-generation pipeline wrapping the loaded model and tokenizer.
    """
    return pipeline(
        'text-generation',
        model=GPT2LMHeadModel.from_pretrained(model_dir),
        tokenizer=GPT2Tokenizer.from_pretrained(model_dir),
    )

# Initialize Sentiment Analysis Pipeline.
# Rebinds the same `sentiment_model` name an earlier cell already assigned, with
# the same model; the chat function below uses it for both the user's message
# and the feedback text. Requires `pipeline` to be imported already.
sentiment_model = pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english')

def trim_to_last_sentence(text):
    """Cut ``text`` right after its last '.', '!' or '?'.

    Generation stops at a token budget, so the tail of a reply is often an
    unfinished sentence. If no sentence-ending mark is present the stripped
    text is returned unchanged.
    """
    text = text.strip()
    last_end = max(text.rfind(mark) for mark in ".!?")
    if last_end == -1:
        return text
    return text[:last_end + 1]


#Interactive Chat Function with Sentiment Analysis and Feedback Collection
def chat_with_sentiment_analysis(chatbot):
    """Run the interactive chat loop until the user types 'quit'.

    Args:
        chatbot: Callable text generator. It is called with the user's message
            plus generation keyword arguments and must return a list whose
            first element has a ``'generated_text'`` entry.

    For every non-empty message the loop classifies the message with the
    module-level ``sentiment_model``, prints a framing line, prints the
    generated reply trimmed to its last complete sentence, then asks for
    feedback and prints the sentiment label of that feedback.
    """
    print("Welcome to the Mental Health Support Chatbot. Type 'quit' to exit.")
    while True:
        # Strip so that " quit " still exits and whitespace-only input is caught.
        user_input = input("You: ").strip()
        if user_input.lower() == 'quit':
            break
        if not user_input:
            # Nothing to analyse or to generate from; ask again.
            continue

        # Analyze the sentiment of the input
        sentiment_result = sentiment_model(user_input)
        sentiment = sentiment_result[0]['label']

        # Tailor the response based on the sentiment
        if sentiment == 'NEGATIVE':
            print("It sounds like you're going through a tough time. Let's see if I can help.")
        else:
            print("That's good to hear! How can I assist you today?")

        # Generate the response from the chatbot
        generated_responses = chatbot(user_input, max_length=150, num_return_sequences=1, no_repeat_ngram_size=2)
        response = generated_responses[0]['generated_text']
        response_trimmed = trim_to_last_sentence(response)
        print(f"Assistant: {response_trimmed}\n")

        # Collect user feedback on the response
        feedback = input("Feedback on the advice: ")
        sentiment_result = sentiment_model(feedback)
        sentiment_feedback = sentiment_result[0]['label']
        print(f"Feedback sentiment: {sentiment_feedback}")

# Part 6: Start the Chat Session with Sentiment Analysis
# Reloads the model from disk (same "./model" directory the training cell saves
# to) instead of reusing the in-memory objects, then starts the chat loop.
if __name__ == "__main__":
    model_dir = "./model"  # Ensure this points to the directory where your model is saved
    chatbot = load_model_and_tokenizer(model_dir)
    chat_with_sentiment_analysis(chatbot)

NameError: name 'pipeline' is not defined