In [None]:
# -----------------------------------------"Testing the Pre-Trained Bot"-----------------------------------------

import random
from transformers import pipeline, BlenderbotForConditionalGeneration, BlenderbotTokenizer


jokes = [
    "Why don't skeletons fight each other? They don't have the guts!",
    "I told my computer I needed a break, and now it won’t stop sending me Kit-Kats.",
    "What did one ocean say to the other ocean? Nothing, they just waved.",
    "Why don’t orphans play hide and seek? Because no one will look for them."
]

def tell_joke():
    return random.choice(jokes)

# Load BlenderBot model
tokenizer = BlenderbotTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
model = BlenderbotForConditionalGeneration.from_pretrained("facebook/blenderbot-400M-distill")

def generate_response(user_input):      # Generate chatbot response
    inputs = tokenizer([user_input], return_tensors="pt", padding=True, truncation=True)
    output = model.generate(inputs["input_ids"], max_length=100, num_beams=5, no_repeat_ngram_size=2)
    return tokenizer.decode(output[0], skip_special_tokens=True)

sentiment_analyzer = pipeline("sentiment-analysis") # Load sentiment and emotion models
emotion_analyzer = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base")

def analyze_sentiment_and_emotion(text):    # Analyze sentiment and emotion
    sentiment = sentiment_analyzer(text)[0]
    emotion = emotion_analyzer(text)[0]
    return sentiment, emotion


def chatbot_interaction():    # Main training or interaction loop
    print("Chatbot: Hi! How can I help you today? (Type 'bye' to end the conversation.)")

    while True:
        user_input = input("You: ").strip()
        
        if not user_input:
            print("Chatbot: Please enter a message.")
            continue
        
        if "bye" in user_input.lower():
            print("Chatbot: Goodbye! Take care!")
            break
        
        if "joke" in user_input.lower():
            bot_response = tell_joke()
        else:
            sentiment, emotion = analyze_sentiment_and_emotion(user_input)
            bot_response = generate_response(user_input)
            bot_response += f"\n(Sentiment: {sentiment['label']}, Emotion: {emotion['label']})"
        
        print(f"Chatbot: {bot_response}")


if __name__ == "__main__":   # Start the chatbot interaction
    chatbot_interaction()


In [1]:
# -----------------------------------------"Loading the Dataset"-----------------------------------------

import random
import pandas as pd
from transformers import pipeline, BlenderbotForConditionalGeneration, BlenderbotTokenizer
from datasets import Dataset
from transformers import Trainer, TrainingArguments
 
data = pd.read_csv("C:/Users/HP/OneDrive/Documents/NLP_Project/Implementation_Folder/App/data.csv")

# Convert the dataframe into a Hugging Face Dataset format
dataset = Dataset.from_pandas(data)


In [2]:
# -----------------------------------------"Applying the Tokenizer to the Dataset"-----------------------------------------
# Load BlenderBot model and tokenizer
tokenizer = BlenderbotTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
model = BlenderbotForConditionalGeneration.from_pretrained("facebook/blenderbot-400M-distill")

# Tokenize the input and response columns
def tokenize_function(examples):
    return tokenizer(examples['input'], examples['response'], padding="max_length", truncation=True)

# Apply the tokenizer to the entire dataset
tokenized_dataset = dataset.map(tokenize_function, batched=True)


Map:   0%|          | 0/207 [00:00<?, ? examples/s]

In [4]:
# -----------------------------------------"Preprocessing and Training"-----------------------------------------
from transformers import Trainer, TrainingArguments

# Set up training arguments with no evaluation
training_args = TrainingArguments(
    output_dir="./results",          # output directory
    evaluation_strategy="no",        # disable evaluation
    learning_rate=2e-5,              # learning rate
    per_device_train_batch_size=4,   # batch size for training
    num_train_epochs=3,              # number of epochs
    weight_decay=0.01,               # weight decay
    save_total_limit=3,              # limit total amount of checkpoints
)

# Custom dataset preprocessing function
def preprocess_data(tokenizer, examples):
    # Tokenize the input field (for encoder) with truncation and padding to max length of 128
    model_inputs = tokenizer(examples['input'], max_length=128, truncation=True, padding='max_length')

    # Tokenize the response field (for decoder) with truncation and padding to max length of 128
    labels = tokenizer(examples['response'], max_length=128, truncation=True, padding='max_length')

    # Replace the labels with -100 where the padding token is, so those tokens are ignored during loss calculation
    labels = labels['input_ids']
    labels = [-100 if token == tokenizer.pad_token_id else token for token in labels]

    model_inputs['labels'] = labels
    return model_inputs


# Prepare dataset (tokenized_dataset is assumed to be available)
tokenized_dataset = tokenized_dataset.map(
    lambda examples: preprocess_data(tokenizer, examples),
    batched=True
)

# Initialize Trainer
trainer = Trainer(
    model=model,                     # Pretrained model
    args=training_args,              # Training arguments
    train_dataset=tokenized_dataset, # Training data
    tokenizer=tokenizer              # Tokenizer
)

# Start training
trainer.train()




Map:   0%|          | 0/207 [00:00<?, ? examples/s]

  trainer = Trainer(


  0%|          | 0/156 [00:00<?, ?it/s]

{'train_runtime': 1582.2702, 'train_samples_per_second': 0.392, 'train_steps_per_second': 0.099, 'train_loss': 0.23526558509239784, 'epoch': 3.0}


TrainOutput(global_step=156, training_loss=0.23526558509239784, metrics={'train_runtime': 1582.2702, 'train_samples_per_second': 0.392, 'train_steps_per_second': 0.099, 'total_flos': 168939649105920.0, 'train_loss': 0.23526558509239784, 'epoch': 3.0})

In [5]:
# -----------------------------------------"Saving the Fine-tuned Model"-----------------------------------------

model.save_pretrained("C:/Users/HP/OneDrive/Documents/NLP_Project/Implementation_Folder/App/fine_tuned_blenderbot")
tokenizer.save_pretrained("C:/Users/HP/OneDrive/Documents/NLP_Project/Implementation_Folder/App/fine_tuned_blenderbot")


('C:/Users/HP/OneDrive/Documents/NLP_Project/Implementation_Folder/App/fine_tuned_blenderbot\\tokenizer_config.json',
 'C:/Users/HP/OneDrive/Documents/NLP_Project/Implementation_Folder/App/fine_tuned_blenderbot\\special_tokens_map.json',
 'C:/Users/HP/OneDrive/Documents/NLP_Project/Implementation_Folder/App/fine_tuned_blenderbot\\vocab.json',
 'C:/Users/HP/OneDrive/Documents/NLP_Project/Implementation_Folder/App/fine_tuned_blenderbot\\merges.txt',
 'C:/Users/HP/OneDrive/Documents/NLP_Project/Implementation_Folder/App/fine_tuned_blenderbot\\added_tokens.json')

In [None]:
# -----------------------------------------"Loading the Fine-Tuned Bot"-----------------------------------------
# Predefined jokes for fun responses
jokes = [
    "Why don't skeletons fight each other? They don't have the guts!",
    "I told my computer I needed a break, and now it won’t stop sending me Kit-Kats.",
    "What did one ocean say to the other ocean? Nothing, they just waved.",
    "Why don’t orphans play hide and seek? Because no one will look for them."
]

def tell_joke():
    return random.choice(jokes)

# Load your fine-tuned model
tokenizer = BlenderbotTokenizer.from_pretrained("./fine_tuned_blenderbot")
model = BlenderbotForConditionalGeneration.from_pretrained("./fine_tuned_blenderbot")

# Generate chatbot response
def generate_response(user_input):
    inputs = tokenizer([user_input], return_tensors="pt", padding=True, truncation=True)
    output = model.generate(inputs["input_ids"], max_length=100, num_beams=5, no_repeat_ngram_size=2)
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Load sentiment and emotion models
sentiment_analyzer = pipeline("sentiment-analysis")
emotion_analyzer = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base")

# Analyze sentiment and emotion
def analyze_sentiment_and_emotion(text):
    sentiment = sentiment_analyzer(text)[0]
    emotion = emotion_analyzer(text)[0]
    return sentiment, emotion


In [None]:
# -----------------------------------------"Evaluation"-----------------------------------------
from transformers import BlenderbotForConditionalGeneration, BlenderbotTokenizer
import evaluate
import pandas as pd

# Load the fine-tuned model and tokenizer
model_path = "C:/Users/HP/OneDrive/Documents/NLP_Project/Implementation_Folder/App/fine_tuned_blenderbot"
tokenizer = BlenderbotTokenizer.from_pretrained(model_path)
model = BlenderbotForConditionalGeneration.from_pretrained(model_path)

# Load the evaluation dataset
data_path = "C:/Users/HP/OneDrive/Documents/NLP_Project/Implementation_Folder/App/data.csv"
data = pd.read_csv(data_path)

# Prepare inputs and references
inputs = data["input"].tolist()
references = data["response"].tolist()

# Generate predictions
predictions = []
for input_text in inputs:
    inputs_encoded = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
    outputs = model.generate(inputs_encoded["input_ids"], max_length=50, num_beams=5, no_repeat_ngram_size=2)
    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
    predictions.append(prediction)

# Tokenize references and predictions
tokenized_references = [tokenizer.encode(ref, add_special_tokens=True) for ref in references]
tokenized_predictions = [tokenizer.encode(pred, add_special_tokens=True) for pred in predictions]

# Binary match for sequence-level accuracy
binary_matches = [
    int(pred == ref) for pred, ref in zip(tokenized_predictions, tokenized_references)
]
accuracy_score = sum(binary_matches) / len(binary_matches)

# Compute BLEU score
bleu = evaluate.load("sacrebleu")
bleu_score = bleu.compute(predictions=predictions, references=[[ref] for ref in references])

# Display results
print("BLEU Score:", bleu_score)
print("Accuracy Score:", accuracy_score)


In [None]:
# -----------------------------------------"Testing the Fine-Tuned Bot"-----------------------------------------
# Fine-Tuned Bot
def chatbot_interaction():
    print("Chatbot: Hi! How can I help you today? (Type 'bye' to end the conversation.)")

    while True:
        user_input = input("You: ").strip()
        
        if not user_input:
            print("Chatbot: Please enter a message.")
            continue
        
        if "bye" in user_input.lower():
            print("Chatbot: Goodbye! Take care!")
            break
        
        if "joke" in user_input.lower():
            bot_response = tell_joke()
        else:
            sentiment, emotion = analyze_sentiment_and_emotion(user_input)
            bot_response = generate_response(user_input)
           # bot_response += f"\n(Sentiment: {sentiment['label']}, Emotion: {emotion['label']})"
        
        print(f"Chatbot: {bot_response}")


if __name__ == "__main__":
    chatbot_interaction()
