In [None]:
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer
import pandas as pd
import pickle
import warnings
from rouge_score.rouge_scorer import RougeScorer
from nltk.translate.bleu_score import sentence_bleu
from peft import get_peft_config, get_peft_model, LoraConfig, TaskType, PeftModel, PeftConfig
from evaluate import load
import random
import numpy as np
import os

# Suppress every warning for the rest of the notebook session.
warnings.filterwarnings("ignore")

# Metric handle used later by get_scores().
bertscore = load("bertscore")

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


In [None]:
def set_random_seed(seed: int):
    """Seed every random number generator used in this notebook.

    Prints the seed, switches cuDNN off (and its benchmark mode off /
    deterministic flag on), exports PYTHONHASHSEED, and seeds `random`,
    NumPy and torch (CPU and all CUDA devices).

    Args:
        seed: Value handed to each generator.
    """
    print("Seed: {}".format(seed))

    # cuDNN switches: no autotuning, no cuDNN kernels, deterministic flag set.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.enabled = False
    cudnn.deterministic = True

    os.environ["PYTHONHASHSEED"] = str(seed)

    # The seeding calls are independent of each other, so run them in a loop.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)


set_random_seed(1)


In [None]:
# Checkpoint shared by the tokenizer and the language model.
model_name = "lmsys/vicuna-7b-v1.5"

# Prompts are limited to 512 tokens, with truncation configured on the left side.
tokenizer = LlamaTokenizer.from_pretrained(
    model_name,
    model_max_length=512,
    truncation_side='left',
)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Half-precision weights for the base causal LM.
model = LlamaForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
)

# Wrap the base model with a LoRA adapter built from this config.
# NOTE(review): no saved adapter weights are loaded in this cell — confirm
# whether a trained adapter (PeftModel.from_pretrained) was intended here.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

model = model.to(device)


In [None]:
import pandas as pd

def load_excel_data_grouped(excel_path: str):
    """
    Read a turn-per-row Excel sheet and group its rows into dialogues.

    The sheet must provide the columns 'dialogue_id', 'User' and 'BOT'.
    Rows are grouped by 'dialogue_id' and, inside each group, the user and
    bot utterances are rendered as one line per turn ("User: ..." /
    "Bot: ...") joined with newlines.

    Args:
        excel_path: Path of the Excel file to read.

    Returns:
        A list with one dict per dialogue, holding the keys 'dialogue_id',
        'combined_user' and 'combined_bot'.

    Raises:
        KeyError: If any of the required columns is absent.
    """
    df = pd.read_excel(excel_path)

    print("Columns in the Excel file:", df.columns)

    # Fail early with a clear message for every column that is read below,
    # not only for the grouping key.
    for column in ('dialogue_id', 'User', 'BOT'):
        if column not in df.columns:
            raise KeyError(f"'{column}' column is missing in the provided Excel file.")

    # A turn with an empty cell would otherwise be rendered as the literal
    # text "nan" in prompts and references, so incomplete turns are skipped.
    complete_turns = df.dropna(subset=['User', 'BOT'])
    dropped = len(df) - len(complete_turns)
    if dropped:
        print(f"Skipping {dropped} row(s) with a missing 'User' or 'BOT' value.")

    def _single_line(value):
        # The combined transcript uses one line per turn, so line breaks
        # inside an utterance are folded into spaces to keep turns intact.
        return " ".join(str(value).splitlines())

    grouped_dialogues = []
    for dialogue_id, group in complete_turns.groupby('dialogue_id'):
        user_inputs = [_single_line(u) for u in group['User'].tolist()]
        bot_responses = [_single_line(b) for b in group['BOT'].tolist()]
        combined_input = "\n".join([f"User: {u}" for u in user_inputs])
        combined_reference = "\n".join([f"Bot: {b}" for b in bot_responses])

        grouped_dialogues.append({
            "dialogue_id": dialogue_id,
            "combined_user": combined_input,
            "combined_bot": combined_reference
        })

    return grouped_dialogues

# Load all dialogues from the spreadsheet; `dialogues` is consumed by the
# generation and evaluation cells further down.
# NOTE(review): this is an absolute, machine-specific path — the notebook only
# runs where this file exists.
dialogues = load_excel_data_grouped('/mnt/Data/sarmistha/Financial Chatbot/Ecosage_Correct_Code/data single sheet.xlsx')


In [None]:
import os
import pandas as pd

def generate_responses_step_by_step(dialogues):
    """
    Generate responses step-by-step for each grouped conversation.

    For every dialogue the user turns are replayed in order. Each prompt is
    the running transcript ("User: ...\\nBot: ...\\n" per finished turn)
    followed by the current user turn and an open "Bot:" line. The model's
    reply is sampled, reduced to its first line, and appended to the
    transcript before the next turn.

    Uses the module-level `tokenizer`, `model` and `device`.

    Args:
        dialogues: List of dicts with the keys 'dialogue_id' and
            'combined_user'; the latter holds one user turn per line, each
            optionally starting with the "User:" speaker label.

    Returns:
        A list with one dict per dialogue: {'dialogue_id': ...,
        'generated_responses': [first line of each reply, in turn order]}.
    """
    speaker_label = "User:"
    all_generated_responses = []

    for idx, dialogue in enumerate(dialogues):
        print(f"Processing Dialogue ID: {dialogue['dialogue_id']} ({idx+1}/{len(dialogues)})")
        conversation_history = ""
        generated_responses = []

        for raw_turn in dialogue['combined_user'].split('\n'):
            # Lines of 'combined_user' already carry the speaker label; drop
            # it so the prompt does not read "User: User: ...".
            if raw_turn.startswith(speaker_label):
                user_input = raw_turn[len(speaker_label):].strip()
            else:
                user_input = raw_turn.strip()

            current_input = conversation_history + f"User: {user_input}\nBot:"

            inputs = tokenizer(current_input, return_tensors='pt', truncation=True).to(device)
            outputs = model.generate(
                **inputs,
                max_new_tokens=100,
                do_sample=True,
                temperature=0.7,
                top_p=0.7,
                top_k=50,
                return_dict_in_generate=True
            )

            # Decode only the tokens produced after the prompt. Slicing the
            # decoded text by len(current_input) is wrong as soon as the
            # prompt was truncated or does not round-trip through the
            # tokenizer character for character.
            prompt_length = inputs["input_ids"].shape[1]
            new_tokens = outputs.sequences[0][prompt_length:]
            generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)

            # Keep just the first line so a continued fake dialogue is dropped.
            bot_response = generated_text.strip().split('\n')[0]
            print(f"User: {user_input}")
            print(f"Generated Bot: {bot_response}\n")

            conversation_history += f"User: {user_input}\nBot: {bot_response}\n"

            generated_responses.append(bot_response)

        all_generated_responses.append({
            "dialogue_id": dialogue['dialogue_id'],
            "generated_responses": generated_responses
        })

    return all_generated_responses

# Run generation over every loaded dialogue.
generated_responses = generate_responses_step_by_step(dialogues)

# Flatten to one record per generated turn, keeping the dialogue it came from.
response_data = [
    {
        "dialogue_id": entry["dialogue_id"],
        "generated_response": reply
    }
    for entry in generated_responses
    for reply in entry["generated_responses"]
]
df_responses = pd.DataFrame(response_data)

# Write the table next to the notebook, creating the folder on first use.
csv_file_path = './generated_responses/step_by_step_generated_responses.csv'
os.makedirs('./generated_responses', exist_ok=True)
df_responses.to_csv(csv_file_path, index=False)

print(f"\nGeneration complete. Responses saved to {csv_file_path}")


In [None]:
def get_scores(reference_list: list, hypothesis_list: list):
    """Average ROUGE, BLEU and BERTScore over aligned reference/hypothesis pairs.

    Pairs are formed positionally. For each pair the ROUGE-1/2/L F-measures
    and the sentence-level BLEU-1..4 scores (whitespace tokenisation) are
    accumulated; BERTScore precision/recall/F1 are computed with the
    'microsoft/deberta-xlarge-mnli' model through the module-level
    `bertscore` metric. Every average is scaled by 100.

    Args:
        reference_list: Gold responses, one string per pair.
        hypothesis_list: Generated responses, aligned with `reference_list`.

    Returns:
        Dict with the keys 'rouge_1', 'rouge_2', 'rouge_L', 'bleu_1' ..
        'bleu_4', 'bert_precision', 'bert_recall' and 'bert_f1'.

    Raises:
        ValueError: If there is no pair to score.
    """
    reference_list = list(reference_list)
    hypothesis_list = list(hypothesis_list)

    # zip() silently drops the tail of the longer list; make that visible
    # because it usually means references and hypotheses are misaligned.
    if len(reference_list) != len(hypothesis_list):
        print(
            f"Warning: {len(reference_list)} reference(s) vs "
            f"{len(hypothesis_list)} hypothesis/hypotheses; scoring only the first "
            f"{min(len(reference_list), len(hypothesis_list))} pair(s)."
        )

    pairs = list(zip(reference_list, hypothesis_list))
    if not pairs:
        # Previously this surfaced as a bare ZeroDivisionError at the end.
        raise ValueError("get_scores needs at least one (reference, hypothesis) pair.")
    count = len(pairs)

    rouge_scorer = RougeScorer(['rouge1', 'rouge2', 'rougeL'])
    rouge_totals = {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0}

    # Uniform n-gram weights for BLEU-1 .. BLEU-4.
    bleu_weights = {
        "bleu_1": (1.,),
        "bleu_2": (1./2., 1./2.),
        "bleu_3": (1./3., 1./3., 1./3.),
        "bleu_4": (1./4., 1./4., 1./4., 1./4.),
    }
    bleu_totals = {name: 0.0 for name in bleu_weights}

    for reference, hypothesis in pairs:
        scores = rouge_scorer.score(reference, hypothesis)
        for rouge_name in rouge_totals:
            rouge_totals[rouge_name] += scores[rouge_name].fmeasure

        reference_tokens = reference.split()
        hypothesis_tokens = hypothesis.split()
        for bleu_name, weights in bleu_weights.items():
            bleu_totals[bleu_name] += sentence_bleu(
                [reference_tokens], hypothesis_tokens, weights=weights
            )

    # One batched BERTScore call instead of one call per pair: the metric
    # returns a per-pair list for each of precision / recall / f1.
    bert_results = bertscore.compute(
        predictions=[hypothesis for _, hypothesis in pairs],
        references=[reference for reference, _ in pairs],
        model_type='microsoft/deberta-xlarge-mnli'
    )
    bert_precision_score = sum(bert_results['precision'])
    bert_recall_score = sum(bert_results['recall'])
    bert_f1_score = sum(bert_results['f1'])

    return {
        "rouge_1": rouge_totals['rouge1'] * 100 / count,
        "rouge_2": rouge_totals['rouge2'] * 100 / count,
        "rouge_L": rouge_totals['rougeL'] * 100 / count,
        "bleu_1": bleu_totals["bleu_1"] * 100 / count,
        "bleu_2": bleu_totals["bleu_2"] * 100 / count,
        "bleu_3": bleu_totals["bleu_3"] * 100 / count,
        "bleu_4": bleu_totals["bleu_4"] * 100 / count,
        'bert_precision': bert_precision_score * 100 / count,
        'bert_recall': bert_recall_score * 100 / count,
        'bert_f1': bert_f1_score * 100 / count
    }


In [None]:
def _reference_turns(combined_bot):
    """Split a "Bot: a\\nBot: b" transcript into ['a', 'b'], keeping empty turns."""
    text = combined_bot
    # Splitting on "\nBot:" leaves the speaker label on the very first turn,
    # so remove it up front; otherwise it ends up inside the reference text.
    if text.startswith("Bot:"):
        text = text[len("Bot:"):]
    return [piece.strip() for piece in text.split("\nBot:")]


# Look generated turns up by dialogue id rather than relying on list order.
generated_by_id = {
    generated['dialogue_id']: generated['generated_responses']
    for generated in generated_responses
}

references = []
hypotheses = []
skipped_pairs = 0

for dialogue in dialogues:
    reference_turns = _reference_turns(dialogue['combined_bot'])
    hypothesis_turns = [
        line.strip() for line in generated_by_id.get(dialogue['dialogue_id'], [])
    ]

    if len(reference_turns) != len(hypothesis_turns):
        print(
            f"Warning: dialogue {dialogue['dialogue_id']} has {len(reference_turns)} "
            f"reference turn(s) but {len(hypothesis_turns)} generated turn(s); "
            f"unmatched turns are ignored."
        )

    # Drop empty turns as a PAIR. Filtering the two lists independently would
    # shift every later hypothesis onto the wrong reference.
    for reference, hypothesis in zip(reference_turns, hypothesis_turns):
        if reference and hypothesis:
            references.append(reference)
            hypotheses.append(hypothesis)
        else:
            skipped_pairs += 1

if skipped_pairs:
    print(f"Skipped {skipped_pairs} turn(s) with an empty reference or generated response.")

if not references:
    raise ValueError("No non-empty (reference, hypothesis) pairs are available for evaluation.")

metrics = get_scores(references, hypotheses)
print("\nEvaluation Metrics:")
for metric, value in metrics.items():
    print(f"{metric}: {value:.2f}")
