<a href="https://colab.research.google.com/github/Bhawana102/TopsisAssign3/blob/main/topsis3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk /kaggle/input recursively and print the full path of every file found.
kaggle_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for file_path in kaggle_file_paths:
    print(file_path)


In [None]:
import pandas as pd
from transformers import pipeline
from nltk.translate.bleu_score import corpus_bleu
from nltk.tokenize import word_tokenize
from nltk.util import ngrams
from math import exp
from tqdm import tqdm

# Read the full training CSV, then keep only its first 150 rows for evaluation.
train_df_ = pd.read_csv("/kaggle/input/mydata-csv/train.csv")
train_df = train_df_.iloc[:150]

# Identifiers of the pretrained summarization models to compare; each one is
# passed to the summarization pipeline as both model and tokenizer.
models = [
    "facebook/bart-large-cnn",
    "t5-large",
    "sshleifer/distilbart-cnn-12-6",
    "google/pegasus-large",
    "allenai/led-large-16384-arxiv",
]


In [None]:
# Corpus-level BLEU scores, one entry appended per evaluated model
# (in the order the models are iterated); printed after the evaluation loop.
bleu_scores = []

# Semantic Coherence (example implementation)
def semantic_coherence(generated_summary, dialogue):
    """Score how much of the generated summary's wording appears in the dialogue.

    Both texts are lower-cased and split with ``word_tokenize``. The score is
    the number of distinct tokens shared by the two texts divided by the total
    number of tokens in the generated summary (repeated tokens are counted in
    the denominator).

    Args:
        generated_summary: Summary text produced by a model.
        dialogue: Source dialogue the summary was generated from.

    Returns:
        The overlap ratio, or 0 when the summary contains no tokens.
    """
    summary_tokens = word_tokenize(generated_summary.lower())
    dialogue_tokens = word_tokenize(dialogue.lower())

    # An empty summary would otherwise divide by zero; score it as 0, which is
    # what the other token-overlap metrics do for an empty denominator.
    if not summary_tokens:
        return 0

    common_tokens = set(summary_tokens) & set(dialogue_tokens)
    return len(common_tokens) / len(summary_tokens)

# Factual Accuracy (example implementation)
def factual_accuracy(generated_summary, reference_summary):
    """Return the fraction of distinct reference tokens found in the generated summary.

    Both texts are lower-cased and tokenized with ``word_tokenize``; the score
    is |generated ∩ reference| / |reference| over the sets of distinct tokens.
    Returns 0 when the reference summary has no tokens.
    """
    generated_vocab = set(word_tokenize(generated_summary.lower()))
    reference_vocab = set(word_tokenize(reference_summary.lower()))

    # Guard clause: nothing to compare against, so the score is defined as 0.
    if not reference_vocab:
        return 0

    overlap = generated_vocab.intersection(reference_vocab)
    return len(overlap) / len(reference_vocab)

In [None]:
# Content Coverage (example implementation)
def content_coverage(generated_summary, dialogue):
    """Return the fraction of distinct dialogue tokens that the summary reuses.

    Both texts are lower-cased and tokenized with ``word_tokenize``; the score
    is |summary ∩ dialogue| / |dialogue| over the sets of distinct tokens.
    Returns 0 when the dialogue has no tokens.
    """
    summary_vocab = set(word_tokenize(generated_summary.lower()))
    dialogue_vocab = set(word_tokenize(dialogue.lower()))

    dialogue_size = len(dialogue_vocab)
    if dialogue_size == 0:
        return 0

    shared_count = len(summary_vocab & dialogue_vocab)
    return shared_count / dialogue_size


In [None]:
# Evaluation results table with the expected columns; it is rebuilt from
# `evaluation_results_list` after every model has been evaluated.
evaluation_results = pd.DataFrame(columns=["Model", "BLEU Score", "Semantic Coherence", "Factual Accuracy", "Content Coverage"])

# One dict of aggregated metrics per evaluated model.
evaluation_results_list = []

# The references are identical for every model, so tokenize them once.
# corpus_bleu expects, for each example, a list of references where every
# reference is itself a list of tokens. Passing raw strings would make it
# compute n-gram overlap over characters instead of words.
reference_summaries = train_df["summary"].tolist()
tokenized_references = [[word_tokenize(summary)] for summary in reference_summaries]

# Apply Models and Evaluate
# NOTE: the per-row generation and the per-model BLEU/aggregation below belong
# to the same loop body and therefore must stay in a single notebook cell.
for model_name in models:
    print(f"Evaluating model: {model_name}")

    # Initialize the summarization pipeline
    summarizer = pipeline("summarization", model=model_name, tokenizer=model_name)

    # Per-row metric accumulators for the current model
    semantic_coherence_scores = []
    factual_accuracy_scores = []
    content_coverage_scores = []

    # Generate one summary per row and score it against its dialogue/reference
    generated_summaries = []
    for index, row in tqdm(train_df.iterrows(), total=len(train_df)):  # Use tqdm to show progress
        dialogue = row['dialogue']
        summary = row['summary']
        generated_summary = summarizer(dialogue, max_length=100, min_length=30, do_sample=False)[0]["summary_text"]
        generated_summaries.append(generated_summary)

        # Evaluate Semantic Coherence (summary vs. source dialogue)
        semantic_coherence_scores.append(semantic_coherence(generated_summary, dialogue))

        # Evaluate Factual Accuracy (summary vs. reference summary)
        factual_accuracy_scores.append(factual_accuracy(generated_summary, summary))

        # Evaluate Content Coverage (summary vs. source dialogue)
        content_coverage_scores.append(content_coverage(generated_summary, dialogue))

    # Calculate the corpus-level BLEU score on word tokens for this model
    tokenized_hypotheses = [word_tokenize(text) for text in generated_summaries]
    bleu_score = corpus_bleu(tokenized_references, tokenized_hypotheses)
    bleu_scores.append(bleu_score)

    # Append the per-model averages to the evaluation results list
    evaluation_results_list.append({
        "Model": model_name,
        "BLEU Score": bleu_score,
        "Semantic Coherence": sum(semantic_coherence_scores) / len(semantic_coherence_scores),
        "Factual Accuracy": sum(factual_accuracy_scores) / len(factual_accuracy_scores),
        "Content Coverage": sum(content_coverage_scores) / len(content_coverage_scores),
    })

    # Print a separator for clarity
    print("="*50)

# Build the results table directly from the per-model dicts. Supplying the
# column list keeps the header even when no model was evaluated, a case in
# which pd.concat over an empty list would raise ValueError.
evaluation_results = pd.DataFrame(
    evaluation_results_list,
    columns=["Model", "BLEU Score", "Semantic Coherence", "Factual Accuracy", "Content Coverage"],
)

# Compare Results
print("BLEU Scores:", bleu_scores)

# Save evaluation results to a CSV file (written to the current working directory)
evaluation_results.to_csv("evaluation_results.csv", index=False)
