Name: Aditya Vashishta,
Roll number: 102103546

Applying TOPSIS to find the best pre-trained "Text Generation" model.

The following models will be compared:
     'GPT-2' , 'BART' , 'T5-small'  , 'XLNet' , 'GPT-Neo 125M' 

The models will be evaluated on the basis of the following criteria:

    1. Semantic_Similarity: Measures the semantic similarity between the generated text and reference text.
    [Aim - Maximization]
    [Weight - 1]

    2. ROUGE Score: ROUGE evaluates the quality of summaries by comparing n-grams, word overlap, and other measures between the generated text and reference summaries.
    [Aim - Maximization]
    [Weight - 1]

    3. Diversity: To assess the diversity of generated text, metrics such as uniqueness, diversity of vocabulary, or the use of rare words can be considered. A good text generator should be able to produce diverse and novel outputs.
    [Aim - Maximization]
    [Weight - 1]


In [None]:
from transformers import pipeline
import time
import subprocess
from nltk.translate.bleu_score import sentence_bleu
from tempfile import NamedTemporaryFile
from rouge_score import rouge_scorer
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# To calculate the semantic similarity test parameter
def semantic_coherence(prompt, generated):
    """Return the TF-IDF cosine similarity between *prompt* and *generated*.

    Both texts are fitted together by a fresh ``TfidfVectorizer`` so they
    share a single vocabulary. The value returned is the only entry of the
    1x1 cosine-similarity matrix computed from the two resulting row vectors.
    """
    tfidf_matrix = TfidfVectorizer().fit_transform([prompt, generated])
    prompt_vector, generated_vector = tfidf_matrix[0], tfidf_matrix[1]
    return cosine_similarity(prompt_vector, generated_vector)[0][0]

# To calculate ROUGE test parameter
def calculate_ROUGE(prompt, generated, rouge_metric='rouge1'):
    """Return the ROUGE F-measure of *generated* scored against *prompt*.

    Args:
        prompt: Text passed as the first argument to ``RougeScorer.score``.
        generated: Text passed as the second argument to ``RougeScorer.score``.
        rouge_metric: Name of the ROUGE variant to compute and read back
            from the score mapping (default ``'rouge1'``).

    Returns:
        The ``fmeasure`` attribute of the score for *rouge_metric*.
    """
    # Stemming is enabled, matching the scorer configuration used throughout.
    rouge = rouge_scorer.RougeScorer([rouge_metric], use_stemmer=True)
    return rouge.score(prompt, generated)[rouge_metric].fmeasure

# To calculate Diversity test parameter
def calculate_vocabulary_diversity(text):
    """Return the ratio of unique words to total words in *text*.

    Words are the whitespace-separated tokens produced by ``str.split()``;
    the comparison is case-sensitive and punctuation is not stripped.

    Args:
        text: The generated text to measure.

    Returns:
        ``len(unique words) / len(words)`` as a float in (0, 1], or ``0.0``
        when *text* is empty or contains only whitespace.
    """
    words = text.split()
    # A model may return an empty string; report zero diversity instead of
    # failing with ZeroDivisionError.
    if not words:
        return 0.0
    return len(set(words)) / len(words)

# To calculate the three criteria measures for a model w.r.t. a particular prompt
def calculate_criterias(prompt, generated):
    """Return the criteria values for one generated text as a list.

    The list holds, in order: the semantic similarity of *generated* to
    *prompt*, the ROUGE score of *generated* against *prompt*, and the
    vocabulary diversity of *generated* alone.
    """
    return [
        semantic_coherence(prompt, generated),
        calculate_ROUGE(prompt, generated),
        calculate_vocabulary_diversity(generated),
    ]

# To calculate all the criteria for the different models on a single prompt
# Row labels for the result table, keyed by the model identifier that is
# passed to generate_text(). Unknown identifiers are used as their own label.
_MODEL_DISPLAY_NAMES = {
    'gpt2': 'GPT-2',
    'facebook/bart-large-cnn': 'BART',
    't5-small': 'T5-small',
    'xlnet-base-cased': 'XLNet',
    'EleutherAI/gpt-neo-125M': 'GPT-Neo 125M',
}


def calculate_df(models, prompt):
    """Build the decision matrix of criteria values for every model on *prompt*.

    For each model, text is generated from *prompt* (max length 50, one
    sequence) and scored with ``calculate_criterias`` against that same
    prompt.

    Args:
        models: Iterable of model identifiers accepted by ``generate_text``.
        prompt: The prompt to generate from and to score against.

    Returns:
        A ``pandas.DataFrame`` with one row per model, in input order, and the
        columns ``Semantic_Similarity``, ``ROUGE`` and ``Diversity``. Rows are
        labelled with the model's display name when known, otherwise with the
        model identifier itself.
    """
    columns = ['Semantic_Similarity', 'ROUGE', 'Diversity']
    rows = []
    labels = []

    for model in models:
        generated = generate_text(model, prompt, 50, 1)
        # Score against the prompt that was actually used for generation,
        # not a fixed global prompt.
        rows.append(calculate_criterias(prompt, generated))
        # Label by model identity rather than by position, so reordering or
        # changing the model list cannot mislabel rows.
        labels.append(_MODEL_DISPLAY_NAMES.get(model, model))

    # Build the frame in one step; DataFrame.append no longer exists in
    # pandas >= 2.0.
    return pd.DataFrame(rows, columns=columns, index=labels)

# To generate text
def generate_text(model_name, prompt, max_length=50, num_return_sequences=1):
    """Generate text from *prompt* with the given pre-trained model.

    A new ``"text-generation"`` pipeline is created for *model_name* on every
    call, after a header line naming the model has been printed.

    Args:
        model_name: Model identifier forwarded to ``pipeline(model=...)``.
        prompt: Input text handed to the pipeline.
        max_length: Forwarded to the pipeline call as ``max_length``.
        num_return_sequences: Forwarded to the pipeline call; only the first
            returned sequence is used regardless of this value.

    Returns:
        The ``'generated_text'`` field of the first pipeline result.
    """
    print(f"Generated Text using {model_name}:")
    outputs = pipeline("text-generation", model=model_name)(
        prompt,
        max_length=max_length,
        num_return_sequences=num_return_sequences,
    )
    return outputs[0]['generated_text']



In [None]:
# Identifiers of the pre-trained models to compare, in evaluation order.
models = [
    "gpt2",
    "facebook/bart-large-cnn",
    "t5-small",
    "xlnet-base-cased",
    "EleutherAI/gpt-neo-125M",
]

# Prompts used to test the models, grouped by topic (five per topic).
political_prompts = [
    "Politics will be game changer for the world because",
    "Worst President of USA was",
    "As compared to monarchy , democracy is",
    "2024 India elections are",
    "Congress Party served for over 20 years , but still",
]
sports_prompts = [
    "Virat Kohli is the best batsman because",
    "Football was the most",
    "Ronnie Coleman is the most awarded",
    "Max Verstappen is a very talented F1 driver which is evident from",
    "One of the lesser known sports is",
]
science_prompts = [
    "An atom consists of",
    "Rocket thrusts upwards by",
    "Full form of VIBGYOR is",
    "When apple fell on Newton's head , he discovered",
    "Black holes were considered to be a myth until",
]

In [None]:
# Score every model on the fifth sports prompt, then write the resulting
# table to CSV so the TOPSIS step can read it back.
df = calculate_df(models=models, prompt=sports_prompts[4])
df.to_csv(path_or_buf='Assignment_3_TOPSIS_for_Text_Genration/o25.csv')

In [None]:
# Apply TOPSIS to the criteria table written to o25.csv.
# NOTE(review): the arguments are presumably (input CSV, weights, impacts,
# output CSV), i.e. equal weights and all three criteria maximised -- confirm
# against the Topsis package's documentation.
from Topsis import __main__ as TPS

TPS.TOPSIS(
    "Assignment_3_TOPSIS_for_Text_Genration/o25.csv",
    "1,1,1",
    "+,+,+",
    "Assignment_3_TOPSIS_for_Text_Genration/f25.csv",
)