In [1]:
pip install pandas rouge nltk

Note: you may need to restart the kernel to use updated packages.


In [3]:
import nltk
nltk.download('punkt')
nltk.download('wordnet') 

[nltk_data] Downloading package punkt to C:\Users\Binura
[nltk_data]     Perera\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\Binura
[nltk_data]     Perera\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [4]:
import pandas as pd
from nltk.tokenize import word_tokenize
from rouge import Rouge
from nltk.translate.meteor_score import meteor_score

In [5]:
# Load the CSV file
data = pd.read_csv("Data/q_and_a.csv")

In [6]:
print(data)

                                             question  \
0                                       Define a Mine   
1                                     Define a Mining   
2                                    Define a Mineral   
3                                       Define a Rock   
4                                        Define a Ore   
..                                                ...   
95  What are the advantages of wet grinding in min...   
96  Explain Principles of flotation in mineral pro...   
97            What is a Collector in Froth flotation?   
98  What are the types of magnetic separator in mi...   
99          What is Dewatering in mineral processing?   

                                     reference_answer  \
0   An excavation made in the earth to extract min...   
1   The activity, occupation, and industry concern...   
2   A naturally occurring inorganic element or com...   
3   Any naturally formed aggregate of one or more ...   
4   A mineral deposit that has

In [7]:
# Initialize ROUGE scorer
rouge = Rouge()

In [8]:
# Function to evaluate a single row and return scores
def evaluate_row(row):
    # Tokenize each answer and the reference
    reference_tokens = [word_tokenize(row['reference_answer'])]
    chatgpt_tokens = word_tokenize(row['chat_gpt_answer'])
    google_tokens = word_tokenize(row['google_gemini_answer'])
    model_tokens = word_tokenize(row['fine_tuned_model_answer'])
    
    # Prepare data for ROUGE
    chatgpt_text = ' '.join(chatgpt_tokens)
    google_text = ' '.join(google_tokens)
    model_text = ' '.join(model_tokens)
    reference_text = ' '.join(reference_tokens[0])
    
    # Calculate ROUGE scores
    chatgpt_rouge = rouge.get_scores(chatgpt_text, reference_text, avg=False)[0]
    google_rouge = rouge.get_scores(google_text, reference_text, avg=False)[0]
    model_rouge = rouge.get_scores(model_text, reference_text, avg=False)[0]
    
    # Calculate Meteor scores
    chatgpt_meteor = meteor_score(reference_tokens, chatgpt_tokens)
    google_meteor = meteor_score(reference_tokens, google_tokens)
    model_meteor = meteor_score(reference_tokens, model_tokens)
    
    # Compile results
    results = {
        'chatgpt_rouge_1': chatgpt_rouge['rouge-1']['f'],
        'chatgpt_rouge_2': chatgpt_rouge['rouge-2']['f'],
        'chatgpt_rouge_l': chatgpt_rouge['rouge-l']['f'],
        'chatgpt_meteor': chatgpt_meteor,
        'google_rouge_1': google_rouge['rouge-1']['f'],
        'google_rouge_2': google_rouge['rouge-2']['f'],
        'google_rouge_l': google_rouge['rouge-l']['f'],
        'google_meteor': google_meteor,
        'model_rouge_1': model_rouge['rouge-1']['f'],
        'model_rouge_2': model_rouge['rouge-2']['f'],
        'model_rouge_l': model_rouge['rouge-l']['f'],
        'model_meteor': model_meteor
    }
    
    return results

In [9]:
# Apply the function to each row in the dataframe
scored_data = data.apply(evaluate_row, axis=1, result_type='expand')

In [None]:
# Save results
scored_data.to_csv('Data/q_and_a_eval.csv', index=False)