In [None]:
# Install required libraries
!pip install sentence-transformers scikit-learn

import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# --- Sridevi will give you this file ---
# Make sure to upload it to your Colab environment
FILE_PATH = 'llm_outputs.csv' # Or whatever she names it

try:
    df = pd.read_csv(FILE_PATH)
    print("âœ… Data loaded successfully. Here's a sample:")
    print(df.head())
except FileNotFoundError:
    print(f"ðŸ›‘ Error: Please upload the file '{FILE_PATH}' from Sridevi.")

In [None]:
# Load a pre-trained sentence-embedding model.
# NOTE(review): the all-MiniLM-L6-v2 model card states that input longer than
# 256 word pieces is truncated -- confirm. If so, long transcriptions are
# compared using only their opening portion.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Rows where either text is missing (empty CSV cells are read as NaN) cannot be
# scored meaningfully; remember them so they end up with a NaN score.
has_both_texts = df['cleaned_transcription'].notna() & df['generated_summary'].notna()
skipped_rows = int((~has_both_texts).sum())
if skipped_rows:
    print(f"⚠️ {skipped_rows} row(s) have a missing transcription or summary; their score will be NaN.")

# Get the texts from the DataFrame.
# The encoder expects strings, so missing values are replaced by '' and every
# value is coerced to str before encoding.
original_texts = df['cleaned_transcription'].fillna('').astype(str).tolist()
generated_summaries = df['generated_summary'].fillna('').astype(str).tolist()

# Generate embeddings for both sets of text
print("\nGenerating vector embeddings...")
original_embeddings = model.encode(original_texts, show_progress_bar=True)
summary_embeddings = model.encode(generated_summaries, show_progress_bar=True)
print("✅ Embeddings created.")

# Calculate cosine similarity for each (original, summary) pair, row by row.
# cosine_similarity works on 2-D inputs, hence the reshape(1, -1) of each
# embedding; [0][0] extracts the single value from the resulting 1x1 matrix.
scores = [
    float(cosine_similarity(original_vec.reshape(1, -1), summary_vec.reshape(1, -1))[0][0])
    if is_scorable
    else float('nan')
    for original_vec, summary_vec, is_scorable in zip(
        original_embeddings, summary_embeddings, has_both_texts
    )
]

# Add the scores back to the DataFrame
df['consistency_score'] = scores

# NOTE(review): this score is embedding cosine similarity, i.e. a semantic
# similarity proxy; it does not by itself verify individual facts.
print("\n✅ Factual consistency scores calculated.")

In [None]:
# Report header.
print("\n--- LLM Evaluation Report ---")

# Overall metrics over the per-row consistency scores.
consistency = df['consistency_score']
average_score, min_score, max_score = (
    consistency.mean(),
    consistency.min(),
    consistency.max(),
)

print(f"\nAverage Consistency Score: {average_score:.4f}")
print(f"Minimum Consistency Score: {min_score:.4f}")
print(f"Maximum Consistency Score: {max_score:.4f}")

# Print the three rows at each end of the ranking: the lowest scores first
# (candidates for manual review), then the highest scores (good examples).
for heading, ascending in (
    ("\n--- Top 3 Lowest Scores (Needs Review) ---", True),
    ("\n--- Top 3 Highest Scores (Good Examples) ---", False),
):
    print(heading)
    print(df.sort_values(by='consistency_score', ascending=ascending).head(3))