In [7]:
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import torch
from sklearn.metrics.pairwise import distance
import json

def _load_semicolon_csv(path):
    """Read a ';'-delimited CSV file into a DataFrame."""
    return pd.read_csv(path, sep=";")


# Sentence-embedding model shared by the embedding-based metrics below
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Predicted topics per article, and the reference topic counts per article
topics_df = _load_semicolon_csv('csv/5ShotLlama3+Keyphrases_Topics.csv')
ground_truth_df = _load_semicolon_csv('csv/GroundTruthV2.csv')

# Parse the GroundTruth field into a dictionary
def parse_ground_truth(ground_truth_str):
    """Parse a ``"topic: count, topic: count"`` string into a dictionary.

    Each comma-separated entry is split on its LAST colon, so a topic name
    that itself contains ':' is kept intact. Blank entries (for example the
    one produced by a trailing comma, or an empty input string) are skipped.

    Args:
        ground_truth_str: Comma-separated ``topic:count`` pairs.

    Returns:
        dict mapping each whitespace-stripped topic name to its integer count.

    Raises:
        ValueError: If a non-blank entry has no ':' separator or its count
            is not an integer.
    """
    counts = {}
    for entry in ground_truth_str.split(','):
        if not entry.strip():
            continue  # tolerate trailing commas / empty input
        topic, separator, count = entry.rpartition(':')
        if not separator:
            raise ValueError(f"Missing ':' in ground-truth entry: {entry!r}")
        counts[topic.strip()] = int(count.strip())
    return counts

# Replace each raw "topic: count, ..." string with its parsed {topic: count} dict
ground_truth_df['TopicsCount'] = ground_truth_df['TopicsCount'].map(parse_ground_truth)


# Function to calculate cosine similarity
def calculate_similarity(embeddings1, embeddings2):
    """Return the pairwise cosine-similarity matrix of two embedding sets.

    Thin wrapper around ``util.pytorch_cos_sim``; entry ``[i, j]`` of the
    result compares ``embeddings1[i]`` with ``embeddings2[j]``.
    """
    cosine_scores = util.pytorch_cos_sim(embeddings1, embeddings2)
    return cosine_scores


# Calculate diversity based on embeddings
def calculate_diversity(embeddings):
    """Return the mean pairwise cosine distance of ``embeddings`` as a float.

    The cosine distance ``1 - cos`` of every unordered pair is halved so it
    falls in [0, 1] (0 = identical direction, 1 = opposite direction), and
    the average over all pairs is returned.

    Args:
        embeddings: A sequence of equally sized vectors (NumPy rows, tensors
            or plain lists of numbers).

    Returns:
        A plain Python float in [0, 1]; ``0.0`` when fewer than two
        embeddings are given, because no pair exists to compare. A float
        (rather than a tensor) keeps the score directly usable with pandas
        aggregation and ``json.dump``.
    """
    n = len(embeddings)
    if n < 2:
        return 0.0  # No diversity score if less than 2 topics

    # One (n, dim) matrix instead of n*(n-1)/2 separate similarity calls.
    matrix = torch.stack([torch.as_tensor(vector).flatten().float() for vector in embeddings])
    unit_vectors = torch.nn.functional.normalize(matrix, p=2, dim=1)
    cosine = unit_vectors @ unit_vectors.T

    # Strict upper triangle == every unordered pair (i < j) exactly once.
    rows, cols = torch.triu_indices(n, n, offset=1, device=cosine.device)
    # Clamp guards against float rounding pushing a value just outside [0, 1].
    distances = ((1 - cosine[rows, cols]) / 2).clamp(0.0, 1.0)
    return distances.mean().item()


def _f1(precision, recall):
    """Return the harmonic mean of precision and recall, or 0 if they sum to 0."""
    total = precision + recall
    return 2 * (precision * recall) / total if total != 0 else 0


def compute_metrics(article_id, topics, ground_truth, top_n=8):
    """Score one article's predicted topics against its ground truth.

    Only the ``top_n`` highest-weighted ground-truth topics are considered.

    Args:
        article_id: Key used to look the article up in ``ground_truth``.
        topics: List of predicted topic strings for the article.
        ground_truth: Mapping (e.g. dict or pandas Series) from article id to
            a ``{topic: weight}`` dict.
        top_n: Maximum number of ground-truth topics to keep (non-negative;
            defaults to 8).

    Returns:
        A 7-tuple ``(exact_precision, exact_weighted_recall, exact_f1,
        embeddings_precision, embeddings_weighted_recall, embeddings_f1,
        diversity)``. Metrics that cannot be computed (no predicted topics,
        no ground-truth topics, or zero total weight) are reported as 0.

    Raises:
        KeyError: If ``article_id`` is not present in ``ground_truth``.
    """
    ground_truth_topics = ground_truth[article_id]
    ground_truth_sorted = sorted(ground_truth_topics.items(), key=lambda item: item[1], reverse=True)

    # Slicing already caps at the number of available topics.
    top_n_ground_truth = dict(ground_truth_sorted[:top_n])
    total_weight_top_n = sum(top_n_ground_truth.values())

    ## EXACT-MATCHING

    correct_topics = set(topics).intersection(top_n_ground_truth)

    # Exact-Match Precision
    ExactMatch_precision = len(correct_topics) / len(topics) if topics else 0

    # Exact-Match Weighted Recall
    correct_weights = sum(top_n_ground_truth[topic] for topic in correct_topics)
    ExactMatch_weighted_recall = correct_weights / total_weight_top_n if total_weight_top_n else 0

    # Exact-Match F1 Score
    ExactMatch_f1_score = _f1(ExactMatch_precision, ExactMatch_weighted_recall)

    if not topics:
        # Nothing to embed: similarity and diversity are undefined, report 0.
        return ExactMatch_precision, ExactMatch_weighted_recall, ExactMatch_f1_score, 0, 0, 0, 0

    ## EMBEDDING-BASED

    topic_embeddings = model.encode(list(topics))

    if top_n_ground_truth:
        gt_topic_embeddings = model.encode(list(top_n_ground_truth))

        # Rows follow the predicted topics, columns the ground-truth topics.
        similarity_matrix = calculate_similarity(topic_embeddings, gt_topic_embeddings)

        # Precision: best ground-truth match for each predicted topic.
        Embeddings_precision = similarity_matrix.max(dim=1).values.mean().item()

        # Weighted recall: best predicted match for each ground-truth topic,
        # weighted by that topic's ground-truth weight.
        if total_weight_top_n:
            weights = torch.tensor(
                list(top_n_ground_truth.values()),
                dtype=similarity_matrix.dtype,
                device=similarity_matrix.device,
            )
            max_similarity_per_gt = similarity_matrix.max(dim=0).values
            Embeddings_weighted_recall = (max_similarity_per_gt * weights).sum().item() / total_weight_top_n
        else:
            # Same zero-weight guard as the exact-match recall above.
            Embeddings_weighted_recall = 0
    else:
        # No ground-truth topics to compare against.
        Embeddings_precision = 0
        Embeddings_weighted_recall = 0

    Embeddings_f1_score = _f1(Embeddings_precision, Embeddings_weighted_recall)

    ## TOPIC DIVERSITY

    diversity_score = calculate_diversity(topic_embeddings)

    return ExactMatch_precision, ExactMatch_weighted_recall, ExactMatch_f1_score, Embeddings_precision, Embeddings_weighted_recall, Embeddings_f1_score, diversity_score

# Apply metrics to all articles
ExactMatch_results = []
Embeddings_results = []
Diversity_results = []

# The ArticleID -> TopicsCount lookup does not change between iterations,
# so build it once instead of re-indexing the DataFrame for every article.
ground_truth_by_article = ground_truth_df.set_index('ArticleID')['TopicsCount']

for _, row in topics_df.iterrows():
    article_id = row['ArticleID']
    topics = row['Topics'].split(', ')

    (
        ExactMatch_precision,
        ExactMatch_weighted_recall,
        ExactMatch_f1_score,
        Embeddings_precision,
        Embeddings_weighted_recall,
        Embeddings_f1_score,
        diversity,
    ) = compute_metrics(article_id, topics, ground_truth_by_article)

    ExactMatch_results.append({'ArticleID': article_id, 'ExactMatch_precision': ExactMatch_precision, 'ExactMatch_weighted_recall': ExactMatch_weighted_recall, 'ExactMatch_f1_score': ExactMatch_f1_score})
    Embeddings_results.append({'ArticleID': article_id, 'Embeddings_precision': Embeddings_precision, 'Embeddings_weighted_recall': Embeddings_weighted_recall, 'Embeddings_f1_score': Embeddings_f1_score})
    # Coerce to a plain float so the value aggregates and serializes to JSON
    # even when a one-element tensor or NumPy scalar is returned.
    Diversity_results.append({'ArticleID': article_id, 'Diversity': float(diversity)})


# Explicit columns keep the aggregations below working (yielding NaN)
# even when there were no articles to score.
ExactMatch_results_df = pd.DataFrame(ExactMatch_results, columns=['ArticleID', 'ExactMatch_precision', 'ExactMatch_weighted_recall', 'ExactMatch_f1_score'])
Embeddings_results_df = pd.DataFrame(Embeddings_results, columns=['ArticleID', 'Embeddings_precision', 'Embeddings_weighted_recall', 'Embeddings_f1_score'])
Diversity_results_df = pd.DataFrame(Diversity_results, columns=['ArticleID', 'Diversity'])


# Macro-averages: every article contributes equally to the final scores.
ExactMatch_final_precision = ExactMatch_results_df['ExactMatch_precision'].mean()
ExactMatch_final_weighted_recall = ExactMatch_results_df['ExactMatch_weighted_recall'].mean()
ExactMatch_final_f1_score = ExactMatch_results_df['ExactMatch_f1_score'].mean()

Embeddings_final_precision = Embeddings_results_df['Embeddings_precision'].mean()
Embeddings_final_weighted_recall = Embeddings_results_df['Embeddings_weighted_recall'].mean()
Embeddings_final_f1_score = Embeddings_results_df['Embeddings_f1_score'].mean()

final_diversity = Diversity_results_df['Diversity'].mean()

ExactMatch_final_scores = {'Final Precision': ExactMatch_final_precision, 'Final Weighted Recall': ExactMatch_final_weighted_recall, 'Final F1 Score': ExactMatch_final_f1_score}
Embeddings_scores = {'Final Precision': Embeddings_final_precision, 'Final Weighted Recall': Embeddings_final_weighted_recall, 'Final F1 Score': Embeddings_final_f1_score}


def save_results_json(ground_truth_category, filename, ExactMatch_results_df, Embeddings_results_df, Diversity_results_df):
    """Merge one evaluation run into ``evaluation_results.json``.

    The file (created if missing) is a nested mapping
    ``{ground_truth_category: {filename: {method: results}}}``; existing
    entries for other categories/files are preserved, and the entry for this
    ``ground_truth_category``/``filename`` pair is overwritten per method.

    The mean scores are computed from the DataFrames passed in, so the
    function does not depend on any module-level aggregate variables.

    Args:
        ground_truth_category: Top-level key, e.g. the ground-truth version.
        filename: Second-level key, e.g. the evaluated topics CSV name.
        ExactMatch_results_df: Per-article rows with ``ExactMatch_precision``,
            ``ExactMatch_weighted_recall`` and ``ExactMatch_f1_score`` columns.
        Embeddings_results_df: Per-article rows with ``Embeddings_precision``,
            ``Embeddings_weighted_recall`` and ``Embeddings_f1_score`` columns.
        Diversity_results_df: Per-article rows with a ``Diversity`` column.
    """
    data_path = "evaluation_results.json"

    try:
        with open(data_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        data = {}

    def _method_summary(results_df, prefix):
        """Build the ``{"mean": ..., "articles": ...}`` entry for one method."""
        return {
            "mean": {
                "Precision": float(results_df[f"{prefix}_precision"].mean()),
                "Weighted Recall": float(results_df[f"{prefix}_weighted_recall"].mean()),
                "F1 Score": float(results_df[f"{prefix}_f1_score"].mean()),
            },
            "articles": results_df.to_dict(orient="records"),
        }

    # Coerce diversity values to plain floats: a one-element tensor or other
    # scalar-like object in this column would not be JSON-serializable.
    diversity_df = Diversity_results_df.assign(
        Diversity=Diversity_results_df["Diversity"].map(float)
    )
    diversity_method_results = {
        'Diversity': float(diversity_df['Diversity'].mean()),
        "articles": diversity_df.to_dict(orient="records"),
    }

    # Update the nested structure (resolve the target entry once).
    run_entry = data.setdefault(ground_truth_category, {}).setdefault(filename, {})
    run_entry["Exact-Matching"] = _method_summary(ExactMatch_results_df, "ExactMatch")
    run_entry["Embedding-Based"] = _method_summary(Embeddings_results_df, "Embeddings")
    run_entry["Diversity"] = diversity_method_results

    with open(data_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)

# Persist this run under its ground-truth category and evaluated CSV name
save_results_json(
    ground_truth_category="GroundTruthV2",
    filename="5ShotLlama3+Keyphrases_Topics.csv",
    ExactMatch_results_df=ExactMatch_results_df,
    Embeddings_results_df=Embeddings_results_df,
    Diversity_results_df=Diversity_results_df,
)
print("Results saved to evaluation_results")




Results saved to evaluation_results
