In [None]:
import pandas as pd
import json


**Load the data**

In [None]:
# Path to the aggregated-metrics JSON file, relative to the notebook's working directory.
metrics = "../experiments/exp2/metrics/embedding/retrieved_query_documents_aggregated_metrics.json"

# Read the whole file as UTF-8 text and parse it into Python objects.
with open(metrics, mode="r", encoding="utf-8") as metrics_file:
    data = json.loads(metrics_file.read())

# Echo the parsed JSON so the cell output shows exactly what was loaded.
print(data)

In [None]:
df = pd.DataFrame(data)

# Every distinct cut-off present in the metrics, in ascending order.
k_values = sorted(df['k'].unique())

# (DataFrame column, display label) for each metric that gets its own ranking.
# The section heading prints the upper-cased label; each row prints it as written.
RANKED_METRICS = [
    ('precision@k', 'Precision'),
    ('recall@k', 'Recall'),
    ('NDCG@k', 'NDCG'),
    ('MAP@k', 'MAP'),
    ('MRR@k', 'MRR'),
]

print("=" * 100)
print("RANKING MODELS BY METRICS FOR EACH K")
print("=" * 100)

for k in k_values:
    # Rows measured at the current cut-off (sort_values below returns a new frame,
    # so no defensive copy is needed).
    k_data = df[df['k'] == k]

    print(f"\n{'='*100}")
    print(f"K = {k}")
    print(f"{'='*100}")

    # One ranking per metric, best value first.
    for column, label in RANKED_METRICS:
        print(f"\n📊 RANKED BY {label.upper()}@{k}:")
        print("-" * 80)
        ranked = k_data.sort_values(column, ascending=False)
        # Plain tuples: only the three printed fields are needed, not the row index.
        for model, config, value in ranked[['model', 'config', column]].itertuples(index=False, name=None):
            print(f"  {model:30} | {config:15} | {label}@{k}: {value:.4f}")

print("\n" + "=" * 100)
print("SUMMARY: TOP PERFORMERS ACROSS ALL K VALUES")
print("=" * 100)

# DataFrame column -> display label for each summarised metric.
# The section heading prints the upper-cased label; each row prints it as written.
SUMMARY_METRICS = {
    'precision@k': 'Precision',
    'recall@k': 'Recall',
    'NDCG@k': 'NDCG',
    'MAP@k': 'MAP',
    'MRR@k': 'MRR',
}

# How many (model, config) pairs to list per metric.
TOP_N = 3

# Mean of every metric over all k values, one row per (model, config) pair.
overall_avg = (
    df.groupby(['model', 'config'])
    .agg({column: 'mean' for column in SUMMARY_METRICS})
    .reset_index()
)

for column, label in SUMMARY_METRICS.items():
    print(f"\n🏆 BEST AVERAGE {label.upper()}:")
    best = overall_avg.nlargest(TOP_N, column)
    # Plain tuples: only the three printed fields are needed, not the row index.
    for model, config, value in best[['model', 'config', column]].itertuples(index=False, name=None):
        print(f"  {model:30} | {config:15} | Avg {label}: {value:.4f}")

In [None]:
import json
import pandas as pd

# Path to the aggregated-metrics JSON file, relative to the notebook's working directory.
metrics = "../experiments/exp2/metrics/embedding/retrieved_query_documents_aggregated_metrics.json"

with open(metrics, mode="r", encoding="utf-8") as metrics_file:
    data = json.loads(metrics_file.read())

df = pd.DataFrame(data)
k_values = sorted(df['k'].unique())

# One table per cut-off: every metric side by side, best NDCG first.
for k in k_values:
    k_data = df[df['k'] == k].copy().sort_values('NDCG@k', ascending=False)

    banner = f"{'='*120}"
    print(f"\n{banner}")
    print(f"K = {k} - ALL METRICS (Sorted by NDCG@{k})")
    print(banner)
    print(f"{'Model':<30} | {'Config':<12} | {'Prec@k':>8} | {'Recall@k':>10} | {'MAP@k':>8} | {'NDCG@k':>8} | {'MRR@k':>8}")
    print("-" * 120)

    for row_label, row in k_data.iterrows():
        # Left-aligned identity columns followed by right-aligned 4-decimal scores.
        identity_cells = f"{row['model']:<30} | {row['config']:<12} | "
        score_cells = (
            f"{row['precision@k']:>8.4f} | {row['recall@k']:>10.4f} | "
            f"{row['MAP@k']:>8.4f} | {row['NDCG@k']:>8.4f} | {row['MRR@k']:>8.4f}"
        )
        print(identity_cells + score_cells)

In [None]:
# Path to the per-query relevance JSON file, relative to the notebook's working directory.
metrics_eval = "../experiments/exp2/data/temp/retrieved_query_documents_relevance.json"

# Read the whole file as UTF-8 text and parse it into Python objects.
with open(metrics_eval, mode="r", encoding="utf-8") as relevance_file:
    data_eval = json.loads(relevance_file.read())

# Echo the parsed JSON so the cell output shows exactly what was loaded.
print(data_eval)

In [None]:
def analyze_relevance_scores(data_eval):
    """
    Count documents per relevance grade for every query, print a report, and return the numbers.

    Scores are matched by exact equality against 1, 0.5 and 0; a document with any
    other score contributes to ``total_docs`` only. A query's "relevant" count is
    the number of documents scored 1 plus the number scored 0.5.

    An empty ``data_eval`` is accepted: all averages and percentages are reported
    as 0.0 instead of raising ``ZeroDivisionError``.

    Args:
        data_eval: Mapping of query string -> mapping of document -> relevance score.

    Returns:
        Dictionary with the keys ``total_queries``, ``avg_rel_1``, ``avg_rel_0_5``,
        ``avg_rel_0`` (per-query averages), ``total_rel_1``, ``total_rel_0_5``,
        ``total_rel_0`` (sums over all queries) and ``query_stats`` (one dict per
        query with ``query``, ``rel_1``, ``rel_0.5``, ``rel_0``, ``total_docs``,
        in the iteration order of ``data_eval``).
    """
    total_queries = len(data_eval)

    def per_query(total):
        # No queries -> no meaningful average; report 0.0 rather than divide by zero.
        return total / total_queries if total_queries else 0.0

    def percent_of_queries(count):
        # Same zero guard for the distribution percentages.
        return count / total_queries * 100 if total_queries else 0.0

    def relevant_count(stat):
        # Fully and partially relevant documents both count as "relevant".
        return stat['rel_1'] + stat['rel_0.5']

    print("=" * 100)
    print("RELEVANCE SCORE ANALYSIS")
    print("=" * 100)

    # Per-query counts of each relevance grade.
    query_stats = []
    for query, docs in data_eval.items():
        scores = list(docs.values())
        query_stats.append({
            'query': query,
            'rel_1': sum(1 for score in scores if score == 1),
            'rel_0.5': sum(1 for score in scores if score == 0.5),
            'rel_0': sum(1 for score in scores if score == 0),
            'total_docs': len(docs)
        })

    total_rel_1 = sum(stat['rel_1'] for stat in query_stats)
    total_rel_0_5 = sum(stat['rel_0.5'] for stat in query_stats)
    total_rel_0 = sum(stat['rel_0'] for stat in query_stats)

    # Calculate averages
    avg_rel_1 = per_query(total_rel_1)
    avg_rel_0_5 = per_query(total_rel_0_5)
    avg_rel_0 = per_query(total_rel_0)

    # Print summary statistics
    print(f"\nTotal Queries: {total_queries}")
    print("\n" + "-" * 100)
    print("AVERAGE STATISTICS:")
    print("-" * 100)
    print(f"Average documents with relevance = 1.0:   {avg_rel_1:.2f}")
    print(f"Average documents with relevance = 0.5:   {avg_rel_0_5:.2f}")
    print(f"Average documents with relevance = 0.0:   {avg_rel_0:.2f}")

    # Print detailed per-query statistics
    print("\n" + "=" * 100)
    print("PER-QUERY BREAKDOWN:")
    print("=" * 100)
    print(f"{'Query':<60} | {'Rel=1':<8} | {'Rel=0.5':<8} | {'Rel=0':<8} | {'Total':<8}")
    print("-" * 100)

    for stat in query_stats:
        # Queries longer than the 60-character column are cut to 57 characters plus "...".
        query_short = stat['query'][:57] + "..." if len(stat['query']) > 60 else stat['query']
        print(f"{query_short:<60} | {stat['rel_1']:<8} | {stat['rel_0.5']:<8} | {stat['rel_0']:<8} | {stat['total_docs']:<8}")

    # Distribution analysis
    print("\n" + "=" * 100)
    print("DISTRIBUTION ANALYSIS:")
    print("=" * 100)

    relevant_per_query = [relevant_count(stat) for stat in query_stats]
    queries_with_no_relevant = sum(1 for n in relevant_per_query if n == 0)
    queries_with_1_5_relevant = sum(1 for n in relevant_per_query if 1 <= n <= 5)
    queries_with_5_10_relevant = sum(1 for n in relevant_per_query if 5 < n <= 10)
    queries_with_10_plus_relevant = sum(1 for n in relevant_per_query if n > 10)

    # Labels state the actual bin edges: the third bin starts at 6, the last one above 10.
    distribution = [
        ("Queries with 0 relevant documents:       ", queries_with_no_relevant),
        ("Queries with 1-5 relevant documents:     ", queries_with_1_5_relevant),
        ("Queries with 6-10 relevant documents:    ", queries_with_5_10_relevant),
        ("Queries with >10 relevant documents:     ", queries_with_10_plus_relevant),
    ]
    for label, count in distribution:
        print(f"{label}{count} ({percent_of_queries(count):.1f}%)")

    # Find queries with most/least relevant docs
    print("\n" + "=" * 100)
    print("EXTREME CASES:")
    print("=" * 100)

    sorted_by_relevant = sorted(query_stats, key=relevant_count, reverse=True)

    print("\n📊 Top 5 queries with MOST relevant documents:")
    for i, stat in enumerate(sorted_by_relevant[:5], 1):
        total_relevant = relevant_count(stat)
        print(f"{i}. {stat['query'][:70]}")
        print(f"   → Rel=1: {stat['rel_1']}, Rel=0.5: {stat['rel_0.5']}, Total relevant: {total_relevant}")

    print("\n📊 Top 5 queries with LEAST relevant documents:")
    # The list is sorted descending, so walk its tail backwards: rank 1 is the
    # query with the fewest relevant documents.
    for i, stat in enumerate(reversed(sorted_by_relevant[-5:]), 1):
        total_relevant = relevant_count(stat)
        print(f"{i}. {stat['query'][:70]}")
        print(f"   → Rel=1: {stat['rel_1']}, Rel=0.5: {stat['rel_0.5']}, Total relevant: {total_relevant}")

    return {
        'total_queries': total_queries,
        'avg_rel_1': avg_rel_1,
        'avg_rel_0_5': avg_rel_0_5,
        'avg_rel_0': avg_rel_0,
        'total_rel_1': total_rel_1,
        'total_rel_0_5': total_rel_0_5,
        'total_rel_0': total_rel_0,
        'query_stats': query_stats
    }

# Run the analysis
if __name__ == "__main__":
    # `data_eval` is the parsed relevance JSON loaded earlier in the notebook.
    results = analyze_relevance_scores(data_eval)

    # Write the returned statistics as JSON into the current working directory.
    # ensure_ascii=False keeps non-ASCII query text readable in the file.
    results_path = 'relevance_analysis_results.json'
    with open(results_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=4, ensure_ascii=False)

    print(f"\n✅ Analysis complete! Results saved to '{results_path}'")

In [None]:
import json
import pandas as pd

# Path to the aggregated-metrics JSON file, relative to the notebook's working directory.
metrics = "../experiments/exp2/metrics/embedding/retrieved_query_documents_aggregated_metrics.json"

with open(metrics, mode="r", encoding="utf-8") as metrics_file:
    data = json.loads(metrics_file.read())

df = pd.DataFrame(data)
k_values = sorted(df['k'].unique())

# Composite score: 0.6 x precision@k plus 0.4 x NDCG@k, stored as a new column.
weighted_precision = 0.6 * df['precision@k']
weighted_ndcg = 0.4 * df['NDCG@k']
df['score'] = weighted_precision + weighted_ndcg

# Rows measured at k = 5, highest composite score first (last expression -> cell output).
at_k5 = df.loc[df['k'] == 5, ['model', 'config', 'precision@k', 'NDCG@k', 'recall@k', 'score']]
at_k5.sort_values('score', ascending=False)

In [None]:
# Rows measured at k = 10, ordered by the `score` column, highest first
# (last expression -> cell output).
at_k10 = df.loc[df['k'] == 10, ['model', 'config', 'precision@k', 'NDCG@k', 'score']]
at_k10.sort_values('score', ascending=False)