# Comparative Analysis of Cross-Lingual QA Results

This notebook provides a comprehensive analysis of the zero-shot and few-shot experiment results, including statistical significance testing and visualization.


In [None]:
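# Import necessary libraries
# NOTE(review): '../src' puts the *contents* of src/ on sys.path, while the
# project imports below use the 'src.' package prefix, which needs the parent
# of src/ to be importable -- confirm this resolves from the notebook's
# working directory.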
import sys
import os
sys.path.append('../src')

import yaml
import torch
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path
import json
from scipy import stats
from typing import Dict, List, Any

from src.evaluation.analysis import ResultAnalyzer, StatisticalAnalyzer, ResultVisualizer
from src.utils.device_utils import DeviceManager

# Configure plot appearance. The style sheet is applied first so that the
# figure size and colour palette chosen below are layered on top of it.
plt.style.use('seaborn-v0_8')
plt.rcParams.update({'figure.figsize': (12, 8)})
sns.set_palette("husl")


## 1. Load Experiment Results


In [None]:
# Load results from experiments
results_dir = Path('../results')

# Initialize analyzers
result_analyzer = ResultAnalyzer()
statistical_analyzer = StatisticalAnalyzer()
visualizer = ResultVisualizer()


def _load_results(directory, setting, models=('mbert', 'mt5')):
    """Load the per-model JSON result files for one experiment setting.

    Files are looked up as ``<directory>/<setting>_<model>_results.json``.

    Args:
        directory: Path of the directory holding the result files.
        setting: File-name prefix of the experiment setting, e.g.
            ``'zero_shot'`` or ``'few_shot'``.
        models: Model names to look for.

    Returns:
        Dict mapping each model name whose file was found to the parsed
        JSON content. Models without a result file are left out.
    """
    loaded = {}
    for model in models:
        path = directory / f'{setting}_{model}_results.json'
        try:
            # JSON is UTF-8 by specification; do not rely on the locale default.
            with open(path, 'r', encoding='utf-8') as f:
                loaded[model] = json.load(f)
        except FileNotFoundError:
            # A missing file is expected when an experiment has not been run;
            # report it instead of skipping silently.
            print(f"Warning: {path} not found, skipping")
    return loaded


# Load zero-shot results
zero_shot_results = _load_results(results_dir, 'zero_shot')

# Load few-shot results
few_shot_results = _load_results(results_dir, 'few_shot')

# Only claim success when at least one result file was actually read.
if zero_shot_results or few_shot_results:
    print("Results loaded successfully!")
else:
    print(f"Warning: no result files were found in {results_dir}")
print(f"Zero-shot results: {list(zero_shot_results.keys())}")
print(f"Few-shot results: {list(few_shot_results.keys())}")
