In [None]:
# Fact verification: model registry, dataset keys, and example accuracy scores.
# NOTE: this cell uses `pd` (pandas), which is imported in the notebook's setup cell.

# Registry of fact-verification models: key -> display name and parameter count.
fv_models = {
    'bert_fc': {'name': 'BERT + FC', 'params': '110M'},
    'rte_finetuned': {'name': 'RTE-Finetuned', 'params': '340M'},
    'xlnet_large': {'name': 'XLNet-Large', 'params': '340M'}
}

# Dataset keys; they match the inner keys of `fv_performance` below.
fv_datasets = ['fever', 'liar']

# Example metrics (Accuracy): hard-coded values, nothing is computed in this cell.
fv_performance = {
    'bert_fc': {'fever': 0.712, 'liar': 0.681},
    'rte_finetuned': {'fever': 0.745, 'liar': 0.718},
    'xlnet_large': {'fever': 0.758, 'liar': 0.729}
}

# A dict of dicts puts the outer keys on the columns, so transpose to get
# one row per model and one column per dataset.
fv_perf_df = pd.DataFrame(fv_performance).T
print("\nFact Verification Performance (Accuracy):")
print("="*80)
print(fv_perf_df.to_string())
print("="*80)

# The check mark is written as a Unicode escape so the literal cannot be
# corrupted again by a wrong re-encoding of the file (it had been mangled
# into mojibake).
print("\nModel analysis completed successfully! \u2713")

## Fact Verification Analysis

In [None]:
# Paraphrasing: model registry, dataset keys, and example scores.
# Each registry entry maps a model key to its display name and parameter count.
paraphrase_models = dict(
    bert2bert=dict(name='BERT2BERT', params='220M'),
    transformer_small=dict(name='Transformer-Small', params='90M'),
    transformer_base=dict(name='Transformer-Base', params='230M'),
)

paraphrase_datasets = ['paranmt', 'mrpc', 'quora']

# Example scores per model, listed in the same order as `paraphrase_datasets`.
# They are hard-coded here and reported under a BLEU header when printed below.
paraphrase_performance = {
    model_key: dict(zip(paraphrase_datasets, scores))
    for model_key, scores in (
        ('bert2bert', (38.2, 52.1, 45.8)),
        ('transformer_small', (36.5, 50.3, 44.1)),
        ('transformer_base', (40.1, 54.7, 47.9)),
    )
}

# Outer keys land on the columns, so transpose to get one row per model.
paraphrase_perf_df = pd.DataFrame(paraphrase_performance).transpose()

# Header, rule, table, rule: one item per line.
print(
    "\nParaphrasing Performance (BLEU Scores):",
    "=" * 80,
    paraphrase_perf_df.to_string(),
    "=" * 80,
    sep="\n",
)

## Paraphrasing Analysis

In [None]:
# Sarcasm detection: model registry, dataset keys, and example scores.
# Each registry entry records the display name, input modality, and parameter count.
sarcasm_models = dict(
    bert_base=dict(name='BERT-Base', modality='text', params='110M'),
    bert_large=dict(name='BERT-Large', modality='text', params='340M'),
    roberta_base=dict(name='RoBERTa-Base', modality='text', params='125M'),
    multimodal_fusion=dict(
        name='Multimodal Fusion',
        modality='text+image+audio+video',
        params='200M',
    ),
)

sarcasm_datasets = ['sarc', 'mmsd2', 'mustard', 'sarcnet', 'sarcasm_headlines']

# Example scores (placeholders to be replaced by real metrics), one tuple per
# model with values listed in the same order as `sarcasm_datasets`.
sarcasm_performance = {
    model_key: dict(zip(sarcasm_datasets, scores))
    for model_key, scores in (
        ('bert_base', (0.845, 0.782, 0.756, 0.712, 0.823)),
        ('bert_large', (0.861, 0.798, 0.768, 0.728, 0.839)),
        ('roberta_base', (0.859, 0.805, 0.771, 0.731, 0.841)),
        ('multimodal_fusion', (0.856, 0.832, 0.814, 0.789, 0.847)),
    )
}

# Build the table with one row per model and one column per dataset.
sarcasm_perf_df = pd.DataFrame(sarcasm_performance).transpose()

# Header, rule, table, rule: one item per line.
print(
    "\nSarcasm Detection Performance (F1 Scores):",
    "=" * 80,
    sarcasm_perf_df.to_string(),
    "=" * 80,
    sep="\n",
)

## Sarcasm Detection Analysis

In [None]:
import sys
import os
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import warnings
# Silence every warning for the rest of the session to keep cell output short.
# NOTE(review): this also hides deprecation warnings; narrow the filter if
# those matter for this analysis.
warnings.filterwarnings('ignore')

# Add project root to path. When the kernel's working directory is named
# 'notebooks', the project root is its parent; otherwise the working directory
# itself is used. Path.cwd() is a classmethod, so it is called on the class and
# evaluated once instead of through two throwaway Path() instances.
_cwd = Path.cwd()
project_root = _cwd.parent if _cwd.name == 'notebooks' else _cwd
sys.path.insert(0, str(project_root))

# Set style. The 'seaborn-v0_8' name only exists on matplotlib >= 3.6; older
# releases expose the style sheet as 'seaborn', so fall back to that name
# instead of failing on an unknown style.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
sns.set_palette("husl")

# Create output directory (parents included; a no-op when it already exists).
output_dir = project_root / 'outputs' / 'model_analysis'
output_dir.mkdir(parents=True, exist_ok=True)

print(f"Project root: {project_root}")
print(f"Output directory: {output_dir}")

# FactCheck-MM Model Analysis

## Overview
Comprehensive analysis of FactCheck-MM model performance across three tasks:
- **Sarcasm Detection** (sarc, mmsd2, mustard, sarcnet, sarcasm_headlines)
- **Paraphrasing** (paranmt, mrpc, quora)
- **Fact Verification** (fever, liar)

## Analysis Scope
- Training curve analysis across datasets
- Model architecture comparison (text vs multimodal)
- Performance metrics evaluation
- Cross-dataset and cross-task analysis