In [None]:
import pandas as pd

def load_feature_list(csv_path):
    """Load the unique, non-empty feature names listed in a CSV file.

    The file must contain a ``feature`` column. Missing entries are dropped,
    every remaining value is converted to ``str`` and stripped of surrounding
    whitespace, blank names are discarded, and duplicates are removed while
    keeping first-seen order.

    Args:
        csv_path: Path (or any other source accepted by ``pandas.read_csv``)
            of the CSV file holding the feature names.

    Returns:
        list[str]: The cleaned feature names.

    Raises:
        KeyError: If the CSV has no ``feature`` column.
    """
    df = pd.read_csv(csv_path)

    if "feature" not in df.columns:
        raise KeyError(
            f"'feature' column not found in {csv_path!r}; "
            f"available columns: {list(df.columns)}"
        )

    names = df["feature"].dropna().astype(str).str.strip()

    # Whitespace-only cells are not NaN, so they survive dropna() and turn
    # into "" once stripped; an empty name can never match a real column.
    names = names[names != ""]

    return names.unique().tolist()

In [None]:
#Temp Analysis

# Install required packages
!pip install xgboost scikit-learn pandas numpy matplotlib seaborn scipy shap -q

# Mount Google Drive
from google.colab import drive
import os

print("Mounting Google Drive...")
try:
    drive.mount('/content/drive', force_remount=True)
    print(" Google Drive mounted successfully")

    # Verify mount
    if os.path.exists('/content/drive/MyDrive'):
        print(" Drive path verified")
    else:
        raise Exception("Drive mounted but MyDrive not found")
except Exception as e:
    print(f"ERROR")
    raise

# Check available space
import shutil
try:
    available_space = shutil.disk_usage('/content/drive/MyDrive').free / 1e9
    print(f" Available space on Drive: {available_space:.1f} GB")
    if available_space < 1.0:
        print(f"   WARNING: Low disk space!")
except:
    print(f"   Could not check disk space")


# Install kaleido for static image export (-U upgrades an existing install, -q keeps pip quiet)
!pip install -U kaleido -q


# Analysis dependencies: data handling, plotting, statistics, modelling, explainability.
# NOTE(review): pandas and os are already imported earlier in the notebook; the
# repeated imports below are harmless but redundant.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from scipy import stats
from scipy.stats import ttest_ind
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score, StratifiedKFold, train_test_split
from sklearn.metrics import f1_score, classification_report, confusion_matrix
import xgboost as xgb
from xgboost import XGBClassifier
import shap
import warnings
# Blanket filter: no category is given, so every warning is suppressed from here on
warnings.filterwarnings('ignore')
import os
import time
import pickle

# Set style: matplotlib dark-grid theme plus seaborn's "husl" colour palette
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

Mounting Google Drive...
Mounted at /content/drive
 Google Drive mounted successfully
 Drive path verified
 Available space on Drive: 208.7 GB
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.0/69.0 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.3/49.3 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Paths - UPDATE THESE to your actual paths
ORIGINAL_DF_PATH = '/content/raid_sample_medium_PostPOS_CLEAN (1).csv'
TEMPORAL_FEATURES_PATH = '/content/drive/MyDrive/Tesi Magistrale/temporal_features/temporal_features_final.csv'
BACKBONE_FEATURES_PATH = '/content/baseline_features.csv'
# Legacy alias: the path used to be stored under this name
Backbone = BACKBONE_FEATURES_PATH

# Output directory for results, with one sub-folder per artefact type
OUTPUT_DIR = '/content/drive/MyDrive/Tesi Magistrale/temporal_analysis'
os.makedirs(OUTPUT_DIR, exist_ok=True)
for subdir in ('plots', 'tables', 'checkpoints'):
    os.makedirs(os.path.join(OUTPUT_DIR, subdir), exist_ok=True)

# Load the base dataset, the temporal feature table and the backbone feature names.
# The backbone path was previously referenced as `backbone`, a name this cell
# never defines, so the load failed with NameError on a fresh kernel.
df_original = pd.read_csv(ORIGINAL_DF_PATH)
df_temporal = pd.read_csv(TEMPORAL_FEATURES_PATH)
backbone_features = load_feature_list(BACKBONE_FEATURES_PATH)

# Identify temporal feature columns (everything except the join key)
temporal_features = [c for c in df_temporal.columns if c != 'id']
print(f" Temporal feature columns: {len(temporal_features)}")

 Temporal feature columns: 33


In [None]:
#MERGE


# Temporal features organised by the category they belong to
print("Temporal feature breakdown by category:")

# Ordered mapping: category label -> feature names expected in that category
category_feature_lists = {
    # Category I: Event Structure
    'Event Structure': [
        'temp_num_events',
        'temp_events_per_sentence',
        'temp_event_lexical_diversity',
        'temp_tense_distribution_entropy',
        'temp_num_timex',
        'temp_timex_event_ratio',
    ],
    # Category II: Relations
    'Relations': [
        'temp_rel_mean_confidence',
        'temp_rel_confidence_variance',
        'temp_rel_before_after_ratio',
        'temp_rel_raw_cycle_count',
        'temp_rel_cycle_edge_ratio',
        'temp_rel_parallel_edge_rate',
        'temp_rel_type_entropy',
        'temp_rel_transitivity_violation_rate',
        'temp_rel_cycle_approx_flag',
    ],
    # Category III: Graph-Theoretic
    'Graph-Theoretic': [
        'tg_edge_retention',
        'tg_degree_entropy',
        'tg_avg_in_degree',
        'tg_avg_out_degree',
        'tg_ordering_entropy',
        'tg_longest_path',
        'tg_mean_depth',
        'tg_branching_factor',
        'tg_global_coherence',
    ],
    # Category IV: Constraints
    'Constraints': [
        'temp_constraint_violation_rate',
        'temp_constraint_csp_score',
        'temp_scope_variance',
    ],
    # Category V: Form-Meaning
    'Form-Meaning': [
        'temp_tense_time_alignment',
        'temp_deixis_consistency',
        'temp_ref_time_shifts',
    ],
    # Category VI: Graph Organization
    'Graph Organization': [
        'tg_centralization',
        'tg_clustering_coefficient',
        'tg_density',
    ],
}

# Per-category names kept available for the rest of the notebook
event_structure_features = category_feature_lists['Event Structure']
relation_features = category_feature_lists['Relations']
graph_features = category_feature_lists['Graph-Theoretic']
constraint_features = category_feature_lists['Constraints']
form_meaning_features = category_feature_lists['Form-Meaning']
graph_org_features = category_feature_lists['Graph Organization']

# Every expected temporal feature, flattened in category order
all_temporal_features = [
    name
    for names in category_feature_lists.values()
    for name in names
]

# Keep id, label and backbone columns, then attach the temporal features by id
df_backbone = df_original[['id', 'is_ai'] + backbone_features].copy()
df_merged = df_backbone.merge(df_temporal, on='id', how='inner')

# Split the expected features into those found in the merged frame and those missing
merged_columns = set(df_merged.columns)
temporal_features_present = [name for name in all_temporal_features if name in merged_columns]
missing_temporal = [name for name in all_temporal_features if name not in merged_columns]

if missing_temporal:
    print(f" WARNING: Missing temporal features: {missing_temporal}")

# Report per-category counts and record feature -> category for present features
feature_groups = {}
for category_name, names in category_feature_lists.items():
    available = [name for name in names if name in merged_columns]
    print(f"{category_name}: {len(available)}")
    for name in available:
        feature_groups[name] = category_name

# From here on only the features actually present are used
temporal_features = temporal_features_present


Temporal feature breakdown by category:
Event Structure: 6
Relations: 9
Graph-Theoretic: 9
Constraints: 3
Form-Meaning: 3
Graph Organization: 3


In [None]:
#DATA QUALITY

# View restricted to the temporal feature columns
temporal_values = df_merged[temporal_features]

# --- Missing values ---
print("Missing Value Analysis:")
nan_counts = temporal_values.isna().sum()
nan_features = nan_counts[nan_counts > 0].sort_values(ascending=False)

if nan_features.empty:
    print("   No missing values found!")
else:
    print(f"  Found {len(nan_features)} features with missing values:")
    total_rows = len(df_merged)
    for feat, count in nan_features.items():
        pct = 100 * count / total_rows
        print(f"  {feat}: {count} ({pct:.2f}%)")

# --- Infinite values ---
print("Infinite Value Check:")
inf_counts = np.isinf(temporal_values).sum()
inf_features = inf_counts[inf_counts > 0]

if inf_features.empty:
    print("   No infinite values found!")
else:
    print(f"  Found {len(inf_features)} features with infinite values:")
    for feat, count in inf_features.items():
        print(f"  {feat}: {count}")

# --- Zero-variance (constant) features ---
print("Zero-Variance Check:")
zero_var_features = []
for feat in temporal_features:
    if df_merged[feat].std() == 0:
        zero_var_features.append(feat)

if not zero_var_features:
    print("   No zero-variance features!")
else:
    print(f"  Found {len(zero_var_features)} zero-variance features:")
    for feat in zero_var_features:
        # The column is constant, so its first value is the constant
        print(f"  {feat}: constant value = {df_merged[feat].iloc[0]}")


Missing Value Analysis:
   No missing values found!
Infinite Value Check:
   No infinite values found!
Zero-Variance Check:
   No zero-variance features!


In [None]:
# HANDLE INVALID VALUES

print("HANDLING INVALID VALUES")

# Work on a copy so df_merged keeps the raw merged values
df_clean = df_merged.copy()

# Number of features (not cells) that needed a replacement
invalid_replacements = 0

for feat in temporal_features:
    # Count invalids
    n_nan = df_clean[feat].isna().sum()
    n_inf = np.isinf(df_clean[feat]).sum()

    if n_nan > 0 or n_inf > 0:
        print(f"{feat}: {n_nan} NaN, {n_inf} Inf → replacing with 0")
        # Assign the cleaned column back to the frame. Calling
        # fillna(..., inplace=True) on df_clean[feat] is chained assignment:
        # under pandas Copy-on-Write it updates a temporary and leaves
        # df_clean unchanged.
        df_clean[feat] = (
            df_clean[feat]
            .replace([np.inf, -np.inf], np.nan)
            .fillna(0)
        )
        invalid_replacements += 1

if invalid_replacements == 0:
    print("   No invalid values to replace!")
else:
    print(f"\n Replaced invalid values in {invalid_replacements} features")

# Verify no invalids remain
remaining_nans = df_clean[temporal_features].isna().sum().sum()
remaining_infs = np.isinf(df_clean[temporal_features]).sum().sum()

print("Verification:")
print(f"Remaining NaN: {remaining_nans}")
print(f"Remaining Inf: {remaining_infs}")

if remaining_nans == 0 and remaining_infs == 0:
    print("   All invalid values handled!")

HANDLING INVALID VALUES
   No invalid values to replace!
Verification:
Remaining NaN: 0
Remaining Inf: 0
   All invalid values handled!


In [None]:
# EXPLORATORY DATA ANALYSIS

# Split the cleaned data by label (is_ai == 1 -> AI, is_ai == 0 -> human)
df_ai = df_clean[df_clean['is_ai'] == 1]
df_human = df_clean[df_clean['is_ai'] == 0]

# One record of summary statistics per temporal feature
descriptive_stats = []

for feature in temporal_features:
    ai_values = df_ai[feature]
    human_values = df_human[feature]

    ai_mean = ai_values.mean()
    human_mean = human_values.mean()
    mean_gap = ai_mean - human_mean

    # Relative gap is undefined when the human mean is exactly zero
    if human_mean != 0:
        relative_gap = 100 * mean_gap / human_mean
    else:
        relative_gap = np.nan

    descriptive_stats.append(dict(
        feature=feature,
        category=feature_groups.get(feature, 'Unknown'),
        ai_mean=ai_mean,
        ai_std=ai_values.std(),
        ai_median=ai_values.median(),
        human_mean=human_mean,
        human_std=human_values.std(),
        human_median=human_values.median(),
        diff_mean=mean_gap,
        diff_pct=relative_gap,
    ))

df_stats = pd.DataFrame(descriptive_stats)

# Persist the table
stats_path = os.path.join(OUTPUT_DIR, 'tables', 'temporal_descriptive_statistics.csv')
df_stats.to_csv(stats_path, index=False)
print(f" Descriptive statistics saved: {stats_path}")

# Rank by the magnitude of the relative difference and show the first ten
print("Top 10 temporal features by relative difference (%):")
df_stats_sorted = df_stats.sort_values('diff_pct', key=abs, ascending=False)
top_columns = ['feature', 'category', 'ai_mean', 'human_mean', 'diff_pct']
print(df_stats_sorted[top_columns].head(10).to_string(index=False))


 Descriptive statistics saved: /content/drive/MyDrive/Tesi Magistrale/temporal_analysis/tables/temporal_descriptive_statistics.csv
Top 10 temporal features by relative difference (%):
                             feature        category      ai_mean    human_mean   diff_pct
          temp_rel_cycle_approx_flag       Relations     0.001667      0.033167 -94.974874
                 temp_scope_variance     Constraints 50028.699621 287561.246549 -82.602419
      temp_constraint_violation_rate     Constraints     0.071064      0.050748  40.032689
             temp_deixis_consistency    Form-Meaning     1.120333      1.611833 -30.493227
           temp_constraint_csp_score     Constraints    88.011833    113.499833 -22.456421
                     temp_num_events Event Structure    27.144667     34.946833 -22.325819
                temp_ref_time_shifts    Form-Meaning     0.407833      0.519000 -21.419396
temp_rel_transitivity_violation_rate       Relations     1.859204      2.263812 -17.8728

In [None]:
# STATISTICAL TESTING

def cohens_d(group1, group2):
    """Calculate Cohen's d effect size between two samples.

    Cohen's d is the difference of the two sample means divided by the
    pooled sample standard deviation. Variances are always sample variances
    (``ddof=1``), matching the ``n - 1`` weights of the pooling formula, no
    matter which array-like type is passed in.

    Args:
        group1: Numeric array-like (pandas Series, NumPy array, list, ...).
        group2: Numeric array-like, same kinds as ``group1``.

    Returns:
        float: ``(mean(group1) - mean(group2)) / pooled_std``. Returns ``0.0``
        when the pooled standard deviation is not a positive number (constant
        data, or too few non-missing observations to estimate a variance).

    Notes:
        NaN entries are dropped from each group before anything is computed,
        so the group sizes and the moments refer to the same observations.
    """
    # Normalising to float Series gives lists/arrays the same ddof=1 variance
    # that pandas Series inputs already had.
    g1 = pd.Series(group1, dtype=float).dropna()
    g2 = pd.Series(group2, dtype=float).dropna()

    n1, n2 = len(g1), len(g2)
    if n1 + n2 <= 2:
        # Pooled variance needs n1 + n2 - 2 > 0 degrees of freedom
        return 0.0

    var1, var2 = g1.var(), g2.var()
    pooled_std = np.sqrt(((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2))

    # `not (x > 0)` is also true for NaN, e.g. when one group has one element
    if not pooled_std > 0:
        return 0.0
    return float((g1.mean() - g2.mean()) / pooled_std)

print("Running statistical tests for all temporal features")

# Exclusive upper bounds on |d| for each label; anything not below the last
# bound is labelled 'large'.
_EFFECT_SIZE_BOUNDS = ((0.2, 'negligible'), (0.5, 'small'), (0.8, 'medium'))

statistical_results = []

for feature in tqdm(temporal_features, desc="Testing features"):
    ai_vals = df_ai[feature].dropna()
    human_vals = df_human[feature].dropna()

    # Two-sample t-test without the equal-variance assumption
    t_stat, p_value = ttest_ind(ai_vals, human_vals, equal_var=False)

    # Standardised mean difference (AI minus human)
    d = cohens_d(ai_vals, human_vals)

    # First bound that |d| falls under decides the label
    effect_size = next(
        (label for bound, label in _EFFECT_SIZE_BOUNDS if abs(d) < bound),
        'large',
    )

    statistical_results.append(dict(
        feature=feature,
        category=feature_groups.get(feature, 'Unknown'),
        t_statistic=t_stat,
        p_value=p_value,
        cohens_d=d,
        effect_size=effect_size,
        significant=p_value < 0.05,
        ai_mean=ai_vals.mean(),
        human_mean=human_vals.mean(),
    ))

df_stats_tests = pd.DataFrame(statistical_results)

# Bonferroni correction: divide alpha by the number of tested features
n_tested = len(temporal_features)
bonferroni_threshold = 0.05 / n_tested
df_stats_tests['bonferroni_significant'] = df_stats_tests['p_value'] < bonferroni_threshold

# Persist the per-feature test results
stats_test_path = os.path.join(OUTPUT_DIR, 'tables', 'temporal_statistical_tests.csv')
df_stats_tests.to_csv(stats_test_path, index=False)
print(f"\n Statistical test results saved: {stats_test_path}")

# Overall significance counts
n_significant = df_stats_tests['p_value'].lt(0.05).sum()
n_bonferroni = df_stats_tests['bonferroni_significant'].sum()

print("Statistical significance summary:")
print(f"Significant (p < 0.05): {n_significant}/{n_tested} ({100*n_significant/n_tested:.1f}%)")
print(f"Bonferroni-corrected (p < {bonferroni_threshold:.6f}): {n_bonferroni}/{n_tested} ({100*n_bonferroni/n_tested:.1f}%)")

# How many features fall under each effect-size label
print("Effect size distribution:")
n_rows = len(df_stats_tests)
for effect in ('negligible', 'small', 'medium', 'large'):
    count = df_stats_tests['effect_size'].eq(effect).sum()
    print(f"{effect.capitalize()}: {count} ({100*count/n_rows:.1f}%)")

# Per category: number of Bonferroni-significant features and mean |d|
print("Significance by category:")
category_summary = (
    df_stats_tests
    .groupby('category')
    .agg({
        'bonferroni_significant': 'sum',
        'cohens_d': lambda values: values.abs().mean(),
    })
    .round(4)
)
category_summary.columns = ['Bonferroni Significant', "Mean |Cohen's d|"]
print(category_summary.to_string())

# Strongest effects first, regardless of sign
print("Top 15 temporal features by |Cohen's d|:")
df_top_effects = df_stats_tests.sort_values('cohens_d', key=abs, ascending=False).head(15)
top_effect_columns = ['feature', 'category', 'cohens_d', 'p_value', 'effect_size']
print(df_top_effects[top_effect_columns].to_string(index=False))


Running statistical tests for all temporal features


Testing features: 100%|██████████| 33/33 [00:00<00:00, 281.22it/s]


 Statistical test results saved: /content/drive/MyDrive/Tesi Magistrale/temporal_analysis/tables/temporal_statistical_tests.csv
Statistical significance summary:
Significant (p < 0.05): 26/33 (78.8%)
Bonferroni-corrected (p < 0.001515): 25/33 (75.8%)
Effect size distribution:
Negligible: 28 (84.8%)
Small: 5 (15.2%)
Medium: 0 (0.0%)
Large: 0 (0.0%)
Significance by category:
                    Bonferroni Significant  Mean |Cohen's d|
category                                                    
Constraints                              3            0.1589
Event Structure                          4            0.1050
Form-Meaning                             3            0.1109
Graph Organization                       1            0.0532
Graph-Theoretic                          8            0.0994
Relations                                6            0.1121
Top 15 temporal features by |Cohen's d|:
                             feature           category  cohens_d      p_value effect_size
   




In [None]:
#Category-Level Effect Sizes


# One fixed colour per temporal feature category
category_colors = {
    'Event Structure': '#1f77b4',
    'Relations': '#ff7f0e',
    'Graph-Theoretic': '#2ca02c',
    'Constraints': '#d62728',
    'Form-Meaning': '#9467bd',
    'Graph Organization': '#8c564b'
}

# 2 x 3 grid: one panel per category, in the order of category_colors
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.flatten()

for ax, category in zip(axes, category_colors):
    in_category = df_stats_tests['category'] == category
    cat_features = df_stats_tests[in_category].sort_values('cohens_d', ascending=True)

    # Empty category: show a placeholder panel instead of an empty chart
    if cat_features.empty:
        ax.text(0.5, 0.5, f'No {category} features',
                ha='center', va='center', fontsize=12)
        ax.set_title(f'{category}\n(0 features)', fontsize=11, fontweight='bold')
        ax.axis('off')
        continue

    y_pos = np.arange(len(cat_features))
    # Red bars are Bonferroni-significant, gray bars are not
    colors_cat = ['red' if flagged else 'gray'
                  for flagged in cat_features['bonferroni_significant']]
    # Drop the 'temp_' / 'tg_' markers to shorten the tick labels
    short_names = [name.replace('temp_', '').replace('tg_', '')
                   for name in cat_features['feature']]

    ax.barh(y_pos, cat_features['cohens_d'], color=colors_cat, alpha=0.7)
    ax.set_yticks(y_pos)
    ax.set_yticklabels(short_names, fontsize=8)
    ax.set_xlabel("Cohen's d", fontsize=9, fontweight='bold')
    ax.set_title(f'{category}\n({len(cat_features)} features)',
                 fontsize=11, fontweight='bold')
    # Zero reference line separates negative from positive effects
    ax.axvline(x=0, color='black', linestyle='-', linewidth=0.8)
    ax.grid(True, alpha=0.3, axis='x')

plt.suptitle("Temporal Features: Effect Sizes by Category",
             fontsize=15, fontweight='bold')
plt.tight_layout()

# Write the figure to disk and release it
category_plot_path = os.path.join(OUTPUT_DIR, 'plots', 'cohens_d_by_category.png')
plt.savefig(category_plot_path, dpi=300, bbox_inches='tight')
plt.close()

print(f"   Saved: {category_plot_path}")



   Saved: /content/drive/MyDrive/Tesi Magistrale/temporal_analysis/plots/cohens_d_by_category.png


In [None]:

# TRAIN/TEST SPLIT

# 80/20 split, stratified on the label and seeded for reproducibility
train_df, test_df = train_test_split(
    df_clean,
    test_size=0.2,
    stratify=df_clean['is_ai'],
    random_state=42,
)

# Report the share of each class in both partitions
for split_name, split_df in (('train', train_df), ('test', test_df)):
    split_labels = split_df['is_ai']
    split_size = len(split_df)
    print(f"Class distribution ({split_name}):")
    print(f"AI: ({100*(split_labels==1).sum()/split_size:.1f}%)")
    print(f"Human: ({100*(split_labels==0).sum()/split_size:.1f}%)")

Class distribution (train):
AI: (50.0%)
Human: (50.0%)
Class distribution (test):
AI: (50.0%)
Human: (50.0%)


In [None]:
# MONO-TEMPORAL ANALYSIS (TEMPORAL FEATURES ONLY)

print("MONO-TEMPORAL ANALYSIS (TEMPORAL FEATURES ONLY)")

# Labels and temporal-only design matrices for both partitions
y_temporal_train = train_df['is_ai'].values
y_temporal_test = test_df['is_ai'].values
X_temporal_train = train_df[temporal_features].values
X_temporal_test = test_df[temporal_features].values

# XGBoost configuration for the temporal-only classifier
xgb_temporal_params = dict(
    n_estimators=500,
    max_depth=9,
    learning_rate=0.15,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    eval_metric='logloss',
    use_label_encoder=False,
)
xgb_temporal = XGBClassifier(**xgb_temporal_params)

# 5-fold cross-validated F1 on the training partition only
cv_scores_temporal = cross_val_score(
    xgb_temporal,
    X_temporal_train,
    y_temporal_train,
    cv=5,
    scoring='f1',
)
cv_mean_temporal = cv_scores_temporal.mean()
print(f"F1 scores: {cv_scores_temporal}")
print(f"Mean F1: {cv_mean_temporal:.4f}")

# Refit on the whole training partition
print("Training final model...")
xgb_temporal.fit(X_temporal_train, y_temporal_train)

# Score the held-out test partition
y_pred_temporal = xgb_temporal.predict(X_temporal_test)
f1_temporal = f1_score(y_temporal_test, y_pred_temporal)

print("\n Mono-Temporal Model Performance:")
print(f"Test F1: {f1_temporal:.4f}")
print(f"CV F1: {cv_mean_temporal:.4f}")

# Per-class precision / recall / F1 on the test partition
print("Classification Report (Test Set):")
report_temporal = classification_report(
    y_temporal_test, y_pred_temporal, target_names=['Human', 'AI']
)
print(report_temporal)

# Persist the fitted model
temporal_model_path = os.path.join(OUTPUT_DIR, 'checkpoints', 'xgb_temporal_only.pkl')
with open(temporal_model_path, 'wb') as model_file:
    pickle.dump(xgb_temporal, model_file)
print(f"Model saved: {temporal_model_path}")

MONO-TEMPORAL ANALYSIS (TEMPORAL FEATURES ONLY)
F1 scores: [0.68285124 0.68779221 0.68766404 0.69179827 0.68621399]
Mean F1: 0.6873
Training final model...

 Mono-Temporal Model Performance:
Test F1: 0.6980
CV F1: 0.6873
Classification Report (Test Set):
              precision    recall  f1-score   support

       Human       0.70      0.70      0.70      1200
          AI       0.70      0.70      0.70      1200

    accuracy                           0.70      2400
   macro avg       0.70      0.70      0.70      2400
weighted avg       0.70      0.70      0.70      2400

Model saved: /content/drive/MyDrive/Tesi Magistrale/temporal_analysis/checkpoints/xgb_temporal_only.pkl


In [None]:

# TEMPORAL FEATURE IMPORTANCE (MONO-TEMPORAL MODEL)

# One row per temporal feature: fitted importance and its category
temporal_importance = pd.DataFrame({
    'feature': temporal_features,
    'importance': xgb_temporal.feature_importances_,
    'category': [feature_groups.get(name, 'Unknown') for name in temporal_features],
})
temporal_importance = temporal_importance.sort_values('importance', ascending=False)

# Express each importance as a share (in %) of the total
importance_total = temporal_importance['importance'].sum()
temporal_importance['importance_pct'] = 100 * temporal_importance['importance'] / importance_total

# Persist the per-feature table
temporal_importance_path = os.path.join(OUTPUT_DIR, 'tables', 'temporal_feature_importance.csv')
temporal_importance.to_csv(temporal_importance_path, index=False)
print(f" Feature importance saved: {temporal_importance_path}")

print("Top 20 temporal features by importance:")
top20_temporal = temporal_importance.head(20)
print(top20_temporal[['feature', 'category', 'importance_pct']].to_string(index=False))

# Aggregate by category: summed importance, summed share, feature count
print("Feature importance by category:")
category_importance = (
    temporal_importance
    .groupby('category')
    .agg(**{
        'Total Importance': ('importance', 'sum'),
        'Importance %': ('importance_pct', 'sum'),
        'N Features': ('feature', 'count'),
    })
    .round(2)
    .sort_values('Importance %', ascending=False)
)
print(category_importance.to_string())

 Feature importance saved: /content/drive/MyDrive/Tesi Magistrale/temporal_analysis/tables/temporal_feature_importance.csv
Top 20 temporal features by importance:
                             feature           category  importance_pct
                 temp_scope_variance        Constraints        4.961464
                 tg_ordering_entropy    Graph-Theoretic        4.739097
                     temp_num_events    Event Structure        3.994022
                temp_ref_time_shifts       Form-Meaning        3.873578
         temp_rel_parallel_edge_rate          Relations        3.823071
             temp_deixis_consistency       Form-Meaning        3.755541
      temp_constraint_violation_rate        Constraints        3.586406
                          tg_density Graph Organization        3.526302
                   tg_edge_retention    Graph-Theoretic        3.459198
               temp_rel_type_entropy          Relations        3.330871
                 tg_global_coherence    Graph

In [None]:
# Temporal Feature Importance


print("Generating temporal feature importance visualization...")

from matplotlib.patches import Patch

# At most 30 features; ascending order puts the most important bar on top
top_n = min(30, len(temporal_importance))
df_plot_importance = (
    temporal_importance
    .head(top_n)
    .sort_values('importance', ascending=True)
)

# Figure height grows with the number of bars, with a floor of 10
fig_height = max(10, top_n * 0.35)
fig, ax = plt.subplots(figsize=(12, fig_height))

# Each bar takes the colour of its category (gray when the category is unknown)
colors_importance = [
    category_colors.get(category, 'gray')
    for category in df_plot_importance['category']
]

y_pos = np.arange(len(df_plot_importance))
ax.barh(y_pos, df_plot_importance['importance_pct'], color=colors_importance, alpha=0.8)

ax.set_yticks(y_pos)
ax.set_yticklabels(df_plot_importance['feature'], fontsize=9)
ax.set_xlabel('XGBoost Importance (% Gain)', fontsize=12, fontweight='bold')
plot_title = (f'Top {top_n} Temporal Features by Importance\n'
              f'Mono-Temporal Model (Test F1={f1_temporal:.4f})')
ax.set_title(plot_title, fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3, axis='x')

# Legend lists only the categories that actually appear in the plot
plotted_categories = df_plot_importance['category'].values
legend_elements = []
for cat, color in category_colors.items():
    if cat in plotted_categories:
        legend_elements.append(Patch(facecolor=color, label=cat, alpha=0.8))
ax.legend(handles=legend_elements, loc='lower right', fontsize=9)

plt.tight_layout()

# Write the figure to disk and release it
temporal_importance_plot_path = os.path.join(OUTPUT_DIR, 'plots', 'temporal_importance_top30.png')
plt.savefig(temporal_importance_plot_path, dpi=300, bbox_inches='tight')
plt.close()

print(f"   Saved: {temporal_importance_plot_path}")

Generating temporal feature importance visualization...
   Saved: /content/drive/MyDrive/Tesi Magistrale/temporal_analysis/plots/temporal_importance_top30.png


In [None]:
# Category Importance Breakdown


def _plot_category_bars(ax, values, ylabel, title, label_offset, label_format):
    """Draw one bar per category with its value written above the bar.

    Args:
        ax: Matplotlib axes to draw on.
        values: Series indexed by category name, already in display order.
        ylabel: Y-axis label.
        title: Axes title.
        label_offset: Vertical gap between the bar top and its value label.
        label_format: Format string applied to each value (e.g. ``'{:.1f}%'``).
    """
    positions = range(len(values))
    bar_colors = [category_colors.get(cat, 'gray') for cat in values.index]

    ax.bar(positions, values, color=bar_colors, alpha=0.8)
    ax.set_xticks(positions)
    ax.set_xticklabels(values.index, rotation=45, ha='right', fontsize=10)
    ax.set_ylabel(ylabel, fontsize=12, fontweight='bold')
    ax.set_title(title, fontsize=13, fontweight='bold')
    ax.grid(True, alpha=0.3, axis='y')

    for position, val in zip(positions, values):
        ax.text(position, val + label_offset, label_format.format(val),
                ha='center', va='bottom', fontsize=9, fontweight='bold')


print("Generating category importance breakdown...")

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Plot 1: Total importance by category
sorted_categories = category_importance.sort_values('Importance %', ascending=False)
_plot_category_bars(
    ax1,
    sorted_categories['Importance %'],
    ylabel='Total Importance (%)',
    title='Temporal Feature Importance by Category\n(Total Contribution)',
    label_offset=0.5,
    label_format='{:.1f}%',
)

# Plot 2: Per-feature average importance.
# category_importance holds totals already rounded to 2 decimals, so dividing
# them by the feature count loses most of the precision shown in the 4-decimal
# labels. Take the mean of the unrounded per-feature importances instead
# (mean == sum / count, the same quantity).
sorted_categories['Avg per Feature'] = (
    temporal_importance
    .groupby('category')['importance']
    .mean()
    .reindex(sorted_categories.index)
)
sorted_avg = sorted_categories.sort_values('Avg per Feature', ascending=False)
_plot_category_bars(
    ax2,
    sorted_avg['Avg per Feature'],
    ylabel='Average Importance per Feature',
    title='Temporal Feature Importance by Category\n(Per-Feature Average)',
    label_offset=0.0005,
    label_format='{:.4f}',
)

plt.tight_layout()
category_importance_plot_path = os.path.join(OUTPUT_DIR, 'plots', 'temporal_category_importance.png')
plt.savefig(category_importance_plot_path, dpi=300, bbox_inches='tight')
plt.close()

print(f"   Saved: {category_importance_plot_path}")


Generating category importance breakdown...
   Saved: /content/drive/MyDrive/Tesi Magistrale/temporal_analysis/plots/temporal_category_importance.png


In [None]:
# BACKBONE-ONLY AND COMBINED (BACKBONE + TEMPORAL) MODELS

def _new_xgb_classifier():
    """Return an unfitted XGBClassifier with the settings used by both models in this cell."""
    return XGBClassifier(
        n_estimators=500,
        max_depth=9,
        learning_rate=0.15,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        eval_metric='logloss',
        use_label_encoder=False
    )


# --- Backbone-only model ---
# Labels are the same for every feature set, so y_temporal_train / y_temporal_test
# (taken from train_df / test_df) are reused here.
X_backbone_train = train_df[backbone_features].values
X_backbone_test = test_df[backbone_features].values

xgb_backbone = _new_xgb_classifier()

# Cross-validation on the training partition
print("Running 5-fold cross-validation")
cv_scores_backbone = cross_val_score(
    xgb_backbone, X_backbone_train, y_temporal_train,
    cv=5, scoring='f1'
)
print(f"F1 scores: {cv_scores_backbone}")
print(f"Mean F1: {cv_scores_backbone.mean():.4f} (±{cv_scores_backbone.std():.4f})")

# Refit on the full training partition and score the held-out test partition
xgb_backbone.fit(X_backbone_train, y_temporal_train)

y_pred_backbone = xgb_backbone.predict(X_backbone_test)
f1_backbone = f1_score(y_temporal_test, y_pred_backbone)

# The test F1 used to be printed under the "CV F1" label next to the CV
# standard deviation; the two metrics are now reported separately.
print("Backbone Model Performance:")
print(f"Test F1: {f1_backbone:.4f}")
print(f"CV F1: {cv_scores_backbone.mean():.4f} (±{cv_scores_backbone.std():.4f})")

# --- Combined model (backbone + temporal features) ---
all_features = backbone_features + temporal_features
X_combined_train = train_df[all_features].values
X_combined_test = test_df[all_features].values

xgb_combined = _new_xgb_classifier()

# Cross-validation on the training partition
cv_scores_combined = cross_val_score(
    xgb_combined, X_combined_train, y_temporal_train,
    cv=5, scoring='f1'
)
print(f"F1 scores: {cv_scores_combined}")
print(f"Mean F1: {cv_scores_combined.mean():.4f} (±{cv_scores_combined.std():.4f})")

# Train on full training set
print("Training final model")
xgb_combined.fit(X_combined_train, y_temporal_train)

# Evaluate on test set
y_pred_combined = xgb_combined.predict(X_combined_test)
f1_combined = f1_score(y_temporal_test, y_pred_combined)

print("Combined Model Performance:")
print(f"Test F1: {f1_combined:.4f}")
print(f"CV F1: {cv_scores_combined.mean():.4f} (±{cv_scores_combined.std():.4f})")

# Classification report
print("Classification Report (Test Set):")
print(classification_report(y_temporal_test, y_pred_combined,
                           target_names=['Human', 'AI']))

# Save model
combined_model_path = os.path.join(OUTPUT_DIR, 'checkpoints', 'xgb_combined.pkl')
with open(combined_model_path, 'wb') as f:
    pickle.dump(xgb_combined, f)
print(f"\n Model saved: {combined_model_path}")


Running 5-fold cross-validation
F1 scores: [0.81203008 0.81384373 0.85212766 0.83169342 0.82539683]
Mean F1: 0.8270 (±0.0145)
Backbone Model Performance:
CV F1: 0.8405 (±0.0145)
F1 scores: [0.82168022 0.82365477 0.86155485 0.82747771 0.82939633]
Mean F1: 0.8328 (±0.0147)
Training final model
Combined Model Performance:
CV F1: 0.8488 (±0.0147)
Classification Report (Test Set):
              precision    recall  f1-score   support

       Human       0.83      0.88      0.86      1200
          AI       0.87      0.83      0.85      1200

    accuracy                           0.85      2400
   macro avg       0.85      0.85      0.85      2400
weighted avg       0.85      0.85      0.85      2400


 Model saved: /content/drive/MyDrive/Tesi Magistrale/temporal_analysis/checkpoints/xgb_combined.pkl


In [None]:
# Importance of every feature in the combined model, most important first
combined_importance = pd.DataFrame({
    'feature': all_features,
    'importance': xgb_combined.feature_importances_,
})
combined_importance = combined_importance.sort_values('importance', ascending=False)

# Tag each feature as Backbone or Temporal, and give it a category
# (backbone features form a single 'Backbone' category)
backbone_lookup = set(backbone_features)
feature_names = combined_importance['feature']

combined_importance['type'] = feature_names.map(
    lambda name: 'Backbone' if name in backbone_lookup else 'Temporal'
)
combined_importance['category'] = feature_names.map(
    lambda name: 'Backbone' if name in backbone_lookup else feature_groups.get(name, 'Unknown')
)

# Express each importance as a share (in %) of the total
combined_total = combined_importance['importance'].sum()
combined_importance['importance_pct'] = 100 * combined_importance['importance'] / combined_total

# Persist the per-feature table
combined_importance_path = os.path.join(OUTPUT_DIR, 'tables', 'combined_feature_importance.csv')
combined_importance.to_csv(combined_importance_path, index=False)
print(f" Feature importance saved: {combined_importance_path}")

print("Top 20 features by importance (combined model):")
top20_combined = combined_importance.head(20)
print(top20_combined[['feature', 'type', 'category', 'importance_pct']].to_string(index=False))

# Columns summed in both roll-ups below
importance_columns = ['importance', 'importance_pct']

# Roll-up by feature type (Backbone vs Temporal)
print("Importance by feature type:")
type_summary = combined_importance.groupby('type')[importance_columns].sum().round(2)
type_summary.columns = ['Total Importance', 'Importance %']
print(type_summary.to_string())

# Roll-up by category (including backbone), largest share first
print("Importance by category:")
category_summary_combined = (
    combined_importance.groupby('category')[importance_columns].sum().round(2)
)
category_summary_combined.columns = ['Total Importance', 'Importance %']
category_summary_combined = category_summary_combined.sort_values('Importance %', ascending=False)
print(category_summary_combined.to_string())

 Feature importance saved: /content/drive/MyDrive/Tesi Magistrale/temporal_analysis/tables/combined_feature_importance.csv
Top 20 features by importance (combined model):
                 feature     type           category  importance_pct
                 yules_k Backbone           Backbone        7.541703
       trigram_diversity Backbone           Backbone        4.310170
     tg_ordering_entropy Temporal    Graph-Theoretic        3.977894
     temp_scope_variance Temporal        Constraints        3.918911
     sentence_length_std Backbone           Backbone        3.859744
              tg_density Temporal Graph Organization        3.632725
        type_token_ratio Backbone           Backbone        3.628889
         temp_num_events Temporal    Event Structure        3.516800
       tg_avg_out_degree Temporal    Graph-Theoretic        3.265255
             comma_ratio Backbone           Backbone        2.533337
     tg_global_coherence Temporal    Graph-Theoretic        2.505810
t

In [None]:

# Combined model feature importance (bars colour-coded by feature category)


print("Generating combined model feature importance visualization...")

# Take at most 20 rows from the head of the importance table, then order them
# ascending so that barh draws the largest bar at the top of the axis.
top_n_combined = min(20, len(combined_importance))
df_plot_combined = (
    combined_importance
    .head(top_n_combined)
    .sort_values('importance', ascending=True)
)

figure_height = max(12, top_n_combined * 0.3)
fig, ax = plt.subplots(figsize=(12, figure_height))

# Palette: the shared category colours plus a dedicated entry for backbone features
color_mapping = category_colors.copy()
color_mapping['Backbone'] = '#17becf'  # Cyan for backbone

# One colour per plotted row; categories missing from the palette fall back to gray
colors_combined = list(map(lambda cat: color_mapping.get(cat, 'gray'),
                           df_plot_combined['category']))

y_pos = np.arange(df_plot_combined.shape[0])
ax.barh(y_pos, df_plot_combined['importance_pct'], color=colors_combined, alpha=0.8)

ax.set_yticks(y_pos)
ax.set_yticklabels(df_plot_combined['feature'], fontsize=8)
ax.set_xlabel('XGBoost Importance (% Gain)', fontsize=12, fontweight='bold')
plot_title = (f'Top {top_n_combined} Features: Combined Model\n'
              f'Backbone + Temporal (Test F1={f1_combined:.4f})')
ax.set_title(plot_title, fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3, axis='x')

# Legend: backbone first, then every palette category that appears in the plotted rows
from matplotlib.patches import Patch
plotted_categories = df_plot_combined['category'].values
legend_elements = [Patch(facecolor=color_mapping['Backbone'], label='Backbone', alpha=0.8)]
legend_elements.extend(
    Patch(facecolor=color, label=cat, alpha=0.8)
    for cat, color in category_colors.items()
    if cat in plotted_categories
)
ax.legend(handles=legend_elements, loc='lower right', fontsize=9, ncol=2)

fig.tight_layout()
# NOTE(review): the file name says "top40" although at most 20 features are plotted;
# the name is left unchanged so existing references to the file keep working.
combined_importance_plot_path = os.path.join(OUTPUT_DIR, 'plots', 'combined_importance_top40.png')
fig.savefig(combined_importance_plot_path, dpi=300, bbox_inches='tight')
plt.close()

print(f"   Saved: {combined_importance_plot_path}")

Generating combined model feature importance visualization...
   Saved: /content/drive/MyDrive/Tesi Magistrale/temporal_analysis/plots/combined_importance_top40.png


In [None]:
# ABLATION STUDY - CATEGORY-LEVEL (COMBINED MODEL)
#
# For every temporal feature category: retrain the XGBoost classifier on the
# backbone features plus all temporal features outside that category, score it
# on the test split, and record the F1 difference to the full combined model
# (f1_combined). A negative delta_f1 means F1 fell when the category was removed.
# NOTE(review): every configuration is fitted once with random_state=42, so very
# small deltas may be within run-to-run noise - confirm before interpreting them.

print("ABLATION STUDY - CATEGORY-LEVEL")


print("Testing removal of each temporal feature category...")

ablation_results = []

# Baseline: Full combined model
ablation_results.append({
    'configuration': 'Full Model (Backbone + All Temporal)',
    'n_features': len(all_features),
    'f1_score': f1_combined,
    'delta_f1': 0.0,
    'features_removed': 'None'
})

# Test removing each category
for category in ['Event Structure', 'Relations', 'Graph-Theoretic',
                 'Constraints', 'Form-Meaning', 'Graph Organization']:

    print(f"\n  Testing: Backbone + All Temporal EXCEPT {category}")

    # Features that feature_groups (feature -> category) assigns to this category
    category_feats = [f for f, c in feature_groups.items() if c == category]

    # Only features present in temporal_features can actually be removed from the
    # model input, so the skip check and the reported count are based on those
    # rather than on category_feats (feature_groups is not guaranteed here to be
    # limited to temporal_features).
    category_feat_set = set(category_feats)
    removed_feats = [f for f in temporal_features if f in category_feat_set]

    if len(removed_feats) == 0:
        print(f"     No features in {category}")
        continue

    # Create feature set without this category
    features_without_category = backbone_features + [
        f for f in temporal_features if f not in category_feat_set
    ]

    print(f"  Features without {category}: {len(features_without_category)}")
    print(f"  Removed: {len(removed_feats)} features")

    # Train model on the reduced feature set
    X_ablation_train = train_df[features_without_category].values
    X_ablation_test = test_df[features_without_category].values

    xgb_ablation = XGBClassifier(
        n_estimators=500,
        max_depth=9,
        learning_rate=0.15,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        eval_metric='logloss',
        use_label_encoder=False
    )

    xgb_ablation.fit(X_ablation_train, y_temporal_train)
    y_pred_ablation = xgb_ablation.predict(X_ablation_test)
    f1_ablation = f1_score(y_temporal_test, y_pred_ablation)

    # Negative delta = the reduced model scores below the full combined model
    delta = f1_ablation - f1_combined

    print(f"  F1: {f1_ablation:.4f} (Δ{delta:+.4f})")

    ablation_results.append({
        'configuration': f'Without {category}',
        'n_features': len(features_without_category),
        'f1_score': f1_ablation,
        'delta_f1': delta,
        'features_removed': category
    })

# Also test: Backbone only (already computed)
ablation_results.append({
    'configuration': 'Backbone Only (All Temporal Removed)',
    'n_features': len(backbone_features),
    'f1_score': f1_backbone,
    'delta_f1': f1_backbone - f1_combined,
    'features_removed': 'All Temporal'
})

df_ablation = pd.DataFrame(ablation_results)

# Save ablation results
ablation_path = os.path.join(OUTPUT_DIR, 'tables', 'ablation_category_level.csv')
df_ablation.to_csv(ablation_path, index=False)
print(f"\n Ablation results saved: {ablation_path}")

print("Category-Level Ablation Summary:")
print(df_ablation.sort_values('delta_f1', ascending=True).to_string(index=False))

# Identify most important category: the "Without ..." row with the most negative ΔF1.
category_rows = df_ablation[df_ablation['configuration'].str.startswith('Without')]
if category_rows.empty:
    # Every category was skipped above, so there is nothing to rank
    # (previously this case raised IndexError on .iloc[0]).
    most_important_category = None
    print("Most Critical Category: none (no category configuration was evaluated)")
else:
    most_important_category = category_rows.sort_values('delta_f1').iloc[0]
    worst_delta = most_important_category['delta_f1']
    print(f"Most Critical Category:")
    print(f"{most_important_category['features_removed']}")
    if worst_delta < 0:
        # Report the drop as a positive magnitude instead of "-0.0105 drop"
        print(f"Removal causes: {-worst_delta:.4f} drop in F1")
    else:
        # No category removal lowered F1; say so rather than calling a gain a drop
        print(f"Removal causes no drop in F1 (Δ{worst_delta:+.4f})")

ABLATION STUDY - CATEGORY-LEVEL
Testing removal of each temporal feature category...

  Testing: Backbone + All Temporal EXCEPT Event Structure
  Features without Event Structure: 36
  Removed: 6 features
  F1: 0.8436 (Δ-0.0052)

  Testing: Backbone + All Temporal EXCEPT Relations
  Features without Relations: 33
  Removed: 9 features
  F1: 0.8493 (Δ+0.0005)

  Testing: Backbone + All Temporal EXCEPT Graph-Theoretic
  Features without Graph-Theoretic: 33
  Removed: 9 features
  F1: 0.8462 (Δ-0.0026)

  Testing: Backbone + All Temporal EXCEPT Constraints
  Features without Constraints: 39
  Removed: 3 features
  F1: 0.8383 (Δ-0.0105)

  Testing: Backbone + All Temporal EXCEPT Form-Meaning
  Features without Form-Meaning: 39
  Removed: 3 features
  F1: 0.8481 (Δ-0.0007)

  Testing: Backbone + All Temporal EXCEPT Graph Organization
  Features without Graph Organization: 39
  Removed: 3 features
  F1: 0.8505 (Δ+0.0017)

 Ablation results saved: /content/drive/MyDrive/Tesi Magistrale/tempor

In [None]:
# VISUALIZATION 7: Category-Level Ablation Study
#
# Left panel: test F1 of every ablation configuration in df_ablation.
# Right panel: ΔF1 of each "Without <category>" configuration relative to the
# full combined model (negative = F1 fell when the category was removed).

print("Generating category-level ablation visualization...")

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 7))

# Plot 1: F1 scores for each configuration
df_ablation_sorted = df_ablation.sort_values('f1_score', ascending=True)
y_pos = np.arange(len(df_ablation_sorted))

# Color code: full model = green, backbone only = red, "Without <category>" bars
# shaded by how much F1 fell when that category was removed.
colors_ablation = []
for _, row in df_ablation_sorted.iterrows():
    if row['configuration'] == 'Full Model (Backbone + All Temporal)':
        colors_ablation.append('#2ca02c')  # Green
    elif row['configuration'] == 'Backbone Only (All Temporal Removed)':
        colors_ablation.append('#d62728')  # Red
    else:
        # Signed drop: positive when F1 fell. Taking abs(delta_f1) here would
        # shade a configuration that *improved* F1 as if it were a large drop,
        # which contradicts the signed colouring used in the right-hand panel.
        drop = -row['delta_f1']
        if drop < 0.002:
            colors_ablation.append('#ffeda0')  # Light yellow (negligible drop or a gain)
        elif drop < 0.005:
            colors_ablation.append('#feb24c')  # Orange
        else:
            colors_ablation.append('#f03b20')  # Dark orange

ax1.barh(y_pos, df_ablation_sorted['f1_score'], color=colors_ablation, alpha=0.85)
ax1.set_yticks(y_pos)
ax1.set_yticklabels(df_ablation_sorted['configuration'], fontsize=10)
ax1.set_xlabel('F1 Score', fontsize=12, fontweight='bold')
ax1.set_title('Ablation Study: F1 Score by Configuration\n(Which Categories Are Essential?)',
              fontsize=13, fontweight='bold')
# Vertical guides marking the two reference scores
ax1.axvline(x=f1_combined, color='green', linestyle='--', linewidth=2,
            label=f'Full Model ({f1_combined:.4f})')
ax1.axvline(x=f1_backbone, color='red', linestyle='--', linewidth=2,
            label=f'Backbone Only ({f1_backbone:.4f})')
ax1.legend(fontsize=10)
ax1.grid(True, alpha=0.3, axis='x')

# Add value labels just right of each bar end
for yval, score in zip(y_pos, df_ablation_sorted['f1_score']):
    ax1.text(score + 0.001, yval, f'{score:.4f}',
             va='center', fontsize=9, fontweight='bold')

# Plot 2: Delta F1 (impact of removal), category configurations only
df_ablation_impact = df_ablation[df_ablation['configuration'].str.startswith('Without')].copy()
df_ablation_impact = df_ablation_impact.sort_values('delta_f1', ascending=True)

y_pos_impact = np.arange(len(df_ablation_impact))
colors_impact = ['red' if delta < -0.003 else 'orange' if delta < -0.001 else 'yellow'
                 for delta in df_ablation_impact['delta_f1']]

ax2.barh(y_pos_impact, df_ablation_impact['delta_f1'], color=colors_impact, alpha=0.85)
ax2.set_yticks(y_pos_impact)
ax2.set_yticklabels([conf.replace('Without ', '') for conf in df_ablation_impact['configuration']],
                     fontsize=10)
ax2.set_xlabel('ΔF1 (vs Full Model)', fontsize=12, fontweight='bold')
ax2.set_title('Impact of Removing Each Category\n(Negative = Performance Drop)',
              fontsize=13, fontweight='bold')
ax2.axvline(x=0, color='black', linestyle='-', linewidth=1)
ax2.grid(True, alpha=0.3, axis='x')

# Add value labels: outside the bar end, on the side the bar points to
for yval, delta in zip(y_pos_impact, df_ablation_impact['delta_f1']):
    x_pos = delta - 0.0005 if delta < 0 else delta + 0.0005
    ax2.text(x_pos, yval, f'{delta:.4f}',
             va='center', ha='right' if delta < 0 else 'left',
             fontsize=9, fontweight='bold')

plt.suptitle('Ablation Study: Importance of Each Temporal Feature Category',
             fontsize=16, fontweight='bold')
plt.tight_layout()
ablation_plot_path = os.path.join(OUTPUT_DIR, 'plots', 'ablation_category_level.png')
plt.savefig(ablation_plot_path, dpi=300, bbox_inches='tight')
plt.close()

print(f"   Saved: {ablation_plot_path}")

Generating category-level ablation visualization...
   Saved: /content/drive/MyDrive/Tesi Magistrale/temporal_analysis/plots/ablation_category_level.png


In [None]:

# ABLATION STUDY - INDIVIDUAL FEATURES (TOP TEMPORAL)
#
# Leave-one-feature-out ablation: for each selected temporal feature, refit the
# classifier on all_features minus that feature and record the test F1 and its
# difference to the full combined model (f1_combined).



print("ABLATION STUDY - INDIVIDUAL TOP TEMPORAL FEATURES")


print("Testing removal of top 20 temporal features individually...")

# First 20 'Temporal' rows of the combined-model importance table
# NOTE(review): "top" presumes combined_importance is sorted by importance - confirm.
temporal_rows = combined_importance[combined_importance['type'] == 'Temporal']
top_temporal_features = temporal_rows.head(20)['feature'].tolist()

# Hyperparameters shared by every leave-one-out refit
loo_xgb_params = dict(
    n_estimators=500,
    max_depth=9,
    learning_rate=0.15,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    eval_metric='logloss',
    use_label_encoder=False,
)

individual_ablation_results = []

for feat in tqdm(top_temporal_features, desc="Ablating features"):
    # Every feature of the combined model except the one being ablated
    features_without_feat = [name for name in all_features if name != feat]

    # Refit and score on the reduced feature matrix
    X_ablation_train = train_df[features_without_feat].values
    X_ablation_test = test_df[features_without_feat].values

    xgb_ablation = XGBClassifier(**loo_xgb_params)
    xgb_ablation.fit(X_ablation_train, y_temporal_train)
    y_pred_ablation = xgb_ablation.predict(X_ablation_test)
    f1_ablation = f1_score(y_temporal_test, y_pred_ablation)

    # Negative delta = F1 fell once the feature was removed
    delta = f1_ablation - f1_combined

    # Importance share of this feature in the combined model (first matching row)
    feat_mask = combined_importance['feature'] == feat
    feat_importance = combined_importance.loc[feat_mask, 'importance_pct'].values[0]
    feat_category = feature_groups.get(feat, 'Unknown')

    individual_ablation_results.append(dict(
        feature=feat,
        category=feat_category,
        importance_pct=feat_importance,
        f1_without=f1_ablation,
        delta_f1=delta,
    ))

# Most harmful removals (most negative ΔF1) first
df_individual_ablation = (
    pd.DataFrame(individual_ablation_results)
    .sort_values('delta_f1', ascending=True)
)

# Save individual ablation results
individual_ablation_path = os.path.join(OUTPUT_DIR, 'tables', 'ablation_individual_features.csv')
df_individual_ablation.to_csv(individual_ablation_path, index=False)
print(f"\n Individual ablation results saved: {individual_ablation_path}")

summary_columns = ['feature', 'category', 'importance_pct', 'delta_f1']
print("Individual Feature Ablation Summary (Top 20 Temporal Features):")
print(df_individual_ablation[summary_columns].to_string(index=False))

# Identify most critical individual features: the five rows with the lowest ΔF1.
# delta_f1 is printed with its sign, so a drop appears as a negative number
# followed by the word "drop".
print("Most Critical Individual Features (largest drop when removed):")
most_critical = df_individual_ablation.head(5)
for idx, row in most_critical.iterrows():
    print(f"{row['feature']} [{row['category']}]: {row['delta_f1']:.4f} drop")


ABLATION STUDY - INDIVIDUAL TOP TEMPORAL FEATURES
Testing removal of top 20 temporal features individually...


Ablating features: 100%|██████████| 20/20 [03:05<00:00,  9.26s/it]



 Individual ablation results saved: /content/drive/MyDrive/Tesi Magistrale/temporal_analysis/tables/ablation_individual_features.csv
Individual Feature Ablation Summary (Top 20 Temporal Features):
                     feature           category  importance_pct  delta_f1
temp_rel_confidence_variance          Relations        1.801754 -0.006178
           tg_avg_out_degree    Graph-Theoretic        3.265255 -0.005139
   temp_constraint_csp_score        Constraints        1.915049 -0.005139
 temp_rel_parallel_edge_rate          Relations        2.082510 -0.005139
            tg_avg_in_degree    Graph-Theoretic        1.851448 -0.005139
         tg_global_coherence    Graph-Theoretic        2.505810 -0.004739
                  tg_density Graph Organization        3.632725 -0.003921
             temp_num_events    Event Structure        3.516800 -0.003497
         temp_scope_variance        Constraints        3.918911 -0.003259
     temp_deixis_consistency       Form-Meaning        2.18083

In [None]:

# VISUALIZATION 8: Individual Feature Ablation
#
# Left panel: ΔF1 for each ablated temporal feature (negative = F1 fell).
# Right panel: combined-model importance share of each feature against the F1
# drop observed when that feature was removed.


print("Generating individual feature ablation visualization...")

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 8))

# Plot 1: Delta F1 for each feature removal
df_plot_ablation = df_individual_ablation.sort_values('delta_f1', ascending=True)
y_pos = np.arange(len(df_plot_ablation))

# Color by category; categories missing from the palette fall back to gray
colors_feat_ablation = [category_colors.get(cat, 'gray') for cat in df_plot_ablation['category']]

ax1.barh(y_pos, df_plot_ablation['delta_f1'], color=colors_feat_ablation, alpha=0.8)
ax1.set_yticks(y_pos)
ax1.set_yticklabels(df_plot_ablation['feature'], fontsize=9)
ax1.set_xlabel('ΔF1 (vs Full Model)', fontsize=12, fontweight='bold')
ax1.set_title('Impact of Removing Each Top Temporal Feature\n(Negative = Performance Drop)',
              fontsize=13, fontweight='bold')
ax1.axvline(x=0, color='black', linestyle='-', linewidth=1)
ax1.grid(True, alpha=0.3, axis='x')

# Add value labels for the first 10 bars (the lowest ΔF1 values)
for i in range(min(10, len(y_pos))):
    yval = y_pos[i]
    delta = df_plot_ablation.iloc[i]['delta_f1']
    x_pos = delta - 0.0002 if delta < 0 else delta + 0.0002
    ax1.text(x_pos, yval, f'{delta:.4f}',
             va='center', ha='right' if delta < 0 else 'left',
             fontsize=8, fontweight='bold')

# Plot 2: Importance vs Impact scatter
importance_values = df_individual_ablation['importance_pct']
f1_drop_values = -df_individual_ablation['delta_f1']  # Sign flipped so drops are positive
ax2.scatter(importance_values,
            f1_drop_values,
            c=[category_colors.get(cat, 'gray') for cat in df_individual_ablation['category']],
            s=100, alpha=0.7, edgecolors='black', linewidths=0.5)

ax2.set_xlabel('Feature Importance (% Gain)', fontsize=12, fontweight='bold')
# The plotted value is the signed -ΔF1 (it goes negative when removal improves F1),
# so the axis is labelled as such rather than as an absolute value.
ax2.set_ylabel('Performance Drop When Removed (-ΔF1)', fontsize=12, fontweight='bold')
ax2.set_title('Importance vs Impact Analysis\n(Do Important Features Actually Matter?)',
              fontsize=13, fontweight='bold')
ax2.grid(True, alpha=0.3)

# Proportional reference line from the origin to (largest importance, largest drop).
# The earlier version took max() over importance % and ΔF1 together (different
# units) and drew the line with a fixed 0.01 slope, which extended the y-axis far
# beyond the plotted drops and squeezed the points against the x-axis.
max_importance = importance_values.max()
max_drop = max(f1_drop_values.max(), 0.0)  # Clamp: no downward line when nothing dropped
ax2.plot([0, max_importance], [0, max_drop], 'k--', alpha=0.3, linewidth=1)

# Annotate top 5 most impactful (lowest ΔF1), with the feature-name prefixes stripped
top_5_impact = df_individual_ablation.nsmallest(5, 'delta_f1')
for _, row in top_5_impact.iterrows():
    ax2.annotate(row['feature'].replace('temp_', '').replace('tg_', ''),
                xy=(row['importance_pct'], -row['delta_f1']),
                xytext=(5, 5), textcoords='offset points',
                fontsize=8, alpha=0.8,
                bbox=dict(boxstyle='round,pad=0.3', facecolor='yellow', alpha=0.5))

# Add legend: one patch per palette category that occurs in the ablation table
from matplotlib.patches import Patch
legend_elements = [Patch(facecolor=color, label=cat, alpha=0.7)
                   for cat, color in category_colors.items()
                   if cat in df_individual_ablation['category'].values]
ax2.legend(handles=legend_elements, loc='upper left', fontsize=9)

plt.suptitle('Individual Feature Ablation: Top 20 Temporal Features',
             fontsize=16, fontweight='bold')
plt.tight_layout()
individual_ablation_plot_path = os.path.join(OUTPUT_DIR, 'plots', 'ablation_individual_features.png')
plt.savefig(individual_ablation_plot_path, dpi=300, bbox_inches='tight')
plt.close()

print(f"   Saved: {individual_ablation_plot_path}")

Generating individual feature ablation visualization...
   Saved: /content/drive/MyDrive/Tesi Magistrale/temporal_analysis/plots/ablation_individual_features.png


In [None]:

print("SHAP ANALYSIS (COMBINED MODEL)")


print("Computing SHAP values for combined model...")
print("   This may take several minutes...")

# Limit the SHAP computation to at most 500 test rows for speed.
# NOTE(review): this takes the FIRST rows of X_combined_test rather than a random
# draw; if the test split is ordered, the subset may not be representative - confirm.
n_shap_samples = min(500, len(X_combined_test))
X_shap = X_combined_test[:n_shap_samples]

# Tree explainer built from the combined model, then evaluated on the subset
explainer = shap.TreeExplainer(xgb_combined)
shap_values = explainer.shap_values(X_shap)

print(f" SHAP values computed for {n_shap_samples} samples")

# Persist the SHAP values together with the inputs and feature names they refer to
shap_path = os.path.join(OUTPUT_DIR, 'checkpoints', 'shap_values.pkl')
shap_payload = {
    'shap_values': shap_values,
    'X_shap': X_shap,
    'feature_names': all_features,
}
with open(shap_path, 'wb') as f:
    pickle.dump(shap_payload, f)
print(f" SHAP values saved: {shap_path}")


STEP 16: SHAP ANALYSIS (COMBINED MODEL)

→ Computing SHAP values for combined model...
  ⚠ This may take several minutes...
✓ SHAP values computed for 500 samples
✓ SHAP values saved: /content/drive/MyDrive/Tesi Magistrale/temporal_analysis/checkpoints/shap_values.pkl


In [None]:

# VISUALIZATION 9: SHAP Summary Plot

# Imported in this cell so it does not depend on an earlier plotting cell having run
from matplotlib.patches import Patch

print("Generating SHAP summary plot...")

# NOTE(review): shap.summary_plot defaults to plot_size="auto", which presumably
# resizes the current figure and so overrides this figsize - confirm against the
# SHAP documentation and pass plot_size explicitly if (12, 10) is really intended.
fig, ax = plt.subplots(figsize=(12, 10))

# SHAP summary plot; show=False keeps the figure open so it can be titled and saved
shap.summary_plot(shap_values, X_shap,
                  feature_names=all_features,
                  max_display=30,
                  show=False)

plt.title('SHAP Feature Importance: Combined Model\n(Feature Impact on Predictions)',
          fontsize=14, fontweight='bold', pad=20)
plt.tight_layout()
shap_plot_path = os.path.join(OUTPUT_DIR, 'plots', 'shap_summary_combined.png')
plt.savefig(shap_plot_path, dpi=300, bbox_inches='tight')
plt.close()

print(f"   Saved: {shap_plot_path}")


# VISUALIZATION 10: SHAP Bar Plot (Mean Absolute SHAP)


print("Generating SHAP bar plot...")

# Calculate mean absolute SHAP values (averaged over axis 0)
# NOTE(review): assumes shap_values is a single 2-D array (samples x features),
# one column per entry of all_features - confirm for the installed SHAP version.
mean_abs_shap = np.abs(shap_values).mean(axis=0)

# Set for constant-time membership tests while labelling each feature
backbone_feature_set = set(backbone_features)

# Create dataframe: one row per feature, most influential first
shap_importance = pd.DataFrame({
    'feature': all_features,
    'mean_abs_shap': mean_abs_shap,
    'type': ['Backbone' if f in backbone_feature_set else 'Temporal' for f in all_features],
    'category': ['Backbone' if f in backbone_feature_set else feature_groups.get(f, 'Unknown')
                 for f in all_features]
}).sort_values('mean_abs_shap', ascending=False)

# Save SHAP importance
shap_importance_path = os.path.join(OUTPUT_DIR, 'tables', 'shap_importance.csv')
shap_importance.to_csv(shap_importance_path, index=False)
print(f" SHAP importance saved: {shap_importance_path}")

# Plot top 30 (ascending order so the largest bar is drawn at the top)
top_n_shap = min(30, len(shap_importance))
df_plot_shap = shap_importance.head(top_n_shap).sort_values('mean_abs_shap', ascending=True)

fig, ax = plt.subplots(figsize=(12, max(10, top_n_shap * 0.35)))

# Rebuild the palette locally (category colours plus backbone) instead of relying
# on the color_mapping variable left behind by an earlier plotting cell.
color_mapping = category_colors.copy()
color_mapping['Backbone'] = '#17becf'  # Cyan for backbone

# Color by category; categories missing from the palette fall back to gray
colors_shap = [color_mapping.get(cat, 'gray') for cat in df_plot_shap['category']]

y_pos = np.arange(len(df_plot_shap))
ax.barh(y_pos, df_plot_shap['mean_abs_shap'], color=colors_shap, alpha=0.8)

ax.set_yticks(y_pos)
ax.set_yticklabels(df_plot_shap['feature'], fontsize=9)
ax.set_xlabel('Mean |SHAP Value|', fontsize=12, fontweight='bold')
ax.set_title(f'Top {top_n_shap} Features by SHAP Importance\n(Average Impact on Model Output)',
             fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3, axis='x')

# Add legend: backbone first, then each palette category present in the plotted rows
legend_elements = [Patch(facecolor=color_mapping['Backbone'], label='Backbone', alpha=0.8)]
legend_elements += [Patch(facecolor=color, label=cat, alpha=0.8)
                    for cat, color in category_colors.items()
                    if cat in df_plot_shap['category'].values]
ax.legend(handles=legend_elements, loc='lower right', fontsize=9, ncol=2)

plt.tight_layout()
shap_bar_plot_path = os.path.join(OUTPUT_DIR, 'plots', 'shap_bar_top30.png')
plt.savefig(shap_bar_plot_path, dpi=300, bbox_inches='tight')
plt.close()

print(f"   Saved: {shap_bar_plot_path}")


→ Generating SHAP summary plot...
  ✓ Saved: /content/drive/MyDrive/Tesi Magistrale/temporal_analysis/plots/shap_summary_combined.png

→ Generating SHAP bar plot...
✓ SHAP importance saved: /content/drive/MyDrive/Tesi Magistrale/temporal_analysis/tables/shap_importance.csv
  ✓ Saved: /content/drive/MyDrive/Tesi Magistrale/temporal_analysis/plots/shap_bar_top30.png
