In [None]:
# XGBoost evaluation across all feature categories


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import StratifiedKFold, cross_validate
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, roc_auc_score, confusion_matrix,
                             classification_report)
from xgboost import XGBClassifier
import warnings
warnings.filterwarnings('ignore')

from google.colab import drive
drive.mount('/content/drive')

# Drive locations: inputs are read from MASTER_DIR, results are written under OUTPUT_DIR
BASE_DRIVE = '/content/drive/MyDrive/Tesi Magistrale'
MASTER_DIR = BASE_DRIVE + '/master_features'
OUTPUT_DIR = BASE_DRIVE + '/analysis_results'

# Ensure the output tree exists: the root first ('' suffix), then its subfolders
import os
for subfolder in ('', '/models', '/figures', '/reports'):
    os.makedirs(OUTPUT_DIR + subfolder, exist_ok=True)

# Master feature table (one CSV holding every feature column plus id / label)
master_csv_path = MASTER_DIR + '/master_features_complete.csv'
df_master = pd.read_csv(master_csv_path)
print("Loaded master dataframe")

# Manifest CSV used downstream to group features by category
manifest_csv_path = MASTER_DIR + '/feature_manifest.csv'
df_manifest = pd.read_csv(manifest_csv_path)
print("Feature manifest loaded")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Loaded master dataframe
Feature manifest loaded


In [None]:
#Defining feature categories

# Build {category: [feature, ...]} from the manifest, in order of first
# appearance, keeping only features that are real columns of df_master.
FEATURE_CATEGORIES = {}
for category in df_manifest['category'].unique():
    in_category = df_manifest['category'] == category
    features = [
        name
        for name in df_manifest.loc[in_category, 'feature'].tolist()
        if name in df_master.columns
    ]
    FEATURE_CATEGORIES[category] = features

#Experimental configuration

# Target column and feature matrix (identifier and label columns removed)
y = df_master['is_ai']
X = df_master.drop(columns=['id', 'is_ai'])

# Flattened feature lists, in category order, for each experiment
all_feature_names = [
    feature
    for category_features in FEATURE_CATEGORIES.values()
    for feature in category_features
]
cognitive_feature_names = [
    feature
    for category_name, category_features in FEATURE_CATEGORIES.items()
    if category_name != 'BACKBONE'
    for feature in category_features
]

# Experiment registry: key -> display name, candidate features, description
EXPERIMENTS = {
    # Every category, BACKBONE included
    'all_features': {
        'name': 'ALL FEATURES',
        'features': all_feature_names,
        'description': 'All cognitive + backbone features'
    },
    # Every category except BACKBONE
    'monocognitive': {
        'name': 'COGNITION',
        'features': cognitive_feature_names,
        'description': 'Pure cognitive features (no stylometrics)'
    },
}

# VALIDATE ALL CONFIGURATIONS - Add valid_features to each config
# Each config gains three keys: 'valid_features' (de-duplicated features that
# are columns of X), 'n_features' (their count) and 'missing_features'
# (requested features that are not columns of X).
print("Validating experimental configurations\n")
for exp_name, exp_config in EXPERIMENTS.items():
    # dict.fromkeys drops duplicates while keeping first-seen order
    features = list(dict.fromkeys(exp_config['features']))
    valid_features = [f for f in features if f in X.columns]
    missing_features = [f for f in features if f not in X.columns]

    # Store validated features in config
    exp_config['valid_features'] = valid_features
    exp_config['n_features'] = len(valid_features)
    exp_config['missing_features'] = missing_features

    if missing_features:
        print(f"{exp_name}: {len(missing_features)} missing features")

print("experimental configurations validated")
# Per-experiment summary: the status marker was previously computed and then
# discarded, so nothing was ever reported here.
for exp_name, exp_config in EXPERIMENTS.items():
    status = "✓" if exp_config['n_features'] > 0 else "✗"
    print(f"  {status} {exp_name}: {exp_config['n_features']} features")

# Check if any experiments have no features, and name them (the header line
# ends with a colon, so the list it announces is printed right after it).
empty_experiments = [name for name, config in EXPERIMENTS.items() if config['n_features'] == 0]
if empty_experiments:
    print(f"{len(empty_experiments)} experiments have no valid features:")
    for name in empty_experiments:
        print(f"  - {name}")


Validating experimental configurations

experimental configurations validated


In [None]:
# Keyword arguments forwarded verbatim to XGBClassifier(**XGBOOST_PARAMS)
XGBOOST_PARAMS = dict(
    n_estimators=500,
    max_depth=9,
    learning_rate=0.15,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    n_jobs=-1,
    eval_metric='logloss',
)


# Number of splits handed to StratifiedKFold
CV_FOLDS = 5

def evaluate_model(X_exp, y_exp, experiment_name, n_features, verbose=True):
    """
    Train and evaluate an XGBoost classifier with stratified k-fold cross-validation.

    The classifier is built from XGBOOST_PARAMS and scored with cross_validate
    over CV_FOLDS shuffled stratified folds. The same estimator object is then
    fitted on the full data and used to predict that same data.

    Args:
        X_exp: Feature matrix for this experiment.
        y_exp: Target labels aligned with X_exp. Presumably binary 0/1 (the
            'precision'/'recall'/'f1' scorers and predict_proba[:, 1] are
            used) - TODO confirm.
        experiment_name: Label stored under 'experiment' in the result.
        n_features: Feature count stored under 'n_features' (not recomputed).
        verbose: When True, print CV F1, accuracy and ROC-AUC as mean ± std.

    Returns:
        results: Dictionary with all metrics and model:
            - 'experiment', 'n_features'
            - 'cv_<metric>_mean' / 'cv_<metric>_std' over the test folds for
              accuracy, precision, recall, f1 and roc_auc
            - 'full_<metric>' computed on the data the final model was fitted
              on (resubstitution scores, not a generalisation estimate)
            - 'model', 'cv_results', 'y_pred', 'y_pred_proba'
    """

    # Initialize model
    model = XGBClassifier(**XGBOOST_PARAMS)

    # Define cross-validation. The splitter is seeded with the same seed as
    # the model so fold assignment - and therefore every CV score - is
    # reproducible across runs; without it shuffle=True draws new folds each
    # time. Falls back to None (unseeded) if no seed is configured.
    cv = StratifiedKFold(
        n_splits=CV_FOLDS,
        shuffle=True,
        random_state=XGBOOST_PARAMS.get('random_state')
    )

    # Define scoring metrics (result key suffix -> scorer name)
    scoring = {
        'accuracy': 'accuracy',
        'precision': 'precision',
        'recall': 'recall',
        'f1': 'f1',
        'roc_auc': 'roc_auc'
    }

    # Perform cross-validation
    # NOTE(review): XGBOOST_PARAMS and this call both set n_jobs=-1; that may
    # oversubscribe CPU cores - confirm this is intended.
    cv_results = cross_validate(
        model, X_exp, y_exp,
        cv=cv,
        scoring=scoring,
        return_train_score=True,
        n_jobs=-1
    )

    # Train final model on full data and predict that same data
    model.fit(X_exp, y_exp)
    y_pred = model.predict(X_exp)
    y_pred_proba = model.predict_proba(X_exp)[:, 1]

    results = {
        'experiment': experiment_name,
        'n_features': n_features,
    }

    # Cross-validation metrics (test scores): mean and std across folds
    for metric in scoring:
        fold_scores = cv_results[f'test_{metric}']
        results[f'cv_{metric}_mean'] = fold_scores.mean()
        results[f'cv_{metric}_std'] = fold_scores.std()

    results.update({
        # Full data metrics (for reference)
        'full_accuracy': accuracy_score(y_exp, y_pred),
        'full_precision': precision_score(y_exp, y_pred),
        'full_recall': recall_score(y_exp, y_pred),
        'full_f1': f1_score(y_exp, y_pred),
        'full_roc_auc': roc_auc_score(y_exp, y_pred_proba),

        # Model and predictions
        'model': model,
        'cv_results': cv_results,
        'y_pred': y_pred,
        'y_pred_proba': y_pred_proba
    })

    if verbose:
        print(f"CV F1: {results['cv_f1_mean']:.4f} ± {results['cv_f1_std']:.4f}")
        print(f"CV Accuracy: {results['cv_accuracy_mean']:.4f} ± {results['cv_accuracy_std']:.4f}")
        print(f" CV ROC-AUC: {results['cv_roc_auc_mean']:.4f} ± {results['cv_roc_auc_std']:.4f}")

    return results


print("Evaluation function defined")

Evaluation function defined


In [None]:
print("Running Experiments")

# Results of evaluate_model keyed by experiment key (e.g. 'all_features')
all_results = {}

for exp_name, exp_config in EXPERIMENTS.items():
    print(f"\n{'='*80}")
    print(f"Experiment: {exp_config['name']}")
    print(f"{'='*80}")
    print(f"Description: {exp_config['description']}")

    # Get features
    features = exp_config['valid_features']

    # Skip experiments with no valid features
    if len(features) == 0:
        print("No valid features")
        continue

    # Prepare data (copy so the shared feature matrix X is never modified)
    X_exp = X[features].copy()

    # Fill missing values with each column's median.
    # NOTE(review): the medians are computed on the full dataset before the
    # cross-validation split inside evaluate_model, so held-out folds
    # contribute to the imputed values - confirm this is acceptable.
    if X_exp.isna().any().any():
        print("NaN values found - filling with median")
        X_exp = X_exp.fillna(X_exp.median())

    # Run evaluation
    results = evaluate_model(
        X_exp, y,
        exp_config['name'],
        len(features),
        verbose=True
    )

    all_results[exp_name] = results

print(f"\n{'='*80}")
print("EXPERIMENTS COMPLETE")


Running Experiments

Experiment: ALL FEATURES
Description: All cognitive + backbone features
CV F1: 0.9234 ± 0.0056
CV Accuracy: 0.9246 ± 0.0055
CV ROC-AUC: 0.9778 ± 0.0026

Experiment: COGNITION
Description: Pure cognitive features (no stylometrics)
CV F1: 0.8660 ± 0.0041
CV Accuracy: 0.8691 ± 0.0041
CV ROC-AUC: 0.9442 ± 0.0058

EXPERIMENTS COMPLETE
