# 1. 📚 SETUP AND PREPARATION

## 1.1 Libraries and Dependencies

In [None]:
import os
import pickle
from datetime import datetime
import warnings

import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import (
    train_test_split,
    cross_val_score,
    GridSearchCV,
    StratifiedKFold,
)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    classification_report,
    roc_auc_score,
    roc_curve,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    ConfusionMatrixDisplay,
)

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

from imblearn.over_sampling import SMOTE
import xgboost as xgb
import lightgbm as lgb

warnings.filterwarnings("ignore")

## 1.2 Configuration Variables and Paths  

In [None]:
# Input CSV files (outlier-treated and outlier-untreated variants, per file names).
DATASET_OUTLIERS_TREATED = "../dataset_output/dataset_outliers_treated.csv"
DATASET_OUTLIERS_UNTREATED = "../dataset_output/dataset_outliers_untreated.csv"

# Root output folder; every experiment gets its own sub-folder below it.
FOLDER_RESULTS = "../results/"

FOLDER_RESULTS_EXP1 = FOLDER_RESULTS + "experiment_1/"
FOLDER_RESULTS_EXP2 = FOLDER_RESULTS + "experiment_2/"
FOLDER_RESULTS_EXP3 = FOLDER_RESULTS + "experiment_3/"
FOLDER_RESULTS_EXP4 = FOLDER_RESULTS + "experiment_4/"


# Per-experiment result tables plus the cross-experiment summary file.
RESULT_EXP1 = FOLDER_RESULTS_EXP1 + "experiment_1_with_smote_results.csv"
RESULT_EXP2 = FOLDER_RESULTS_EXP2 + "experiment_2_without_smote_results.csv"
RESULT_EXP3 = FOLDER_RESULTS_EXP3 + "experiment_3_with_smote_results.csv"
RESULT_EXP4 = FOLDER_RESULTS_EXP4 + "experiment_4_without_smote_results.csv"
RESULT_SUMMARY = FOLDER_RESULTS + "experiments_summary.csv"

## 1.3 Auxiliary Functions

In [None]:
def load_dataset(file_path):
    """
    Read a CSV file into a DataFrame, logging progress to stdout.

    Args:
        file_path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    divider = "-" * 50
    print(divider)
    print(f"📊 Loading dataset from: {file_path}")
    frame = pd.read_csv(file_path)
    print("✅ Dataset loaded successfully!")
    print(divider)
    return frame

In [None]:
def show_toxicity_distribution(data):
    """
    Plot and print the class distribution of the ``Toxicity`` column.

    Draws a bar chart with one bar for class 0 and one for class 1 (always
    in that order, so the fixed tick labels match the bars), then prints
    the absolute counts and proportions to stdout.

    Args:
        data: DataFrame with a ``Toxicity`` column. Only the label values
            0 and 1 are reported; a class that does not occur is shown
            with a count and proportion of zero.
    """
    print("-" * 50)
    print("📊 Analyzing toxicity distribution...")

    class_order = [0, 1]

    # value_counts() orders by frequency, not by label. Reindex to a fixed
    # 0/1 order so bar i really belongs to tick label i, and so a missing
    # class shows up as 0 instead of raising a KeyError later.
    target_counts = data['Toxicity'].value_counts().reindex(class_order, fill_value=0)
    target_counts.plot(kind='bar', color='orange')
    plt.title('Original Class Distribution')
    plt.xlabel('Class')
    plt.ylabel('Frequency')
    plt.xticks([0, 1], ['Non-Toxic (0)', 'Toxic (1)'], rotation=0)
    # Annotate each bar with its count, slightly above the bar top.
    for i, v in enumerate(target_counts):
        plt.text(i, v + 1, str(v), ha='center')
    plt.tight_layout()
    plt.show()

    print("Original distribution:")
    print("Non-Toxic (0):", (data['Toxicity'] == 0).sum())
    print("Toxic (1):", (data['Toxicity'] == 1).sum())
    print("\nProportions:")
    # Normalise over all observed labels first, then pick classes 0 and 1.
    proportions = (
        data['Toxicity']
        .value_counts(normalize=True)
        .reindex(class_order, fill_value=0.0)
    )
    print(f"Non-Toxic (0): {proportions.loc[0]:.3f} ({proportions.loc[0]*100:.1f}%)")
    print(f"Toxic (1): {proportions.loc[1]:.3f} ({proportions.loc[1]*100:.1f}%)")

    print("✅ Distribution analysis completed!")
    print("-" * 50)

In [None]:
def prepare_data_split(data, target_column='Toxicity', test_size=0.2, random_state=42):
    """
    Split ``data`` into stratified train/test features and targets.

    Args:
        data: DataFrame holding the feature columns and ``target_column``.
        target_column: Name of the label column; it is dropped from the features.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        ``(X_train, X_test, y_train, y_test)``.

    A summary of sample counts and of the class 0 / class 1 balance in the
    full, training and test targets is printed to stdout.
    """
    divider = "-" * 50

    def report_classes(tag, labels):
        # One line per subset: absolute counts plus share within that subset.
        zeros = (labels == 0).sum()
        ones = (labels == 1).sum()
        print(f"{tag} - Class 0: {zeros} ({zeros/len(labels)*100:.1f}%), Class 1: {ones} ({ones/len(labels)*100:.1f}%)")

    print(divider)
    print("🔄 Preparing data split...")

    X = data.drop(columns=target_column)
    y = data[target_column]

    # stratify=y keeps the class ratio the same in both partitions.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        stratify=y,
        random_state=random_state,
    )

    n_total = len(X)
    print(f"\n🔹 Total samples: {n_total}")
    for label, subset in (("Training", X_train), ("Test", X_test)):
        print(f"🔹 {label}: {len(subset)} ({len(subset)/n_total*100:.1f}%)")

    print("\n🔹 Class distribution:")
    report_classes("Total", y)
    report_classes("Train", y_train)
    # Trailing space keeps the "Test" line aligned with the other two.
    report_classes("Test ", y_test)

    print("✅ Data split completed!")
    print(divider)
    return X_train, X_test, y_train, y_test

In [None]:
def apply_smote_balancing(X_train, y_train, use_smote=True, random_state=42):
    """
    Optionally oversample the training set with SMOTE and report the class balance.

    Args:
        X_train: Training features.
        y_train: Training labels; the report counts the values 0 and 1.
        use_smote: When true, resample with ``SMOTE``; otherwise the inputs
            are returned untouched.
        random_state: Seed handed to ``SMOTE``.

    Returns:
        ``(X_train_res, y_train_res)`` - the resampled data, or the original
        objects when ``use_smote`` is false.
    """
    divider = "-" * 50
    print(divider)

    print(
        "⚖️ Applying SMOTE balancing..."
        if use_smote
        else "⚖️ Using original imbalanced data (no SMOTE)..."
    )

    n_before = len(y_train)
    zeros_before = (y_train == 0).sum()
    ones_before = (y_train == 1).sum()
    print("\n🔹 Original distribution:")
    print(f"Class 0: {zeros_before} ({zeros_before/n_before*100:.1f}%)")
    print(f"Class 1: {ones_before} ({ones_before/n_before*100:.1f}%)")

    if not use_smote:
        # Pass-through path: repeat the same figures as the "final" state.
        print("\n🔹 Final distribution (unchanged):")
        print(f"Class 0: {zeros_before} ({zeros_before/n_before*100:.1f}%)")
        print(f"Class 1: {ones_before} ({ones_before/n_before*100:.1f}%)")

        print("\n🔹 NO CHANGES:")
        print(f"Total samples: {n_before}")
        print("Data remains imbalanced")

        print("✅ Original data ready for training!")
        print(divider)
        return X_train, y_train

    sampler = SMOTE(random_state=random_state)
    X_train_res, y_train_res = sampler.fit_resample(X_train, y_train)

    n_after = len(y_train_res)
    zeros_after = (y_train_res == 0).sum()
    ones_after = (y_train_res == 1).sum()
    print("\n🔹 After SMOTE distribution:")
    print(f"Class 0: {zeros_after} ({zeros_after/n_after*100:.1f}%)")
    print(f"Class 1: {ones_after} ({ones_after/n_after*100:.1f}%)")

    print("\n🔹 CHANGES:")
    print(f"Samples added: {n_after - n_before}")
    # The message treats class 1 as the minority class.
    print(f"Minority class increased: {ones_after - ones_before} samples")

    print("✅ SMOTE balancing completed!")
    print(divider)
    return X_train_res, y_train_res

In [18]:
def get_models_and_params():
    """
    Define the candidate models and their hyperparameter grids for optimization.

    Returns:
        A dict keyed by display name. Each value is a dict with a fresh,
        unfitted estimator under ``'model'`` and its search grid under
        ``'params'``. The SVM entry is a scaler + SVC pipeline, so its grid
        keys carry the ``svc__`` step prefix.
    """
    seed = 42

    def spec(estimator, grid):
        # Uniform shape for every entry of the returned mapping.
        return {'model': estimator, 'params': grid}

    return {
        "Logistic Regression": spec(
            LogisticRegression(max_iter=1000, random_state=seed),
            {
                'C': [0.1, 1.0, 10.0],
                'solver': ['liblinear', 'lbfgs'],
            },
        ),
        "Decision Tree": spec(
            DecisionTreeClassifier(random_state=seed),
            {
                'max_depth': [3, 5, 7, 10],
                'min_samples_split': [2, 5, 10],
                'min_samples_leaf': [1, 2, 4],
            },
        ),
        "Random Forest": spec(
            RandomForestClassifier(random_state=seed),
            {
                'n_estimators': [50, 100, 200],
                'max_depth': [3, 5, 7],
                'min_samples_split': [2, 5],
                'min_samples_leaf': [1, 2],
            },
        ),
        "XGBoost": spec(
            xgb.XGBClassifier(eval_metric='logloss', random_state=seed),
            {
                'n_estimators': [50, 100, 200],
                'max_depth': [3, 4, 5],
                'learning_rate': [0.01, 0.1, 0.2],
                'subsample': [0.8, 1.0],
            },
        ),
        "LightGBM": spec(
            lgb.LGBMClassifier(verbosity=-1, random_state=seed),
            {
                'n_estimators': [50, 100, 200],
                'max_depth': [3, 4, 5],
                'learning_rate': [0.01, 0.1, 0.2],
                'num_leaves': [20, 31, 50],
            },
        ),
        "SVM": spec(
            make_pipeline(
                StandardScaler(),
                SVC(kernel='rbf', probability=True, random_state=seed),
            ),
            {
                'svc__C': [0.1, 1.0, 10.0],
                'svc__gamma': ['scale', 'auto', 0.01, 0.1],
            },
        ),
    }

In [None]:
def train_and_optimize_models(X_train, y_train, X_test, y_test, cv_folds=5, experiment_name="Experiment"):
    """
    Grid-search every candidate model and collect train/test/CV metrics.

    For each entry of ``get_models_and_params()`` a ``GridSearchCV``
    (ROC-AUC scoring, stratified shuffled folds) is fitted on the training
    data. The refitted best estimator is then scored on the training and
    test sets and cross-validated once more on the training data.

    Args:
        X_train, y_train: Data used for the search, refit and cross-validation.
        X_test, y_test: Data used for the ``Test_*`` metrics.
        cv_folds: Number of folds for ``StratifiedKFold``.
        experiment_name: Label used in the printed banners.

    Returns:
        ``(results_df, best_models)`` - a DataFrame with one row per model,
        sorted by ``Test_AUC`` descending with a fresh 0..n-1 index, and a
        dict mapping model name to its best fitted estimator.

    NOTE(review): rows are ranked by test-set AUC, so any caller that takes
    row 0 as "the best model" selects on the test set - confirm this is
    intended. If ``X_train`` was resampled before this call, the CV folds
    contain resampled rows - verify against the caller.
    """
    wide_rule = "=" * 70
    narrow_rule = "=" * 50
    # (label shown in the console, suffix of the result-dict key)
    metric_labels = (
        ("AUC", "AUC"),
        ("Accuracy", "Accuracy"),
        ("Precision", "Precision"),
        ("Recall", "Recall"),
        ("F1-Score", "F1"),
    )

    def score_split(estimator, features, labels, prefix):
        # Metrics for one data split, keyed "<prefix>_<metric>".
        predicted = estimator.predict(features)
        positive_proba = estimator.predict_proba(features)[:, 1]
        return {
            prefix + "_AUC": roc_auc_score(labels, positive_proba),
            prefix + "_Accuracy": accuracy_score(labels, predicted),
            prefix + "_Precision": precision_score(labels, predicted, average='weighted'),
            prefix + "_Recall": recall_score(labels, predicted, average='weighted'),
            prefix + "_F1": f1_score(labels, predicted, average='weighted'),
        }

    def print_split(heading, scores, prefix):
        print(f"\n{heading}")
        for shown, suffix in metric_labels:
            value = scores[prefix + "_" + suffix]
            print(f"   {shown}: {value:.3f}")

    print(wide_rule)
    print(f"🤖 TRAINING MODELS - {experiment_name}")
    print(wide_rule)

    candidates = get_models_and_params()
    rows = []
    best_models = {}

    # Fixed seed: the grid search and the later cross_val_score share folds.
    splitter = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=42)

    for model_name, config in candidates.items():
        print(f"\n{narrow_rule}")
        print(f"🔧 Training: {model_name}")
        print(narrow_rule)

        print("🔍 Performing hyperparameter optimization...")
        search = GridSearchCV(
            estimator=config['model'],
            param_grid=config['params'],
            cv=splitter,
            scoring='roc_auc',
            n_jobs=-1,
            verbose=0,
        )
        search.fit(X_train, y_train)

        tuned = search.best_estimator_
        best_models[model_name] = tuned

        train_scores = score_split(tuned, X_train, y_train, "Train")
        test_scores = score_split(tuned, X_test, y_test, "Test")
        cv_scores = cross_val_score(tuned, X_train, y_train, cv=splitter, scoring='roc_auc')

        # Key order defines the column order of the returned DataFrame.
        row = {
            'Model': model_name,
            'Best_Params': str(search.best_params_),
            'CV_AUC_Mean': cv_scores.mean(),
            'CV_AUC_Std': cv_scores.std(),
            'Best_CV_Score': search.best_score_,
        }
        row.update(train_scores)
        row.update(test_scores)
        rows.append(row)

        print(f"✅ Best parameters: {search.best_params_}")
        print(f"📊 Cross-validation AUC: {cv_scores.mean():.3f} (+/- {cv_scores.std()*2:.3f})")
        print(f"📊 Best CV Score: {search.best_score_:.3f}")

        print_split("📈 TRAINING METRICS:", train_scores, "Train")
        print_split("🎯 TEST METRICS:", test_scores, "Test")

    results_df = (
        pd.DataFrame(rows)
        .sort_values('Test_AUC', ascending=False)
        .reset_index(drop=True)
    )

    print(f"\n{wide_rule}")
    print(f"🏆 FINAL RESULTS - {experiment_name}")
    print(wide_rule)

    overview_columns = ['Model', 'CV_AUC_Mean', 'Test_AUC', 'Test_Accuracy', 'Test_F1']
    print(results_df[overview_columns].to_string(index=False))

    return results_df, best_models

In [None]:
def display_detailed_results(results_df, experiment_name):
    """
    Print a detailed per-model metrics summary.

    Args:
        results_df: DataFrame with the columns ``Model``, ``CV_AUC_Mean``,
            ``CV_AUC_Std``, ``Test_AUC``, ``Test_Accuracy``,
            ``Test_Precision``, ``Test_Recall``, ``Test_F1`` and
            ``Best_Params``. Rows are reported in the order given.
        experiment_name: Label used in the printed header.

    The rank shown for each model is its 1-based position in ``results_df``.
    It does not depend on the index labels, so filtered or re-indexed
    frames are numbered correctly.
    """
    print(f"\n{'='*80}")
    print(f"📊 DETAILED METRICS SUMMARY - {experiment_name}")
    print(f"{'='*80}")

    # enumerate() gives the positional rank; the index label is ignored.
    for rank, (_, row) in enumerate(results_df.iterrows(), start=1):
        print(f"\n🔸 {row['Model']} (Rank #{rank})")
        print(f"   Cross-Validation AUC: {row['CV_AUC_Mean']:.3f} (±{row['CV_AUC_Std']:.3f})")
        print("   Test Metrics:")
        print(f"      AUC:       {row['Test_AUC']:.3f}")
        print(f"      Accuracy:  {row['Test_Accuracy']:.3f}")
        print(f"      Precision: {row['Test_Precision']:.3f}")
        print(f"      Recall:    {row['Test_Recall']:.3f}")
        print(f"      F1-Score:  {row['Test_F1']:.3f}")
        print(f"   Best Params: {row['Best_Params']}")
        print("-" * 60)

In [None]:
def generate_roc_curve(y_test, y_pred_proba, model_name, figsize=(8, 6)):
    """
    Draw and show the ROC curve for one set of predicted scores.

    Args:
        y_test: True labels.
        y_pred_proba: Scores for the positive class.
        model_name: Name shown in the plot title.
        figsize: Figure size passed to ``plt.subplots``.
    """
    area = roc_auc_score(y_test, y_pred_proba)
    false_pos_rate, true_pos_rate, _ = roc_curve(y_test, y_pred_proba)

    axis_font = 16
    _, axes = plt.subplots(figsize=figsize)

    # Model curve first, then the diagonal as the chance-level reference.
    axes.plot(
        false_pos_rate,
        true_pos_rate,
        color='darkorange',
        lw=2,
        label=f'ROC curve (AUC = {area:.3f})',
    )
    axes.plot(
        [0, 1],
        [0, 1],
        color='navy',
        lw=2,
        linestyle='--',
        label='Random classifier',
    )

    axes.set_xlabel('False Positive Rate', fontsize=axis_font)
    axes.set_ylabel('True Positive Rate', fontsize=axis_font)
    axes.set_title(
        f'ROC Curve - {model_name}\nAUC Score: {area:.3f}',
        fontsize=16,
        fontweight='bold',
        pad=20,
    )
    axes.legend(loc="lower right")
    axes.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

In [None]:
def generate_confusion_matrix(model, X_test, y_test, model_name, figsize=(8, 6)):
    """
    Draw and show the confusion matrix of ``model`` on the test data.

    Args:
        model: Fitted classifier exposing ``predict_proba`` (used for the
            AUC in the title) and usable by
            ``ConfusionMatrixDisplay.from_estimator``.
        X_test: Test features.
        y_test: True test labels.
        model_name: Name shown in the plot title.
        figsize: Figure size passed to ``plt.subplots``.
    """
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    auc_score = roc_auc_score(y_test, y_pred_proba)

    _, ax = plt.subplots(figsize=figsize)
    ConfusionMatrixDisplay.from_estimator(
        model, X_test, y_test,
        cmap='Blues',
        ax=ax,
        colorbar=True,
        display_labels=['Non-Toxic (0)', 'Toxic (1)']
    )

    # Title and labels are set on `ax` directly rather than through
    # pyplot's implicit "current axes": the colorbar adds a second axes to
    # the figure, and the explicit form cannot end up decorating that one.
    # This also matches the Axes-based style of generate_roc_curve.
    ax.set_title(f"Confusion Matrix - {model_name}\nAUC Score: {auc_score:.3f}",
                 fontsize=14, fontweight='bold', pad=20)
    ax.set_xlabel('Predicted Label', fontsize=12)
    ax.set_ylabel('True Label', fontsize=12)

    plt.tight_layout()
    plt.show()

In [None]:
def detailed_model_evaluation(model, model_name, X_test, y_test):
    """
    Print and plot a full evaluation of one fitted model on the test data.

    Prints the ROC-AUC and the classification report, then shows the
    confusion matrix and the ROC curve.

    Args:
        model: Fitted classifier with ``predict`` and ``predict_proba``.
        model_name: Name used in headers and plot titles.
        X_test: Test features.
        y_test: True test labels.

    Returns:
        The ROC-AUC score on the test data.
    """
    banner = "=" * 50
    print(f"\n{banner}")
    print(f"📊 DETAILED EVALUATION: {model_name}")
    print(banner)

    predicted_labels = model.predict(X_test)
    positive_scores = model.predict_proba(X_test)[:, 1]

    auc_score = roc_auc_score(y_test, positive_scores)
    print(f"🎯 AUC Score: {auc_score:.3f}")

    print("\n📋 Classification Report:")
    print(classification_report(y_test, predicted_labels))

    print("\n🔢 Confusion Matrix:")
    generate_confusion_matrix(model, X_test, y_test, model_name)

    print("\n📈 ROC Curve:")
    generate_roc_curve(y_test, positive_scores, model_name)

    return auc_score

In [None]:
def save_best_model(model, model_name, experiment_name, save_path="../results/"):
    """
    Pickle ``model`` into ``save_path`` under a timestamped file name.

    The file name is ``<experiment>_<model>_<YYYYmmdd_HHMMSS>.pkl``, where
    both name parts are lower-cased with spaces turned into underscores;
    the experiment part additionally has ``:``, ``(`` and ``)`` removed.

    Args:
        model: Object to serialise with ``pickle``.
        model_name: Model display name, used in the file name.
        experiment_name: Experiment label, used in the file name.
        save_path: Target directory; created if it does not exist.

    Returns:
        The path of the written file, or ``None`` if anything failed
        (the error is printed, not raised).
    """
    print(f"\n💾 SAVING MODEL: {model_name}")
    print("-" * 40)

    try:
        os.makedirs(save_path, exist_ok=True)

        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        # Single pass: space -> underscore, drop ':', '(' and ')'.
        cleanup = str.maketrans({" ": "_", ":": None, "(": None, ")": None})
        experiment_slug = experiment_name.translate(cleanup).lower()
        model_slug = "_".join(model_name.split(" ")).lower()

        target = os.path.join(
            save_path,
            f"{experiment_slug}_{model_slug}_{stamp}.pkl",
        )

        with open(target, 'wb') as handle:
            pickle.dump(model, handle)

        print("✅ Model saved successfully!")
        print(f"📁 File: {target}")

        return target

    except Exception as e:
        # Best-effort persistence: report the problem and signal it with None.
        print(f"❌ Error saving model: {e}")
        return None

In [None]:
def evaluate_best_model(results_df, models_dict, X_test, y_test, experiment_name, save_model=True, save_path="../results/"):
    """
    Evaluate the top-ranked model of an experiment and optionally persist it.

    The "best" model is whatever sits in the first row of ``results_df``.

    Args:
        results_df: Results table with ``Model``, ``Test_AUC``,
            ``CV_AUC_Mean``, ``CV_AUC_Std`` and ``Best_Params`` columns.
        models_dict: Mapping from model name to fitted estimator.
        X_test: Test features.
        y_test: True test labels.
        experiment_name: Label used in banners and in the saved file name.
        save_model: When true, the model is written via ``save_best_model``.
        save_path: Directory handed to ``save_best_model``.

    Returns:
        ``(best_model_name, best_model, auc_score, model_path)``;
        ``model_path`` is ``None`` when nothing was saved.
    """
    rule = "=" * 70
    print("\n" + rule)
    print(f"🔍 DETAILED EVALUATION - BEST MODEL FROM {experiment_name}")
    print(rule)

    # Read everything from the first row up front, before printing.
    top_row = results_df.iloc[0]
    best_model_name = top_row['Model']
    best_model = models_dict[best_model_name]
    test_auc = top_row['Test_AUC']
    cv_auc_mean = top_row['CV_AUC_Mean']
    cv_auc_std = top_row['CV_AUC_Std']

    print(f"🏆 Best Model: {best_model_name}")
    print(f"🎯 Test AUC: {test_auc:.3f}")
    print(f"📊 CV AUC: {cv_auc_mean:.3f} (±{cv_auc_std:.3f})")
    print(f"📋 Best Parameters: {results_df.iloc[0]['Best_Params']}")

    auc_score = detailed_model_evaluation(best_model, best_model_name, X_test, y_test)

    model_path = (
        save_best_model(best_model, best_model_name, experiment_name, save_path)
        if save_model
        else None
    )

    print(f"\n✅ Best model evaluation for {experiment_name} completed!")
    print(rule)

    return best_model_name, best_model, auc_score, model_path

In [None]:
def simple_experiments_analysis(results_exp1, results_exp2, results_exp3, results_exp4):
    """
    Print the top model of each of the 4 experiments and pick the overall winner.

    Args:
        results_exp1, results_exp2, results_exp3, results_exp4: Result
            tables of the four experiments. The first row of each is taken
            as that experiment's best model and must provide ``Model``,
            ``Test_AUC``, ``Test_Accuracy`` and ``Test_F1``.

    Returns:
        ``(experiment_label, best_row)`` for the experiment whose first row
        has the highest ``Test_AUC``. On ties the earliest experiment wins.
        A NaN AUC is ranked below every real value.
    """
    print("="*70)
    print("🏁 FINAL EXPERIMENTS SUMMARY")
    print("="*70)

    experiments = [
        ("Experiment 1: Outliers treated with SMOTE", results_exp1.iloc[0]),
        ("Experiment 2: Outliers treated without SMOTE", results_exp2.iloc[0]),
        ("Experiment 3: Outliers untreated with SMOTE", results_exp3.iloc[0]),
        ("Experiment 4: Outliers untreated without SMOTE", results_exp4.iloc[0])
    ]

    print("\n🏆 BEST MODELS BY EXPERIMENT:")
    print("-" * 70)
    for exp_name, top_row in experiments:
        print(f"{exp_name}")
        print(f"   Model: {top_row['Model']}")
        print(f"   Test AUC: {top_row['Test_AUC']:.3f}")
        print(f"   Accuracy: {top_row['Test_Accuracy']:.3f}")
        print(f"   F1-Score: {top_row['Test_F1']:.3f}")
        print()

    def ranking_auc(entry):
        # NaN compares false against everything, which would make the
        # result depend on input order; rank it below all real scores.
        auc = entry[1]['Test_AUC']
        return float("-inf") if pd.isna(auc) else auc

    # One pass with a key replaces "find the max value, then search for it
    # again by float equality". max() keeps the first entry on ties.
    winner = max(experiments, key=ranking_auc)

    print("🥇 WINNING EXPERIMENT:")
    print(f"   {winner[0]}")
    print(f"   Model: {winner[1]['Model']}")
    print(f"   Test AUC: {winner[1]['Test_AUC']:.3f}")
    print()

    return winner

In [None]:
def save_results_simple(results_exp1, results_exp2, results_exp3, results_exp4):
    """
    Save the four result tables and a one-row-per-experiment summary as CSV.

    Each table is written to its ``RESULT_EXP*`` path. The summary (best
    model name, test AUC, accuracy and F1 taken from the first row of each
    table) is written to ``RESULT_SUMMARY``. Missing output folders are
    created first, so this works even when no model was saved beforehand.

    Args:
        results_exp1, results_exp2, results_exp3, results_exp4: Result
            tables of the four experiments; the first row of each is
            treated as that experiment's best model.
    """
    print("💾 SAVING RESULTS...")

    # (summary label, result table, output path) per experiment.
    outputs = [
        ('EXP1_WITH_OUTLIERS_TREATED_WITH_SMOTE', results_exp1, RESULT_EXP1),
        ('EXP2_WITH_OUTLIERS_TREATED_NO_SMOTE', results_exp2, RESULT_EXP2),
        ('EXP3_WITH_OUTLIERS_UNTREATED_WITH_SMOTE', results_exp3, RESULT_EXP3),
        ('EXP4_WITH_OUTLIERS_UNTREATED_NO_SMOTE', results_exp4, RESULT_EXP4),
    ]
    all_paths = [path for _, _, path in outputs] + [RESULT_SUMMARY]

    # to_csv does not create parent directories; make sure they exist.
    for path in all_paths:
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    for _, frame, path in outputs:
        frame.to_csv(path, index=False)

    summary_data = []
    for label, frame, _ in outputs:
        top_row = frame.iloc[0]
        summary_data.append({
            'Experiment': label,
            'Best_Model': top_row['Model'],
            'Test_AUC': top_row['Test_AUC'],
            'Test_Accuracy': top_row['Test_Accuracy'],
            'Test_F1': top_row['Test_F1'],
        })

    summary_df = pd.DataFrame(summary_data)
    summary_df.to_csv(RESULT_SUMMARY, index=False)

    print("✅ Results saved:")
    for path in all_paths:
        print(f"   📁 {path}")

In [None]:
def compare_best_models_roc_curves(figsize=(10, 8), experiments_data=None):
    """
    Compare ROC curves of the best models from the experiments in a single plot.

    Args:
        figsize: Figure size passed to ``plt.figure``.
        experiments_data: Optional sequence of
            ``(experiment_label, fitted_model, X_test, y_test, model_name)``
            tuples to plot. When omitted, the four experiments are read
            from the notebook-level variables ``best_model_exp1..4``,
            ``X_test_1..4``, ``y_test_1..4`` and ``best_model_name_exp1..4``,
            which must already exist (otherwise ``NameError`` is raised).
    """
    print("📊 ROC CURVES COMPARISON - BEST MODELS FROM EACH EXPERIMENT")
    print("=" * 70)

    if experiments_data is None:
        # Default: fall back to the globals defined by the experiment cells.
        # Resolved before the figure is created so a missing variable does
        # not leave an empty figure behind.
        experiments_data = [
            ("Experiment 1: Outliers treated with SMOTE", best_model_exp1, X_test_1, y_test_1, best_model_name_exp1),
            ("Experiment 2: Outliers treated without SMOTE", best_model_exp2, X_test_2, y_test_2, best_model_name_exp2),
            ("Experiment 3: Outliers untreated with SMOTE", best_model_exp3, X_test_3, y_test_3, best_model_name_exp3),
            ("Experiment 4: Outliers untreated without SMOTE", best_model_exp4, X_test_4, y_test_4, best_model_name_exp4)
        ]

    plt.figure(figsize=figsize)

    colors = ['darkorange', 'green', 'red', 'blue']

    for i, (exp_name, model, X_test, y_test, model_name) in enumerate(experiments_data):
        y_pred_proba = model.predict_proba(X_test)[:, 1]

        fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
        auc_score = roc_auc_score(y_test, y_pred_proba)

        # Wrap around the palette so more than four curves still plot.
        plt.plot(fpr, tpr, color=colors[i % len(colors)], lw=2,
                 label=f'{exp_name}\n{model_name} (AUC = {auc_score:.3f})')

        print(f"📈 {exp_name}")
        print(f"   AUC: {auc_score:.3f}")
        print()

    # Diagonal = chance-level reference.
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--',
             label='Random classifier (AUC = 0.500)')

    plt.xlabel('False Positive Rate', fontsize=16)
    plt.ylabel('True Positive Rate', fontsize=16)
    plt.title('ROC Curves Comparison - Best Models by Experiment',
              fontsize=16, fontweight='bold', pad=20)
    plt.legend(loc="lower right", fontsize=10)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    print("✅ ROC curves comparison completed!")

# 2. 🗂️ DATA LOADING

## 2.1 Dataset Loading

### 2.1.1 Dataset with Treated Outliers

In [None]:
# Load the outlier-treated dataset and inspect its class balance.
data_outliers_treated = load_dataset(file_path=DATASET_OUTLIERS_TREATED)
show_toxicity_distribution(data=data_outliers_treated)

### 2.1.2 Dataset with Untreated Outliers

In [None]:
# Load the outlier-untreated dataset and inspect its class balance.
data_outliers_untreated = load_dataset(file_path=DATASET_OUTLIERS_UNTREATED)
show_toxicity_distribution(data=data_outliers_untreated)

# 3. 🧪 EXPERIMENTAL DESIGN

## 3.1 Experimental Framework

- **4 Systematic Experiments** evaluating combinations of:
  - Outlier treatment: ✅ Treated vs ❌ Untreated
  - Class balancing: ✅ SMOTE vs ❌ Original

## 3.2 Experiment Configuration

| Experiment | Outliers | SMOTE | Description |
|------------|----------|-------|-------------|
| EXP1 | ✅ Treated | ✅ Applied | Outliers treated with SMOTE |
| EXP2 | ✅ Treated | ❌ Not applied | Outliers treated without SMOTE |
| EXP3 | ❌ Untreated | ✅ Applied | Outliers untreated with SMOTE |
| EXP4 | ❌ Untreated | ❌ Not applied | Outliers untreated without SMOTE |

# 4. 🚀 EXPERIMENTS EXECUTION

## 4.1 Experiment 1: Outliers treated with SMOTE

In [None]:
# Section banner for the notebook output.
print("=" * 70, "🚀 Experiment 1: Outliers treated with SMOTE", "=" * 70, sep="\n")

### 4.1.1 Data Preparation

In [None]:
# Stratified 80/20 split of the outlier-treated data.
X_train_1, X_test_1, y_train_1, y_test_1 = prepare_data_split(
    data=data_outliers_treated,
    target_column='Toxicity',
    test_size=0.2,
    random_state=42,
)

### 4.1.2 SMOTE Application

In [None]:
# Oversample the training partition only; the test partition is left as is.
X_train_balanced, y_train_balanced = apply_smote_balancing(
    X_train_1,
    y_train_1,
    use_smote=True,
    random_state=42,
)

### 4.1.3 Training and Optimization

In [None]:
results_exp1, models_exp1 = train_and_optimize_models(
    X_train_balanced,
    y_train_balanced,
    X_test_1,
    y_test_1,
    cv_folds=5,
    experiment_name="Outliers treated with SMOTE",
)

display_detailed_results(results_exp1, "Outliers treated with SMOTE")

### 4.1.4 Best Model Evaluation

In [None]:
# Evaluate the top-ranked model and pickle it into the experiment folder.
best_model_name_exp1, best_model_exp1, auc_exp1, model_path_exp1 = evaluate_best_model(
    results_exp1,
    models_exp1,
    X_test_1,
    y_test_1,
    "Experiment 1",
    save_model=True,
    save_path=FOLDER_RESULTS_EXP1,
)

## 4.2 Experiment 2: Outliers treated without SMOTE

In [None]:
# Section banner for the notebook output.
print("=" * 70, "🚀 Experiment 2: Outliers treated without SMOTE", "=" * 70, sep="\n")

### 4.2.1 Data Preparation

In [None]:
# Stratified 80/20 split of the outlier-treated data (seed 42).
X_train_2, X_test_2, y_train_2, y_test_2 = prepare_data_split(
    data=data_outliers_treated,
    target_column='Toxicity',
    test_size=0.2,
    random_state=42,
)

### 4.2.2 Original Imbalanced Data

In [None]:
# use_smote=False: the training data is passed through unchanged.
X_train_original, y_train_original = apply_smote_balancing(
    X_train_2,
    y_train_2,
    use_smote=False,
    random_state=42,
)

### 4.2.3 Training and Optimization

In [None]:
results_exp2, models_exp2 = train_and_optimize_models(
    X_train_original,
    y_train_original,
    X_test_2,
    y_test_2,
    cv_folds=5,
    experiment_name="Outliers treated without SMOTE",
)

display_detailed_results(results_exp2, "Outliers treated without SMOTE")

### 4.2.4 Best Model Evaluation

In [None]:
# Evaluate the top-ranked model and pickle it into the experiment folder.
best_model_name_exp2, best_model_exp2, auc_exp2, model_path_exp2 = evaluate_best_model(
    results_exp2,
    models_exp2,
    X_test_2,
    y_test_2,
    "Experiment 2",
    save_model=True,
    save_path=FOLDER_RESULTS_EXP2,
)

## 4.3 Experiment 3: Outliers untreated with SMOTE

In [None]:
# Section banner for the notebook output.
print("=" * 70, "🚀 Experiment 3: Outliers untreated with SMOTE", "=" * 70, sep="\n")

### 4.3.1 Data Preparation

In [None]:
# Stratified 80/20 split of the outlier-untreated data.
X_train_3, X_test_3, y_train_3, y_test_3 = prepare_data_split(
    data=data_outliers_untreated,
    target_column='Toxicity',
    test_size=0.2,
    random_state=42,
)

### 4.3.2 SMOTE Application

In [None]:
# Oversample the training partition only; the test partition is left as is.
X_train_balanced_3, y_train_balanced_3 = apply_smote_balancing(
    X_train_3,
    y_train_3,
    use_smote=True,
    random_state=42,
)

### 4.3.3 Training and Optimization

In [None]:
results_exp3, models_exp3 = train_and_optimize_models(
    X_train_balanced_3,
    y_train_balanced_3,
    X_test_3,
    y_test_3,
    cv_folds=5,
    experiment_name="Outliers untreated with SMOTE",
)

display_detailed_results(results_exp3, "Outliers untreated with SMOTE")

### 4.3.4 Best Model Evaluation

In [None]:
# Evaluate the top-ranked model and pickle it into the experiment folder.
best_model_name_exp3, best_model_exp3, auc_exp3, model_path_exp3 = evaluate_best_model(
    results_exp3,
    models_exp3,
    X_test_3,
    y_test_3,
    "Experiment 3",
    save_model=True,
    save_path=FOLDER_RESULTS_EXP3,
)

## 4.4 Experiment 4: Outliers untreated without SMOTE

In [None]:
# Section banner for the notebook output.
print("=" * 70, "🚀 Experiment 4: Outliers untreated without SMOTE", "=" * 70, sep="\n")

### 4.4.1 Data Preparation

In [None]:
# Stratified 80/20 split of the outlier-untreated data (seed 42).
X_train_4, X_test_4, y_train_4, y_test_4 = prepare_data_split(
    data=data_outliers_untreated,
    target_column='Toxicity',
    test_size=0.2,
    random_state=42,
)

### 4.4.2 Original Imbalanced Data

In [None]:
# use_smote=False: the training data is passed through unchanged.
X_train_original_4, y_train_original_4 = apply_smote_balancing(
    X_train_4,
    y_train_4,
    use_smote=False,
    random_state=42,
)

### 4.4.3 Training and Optimization

In [None]:
results_exp4, models_exp4 = train_and_optimize_models(
    X_train=X_train_original_4,
    y_train=y_train_original_4,
    X_test=X_test_4,
    y_test=y_test_4,
    cv_folds=5,
    experiment_name="Outliers untreated without SMOTE",
)

display_detailed_results(
    results_df=results_exp4,
    experiment_name="Outliers untreated without SMOTE",
)

### 4.4.4 Best Model Evaluation

In [None]:
# Evaluate the top-ranked model and pickle it into the experiment folder.
best_model_name_exp4, best_model_exp4, auc_exp4, model_path_exp4 = evaluate_best_model(
    results_exp4,
    models_exp4,
    X_test_4,
    y_test_4,
    "Experiment 4",
    save_model=True,
    save_path=FOLDER_RESULTS_EXP4,
)

# 5. 📊 COMPARATIVE ANALYSIS AND RESULTS

## 5.1 Comprehensive Experiments Analysis

In [None]:
# `winner` is an (experiment label, best result row) pair.
winner = simple_experiments_analysis(
    results_exp1=results_exp1,
    results_exp2=results_exp2,
    results_exp3=results_exp3,
    results_exp4=results_exp4,
)

## 5.2 ROC Curves Comparative Visualization  

In [None]:
# Reads best_model_exp1..4, X_test_1..4, y_test_1..4 and
# best_model_name_exp1..4 from the notebook namespace, so all four
# experiment sections must have been run first.
compare_best_models_roc_curves(figsize=(10, 8))

## 5.3 Results Export and Persistence

In [None]:
save_results_simple(results_exp1, results_exp2, results_exp3, results_exp4)

## 5.4 Final Results Summary

In [None]:
# winner[0] is the experiment label, winner[1] the best result row.
print(
    "\n🎉 ANALYSIS COMPLETED!",
    f"Winner: {winner[0]}",
    f"Best model: {winner[1]['Model']}",
    sep="\n",
)