# 1. LIBRARIES

In [None]:
import os
import pandas as pd
import numpy as np
import joblib
from datetime import datetime
import warnings
import pickle
import time
warnings.filterwarnings('ignore')

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Embedding, Dense, Dropout, Concatenate, 
    Flatten, BatchNormalization, Add
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.data import Dataset

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, StandardScaler, RobustScaler, MinMaxScaler
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score, recall_score,
    confusion_matrix, classification_report, roc_curve, auc, roc_auc_score,
    matthews_corrcoef, balanced_accuracy_score, cohen_kappa_score, 
    log_loss, brier_score_loss
)
from sklearn.utils.class_weight import compute_class_weight

# 2. GPU CONFIGURATION

In [None]:
# Detect the available GPUs and, when there are any, switch each one to
# on-demand memory allocation before reporting what was found.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("\nNo GPU - using CPU")
else:
    try:
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        print("\n✓ GPU DETECTED:")
        for position, device in enumerate(gpus):
            print(f"  • GPU {position}: {device.name}")
        print("  • Memory growth enabled")
    except RuntimeError as err:
        # The error raised by TensorFlow is only reported; execution continues.
        print(err)

# 3. CREATE FOLDER STRUCTURE

In [None]:
# Output directory tree used by the rest of the notebook (models, predictions,
# metrics and per-architecture artefacts).
folders = [
    'db/02b_neural_networks/saved_models',
    'db/02b_neural_networks/predictions',
    'db/02b_neural_networks/metrics',
    'db/02b_neural_networks/metrics/by_architecture',
    'db/02b_neural_networks/model_data/confusion_matrices',
    'db/02b_neural_networks/model_data/confusion_matrices/by_architecture',
    'db/02b_neural_networks/model_data/roc_data',
    'db/02b_neural_networks/model_data/roc_data/by_architecture',
    'db/02b_neural_networks/model_data/learning_curves',
    'db/02b_neural_networks/model_data/learning_curves/by_architecture',
    'db/02b_neural_networks/model_data/architecture_comparisons',
    'db/02b_neural_networks/model_data/hyperparameters',
    'db/02b_neural_networks/comparative_tables',
]

# exist_ok=True makes the cell safe to re-run when the folders already exist.
for target_dir in folders:
    os.makedirs(target_dir, exist_ok=True)

# 4. LOAD CLEAN DATA

In [None]:
# Cleaned dataset produced by the previous stage of the project.
output_file = 'db/01_cleaned_data/displacement_vs_others_final.csv'
df = pd.read_csv(output_file)

# Short load report: number of rows and the available column names.
loaded_rows = len(df)
loaded_columns = list(df.columns)
print(f"\n Data loaded: {loaded_rows:,} records")
print(f"Columns: {loaded_columns}")

# 5. DATA PREPARATION (FULL PIPELINE)

In [None]:
# Model features, grouped by type. The order here defines the column order of X.
predictor_vars = [
    # Categorical variables
    'ESTADO_DEPTO', 'SEXO', 'ETNIA', 'DISCAPACIDAD', 'CICLO_VITAL',
    # Numeric variables
    'VIGENCIA', 'EVENTOS', 'km_norte_sur', 'km_este_oeste', 'distancia_total',
]

# Binary target column.
target_var = 'Desplazamiento_forzado_binaria'

In [None]:
# Force the numeric predictors to a numeric dtype; values that cannot be parsed
# become NaN (errors='coerce') and are handled by the missing-value step.
for numeric_name in ('EVENTOS', 'VIGENCIA', 'km_norte_sur', 'km_este_oeste', 'distancia_total'):
    df[numeric_name] = pd.to_numeric(df[numeric_name], errors='coerce')

In [None]:
# Count nulls in every column the model needs and drop incomplete rows, if any.
required_cols = predictor_vars + [target_var]
missing = df[required_cols].isna().sum()

if missing.any():
    print("\nMissing values detected:")
    print(missing.loc[missing > 0])
    df = df.dropna(subset=required_cols)
    print(f"Records after removing missing: {len(df):,}")
else:
    print("\nNo missing values")

In [None]:
# Feature frame (an independent copy of the predictor columns) and target array.
X = df[predictor_vars].copy()
y = df[target_var].values

# Column groups used by the encoding and scaling steps.
predictor_cat_cols = ['SEXO', 'ETNIA', 'CICLO_VITAL', 'DISCAPACIDAD', 'ESTADO_DEPTO']
predictor_num_cols = ['EVENTOS', 'VIGENCIA', 'km_norte_sur', 'km_este_oeste', 'distancia_total']

n_samples, n_features = X.shape
print("\nDataset prepared:")
print(f"Features: {n_features}")
print(f"Samples: {n_samples:,}")
print(f"Categorical: {predictor_cat_cols}")
print(f"Numeric: {predictor_num_cols}")

In [None]:
# Report how many samples fall in each class and the resulting imbalance ratio.
class_counts = pd.Series(y).value_counts()
n_total = len(y)
n_others = class_counts.get(0, 0)
n_displacement = class_counts.get(1, 0)

print("\nClass distribution:")
print(f"Class 0 (Others): {n_others:,} ({n_others/n_total*100:.2f}%)")
print(f"Class 1 (Displacement): {n_displacement:,} ({n_displacement/n_total*100:.2f}%)")
# The ratio is only defined when the positive class is present.
if n_displacement > 0:
    print(f"Imbalance ratio: {n_others/n_displacement:.2f}:1")

## 5.1. CATEGORICAL ENCODING FOR EMBEDDINGS

In [None]:
# Fit one OrdinalEncoder per categorical column (values are compared as strings)
# and keep the integer codes that will index the embedding tables.
# NOTE(review): unseen categories are encoded as -1, which is not a valid index for
# an Embedding layer sized with n_categories — confirm how new data is handled.
encoders = {}
X_cat_encoded = {}

for cat_col in predictor_cat_cols:
    fitted_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
    codes = fitted_encoder.fit_transform(X[[cat_col]].astype(str))
    X_cat_encoded[cat_col] = codes.astype('int32').ravel()
    encoders[cat_col] = fitted_encoder
    print(f"  • {cat_col}: {len(fitted_encoder.categories_[0])} categories")

# Persist the fitted encoders so the same mapping can be reused later.
joblib.dump(encoders, 'db/02b_neural_networks/saved_models/categorical_encoders.pkl')

## 5.2. EMBEDDING DIMENSIONS

In [None]:
# Embedding size per categorical column: half the number of categories
# (rounded up), capped at 50 dimensions.
embedding_info = {}

for cat_col in predictor_cat_cols:
    cardinality = len(encoders[cat_col].categories_[0])
    width = min(50, (cardinality + 1) // 2)
    embedding_info[cat_col] = dict(n_categories=cardinality, embed_dim=width)
    print(f"{cat_col}: {cardinality}: {width}D")

# Stored next to the encoders so the models can be rebuilt with the same sizes.
joblib.dump(embedding_info, 'db/02b_neural_networks/saved_models/embedding_info.pkl')

## 5.3. NUMERIC SCALING

In [None]:
# Uses the same scaling pipeline as the classical models.
# NOTE(review): every scaler below is fitted on all rows, i.e. before the
# train/test split performed later in the notebook, so test-set statistics
# influence the scaling — confirm whether this is intended.
numeric_report = []
scalers = {}

for col in predictor_num_cols:
    # Read the raw values from `df` (row-aligned with X through X.index) instead
    # of from X: this loop overwrites X[col] with scaled values, so reading from X
    # would rescale already-scaled data whenever the cell is re-run.
    raw = df.loc[X.index, col].astype(float)

    # Descriptive statistics of the raw column drive the choices below.
    s = raw.dropna()
    n_obs = len(s)
    n_unique = s.nunique()
    mean, std = s.mean(), s.std()
    minimum, maximum = s.min(), s.max()
    skew = s.skew()
    q1, q3 = s.quantile(0.25), s.quantile(0.75)
    iqr = q3 - q1
    lower, upper = q1 - 1.5*iqr, q3 + 1.5*iqr
    outliers = ((s < lower) | (s > upper)).sum()
    outlier_ratio = outliers / n_obs if n_obs > 0 else 0

    # Log transform when the column is strongly skewed and non-negative.
    log_transform = (abs(skew) > 2) and (minimum >= 0)

    # Automatic scaler selection: robust scaling when more than 5% of the values
    # are IQR outliers; otherwise min-max for bounded non-negative data and
    # standard scaling for everything else.
    if outlier_ratio > 0.05:
        scaler_choice, scaler = 'RobustScaler', RobustScaler()
    else:
        if (minimum >= 0) and (maximum <= 1e6):
            scaler_choice, scaler = 'MinMaxScaler', MinMaxScaler()
        else:
            scaler_choice, scaler = 'StandardScaler', StandardScaler()

    # Single-column frame named `col`, so the fitted scaler sees the same
    # feature name as before.
    col_data = raw.to_frame()
    if log_transform:
        col_data = np.log1p(col_data.clip(lower=0))
        col_data = col_data.replace([np.inf, -np.inf], np.nan).fillna(0)

    scaled = scaler.fit_transform(col_data.fillna(0))
    X[col] = scaled.ravel()  # <-- Write the scaled column back into the global DataFrame

    numeric_report.append({
        'variable': col,
        'n_obs': int(n_obs),
        'n_unique': int(n_unique),
        'mean': float(mean),
        'std': float(std),
        'min': float(minimum),
        'max': float(maximum),
        'skew': float(skew),
        'outlier_ratio': float(outlier_ratio),
        'log_transform_applied': bool(log_transform),
        'scaler_chosen': scaler_choice
    })

    scalers[col] = scaler

# Save the fitted scalers
joblib.dump(scalers, 'db/02b_neural_networks/saved_models/numeric_scalers.pkl')

# Numeric matrix for the neural networks
X_num_scaled = X[predictor_num_cols].astype('float32').values


## 5.4. TRAIN/TEST SPLIT (70/30)

In [None]:
# Stratified 70/30 split performed on row positions, so the same indices can
# slice the encoded categorical arrays, the numeric matrix and the target.
idx_all = np.arange(len(y))
idx_train, idx_test = train_test_split(
    idx_all, test_size=0.30, random_state=42, stratify=y
)

X_train_cat, X_test_cat = {}, {}
for cat_col in predictor_cat_cols:
    encoded_column = X_cat_encoded[cat_col]
    X_train_cat[cat_col] = encoded_column[idx_train]
    X_test_cat[cat_col] = encoded_column[idx_test]

X_train_num, X_test_num = X_num_scaled[idx_train], X_num_scaled[idx_test]

## 5.5. TARGET SPLIT AND CLASS WEIGHTS

In [None]:
# Target vectors for the two partitions.
y_train, y_test = y[idx_train], y[idx_test]

print(f"Train: {len(y_train):,}")
print(f"Test: {len(y_test):,}")

# "balanced" class weights computed on the training labels only, stored as a
# {class: weight} dict with plain Python types.
train_classes = np.unique(y_train)
class_weights = compute_class_weight('balanced', classes=train_classes, y=y_train)
class_weight_dict = dict(zip(map(int, train_classes), map(float, class_weights)))

print(f"\nClass weights: \n0={class_weight_dict[0]:.3f}, 1={class_weight_dict[1]:.3f}")

# 6. VALIDATION SPLIT

In [None]:
# Carve a stratified 20% validation set out of the training partition, again by
# position so every input array is sliced consistently.
idx_subtrain = np.arange(len(y_train))
idx_subtrain_split, idx_val_split = train_test_split(
    idx_subtrain, test_size=0.2, random_state=42, stratify=y_train
)

X_cat_subtrain, X_cat_val = {}, {}
for cat_col in predictor_cat_cols:
    train_codes = X_train_cat[cat_col]
    X_cat_subtrain[cat_col] = train_codes[idx_subtrain_split]
    X_cat_val[cat_col] = train_codes[idx_val_split]

X_num_subtrain, X_num_val = X_train_num[idx_subtrain_split], X_train_num[idx_val_split]
y_subtrain, y_val = y_train[idx_subtrain_split], y_train[idx_val_split]

print(f"  • Subtrain: {len(y_subtrain):,}")
print(f"  • Validation: {len(y_val):,}")

# 7. FOCAL LOSS

In [None]:
def focal_loss(gamma=2.0, alpha=0.25):
    """Build a binary focal-loss function suitable for ``model.compile``.

    Args:
        gamma: Exponent of the modulating factor ``(1 - pt) ** gamma``; larger
            values shrink the contribution of well-classified samples.
        alpha: Weight applied to positive samples (``y_true == 1``); negative
            samples are weighted by ``1 - alpha``.

    Returns:
        A callable ``loss(y_true, y_pred)`` that returns the focal loss averaged
        over the last axis only, i.e. one value per sample for ``(batch, 1)``
        inputs. The batch axis is deliberately left unreduced: a loss that is
        already averaged over the batch cannot be reweighted sample by sample,
        so the ``class_weight`` / ``sample_weight`` passed to ``fit`` would no
        longer distinguish between classes.
    """
    def focal_loss_fixed(y_true, y_pred):
        epsilon = K.epsilon()
        # Keep probabilities strictly inside (0, 1) so the log below stays finite.
        y_pred = K.clip(y_pred, epsilon, 1.0 - epsilon)
        # Labels may arrive as integers when the function is called directly.
        y_true = tf.cast(y_true, y_pred.dtype)
        # pt is the probability the model assigns to the true class.
        pt = y_true * y_pred + (1 - y_true) * (1 - y_pred)
        modulating_factor = K.pow(1.0 - pt, gamma)
        alpha_factor = y_true * alpha + (1 - y_true) * (1 - alpha)
        cross_entropy = -K.log(pt)
        loss = alpha_factor * modulating_factor * cross_entropy
        # Reduce over the output axis only; the caller performs the batch reduction.
        return K.mean(loss, axis=-1)
    return focal_loss_fixed

# 8. HELPER FUNCTIONS FOR COMPREHENSIVE METRICS

In [None]:
def calculate_nn_comprehensive_metrics(model, model_name, arch_name, 
                                       X_train_inputs, X_test_inputs,
                                       y_train, y_test, 
                                       training_time_minutes,
                                       batch_size=2048):
    """Evaluate a trained binary classifier on the train and test partitions.

    Predictions are thresholded at 0.5. Besides the classification metrics, the
    function records the train/test gaps, the inference time, an estimate of
    the model size and the parameter counts.

    Args:
        model: Trained model exposing ``predict``, ``get_weights``,
            ``count_params`` and ``trainable_weights``.
        model_name: Value stored under the ``'Model'`` key.
        arch_name: Value stored under the ``'Architecture'`` key.
        X_train_inputs: Model inputs for the training partition.
        X_test_inputs: Model inputs for the test partition.
        y_train: True labels (0/1) of the training partition.
        y_test: True labels (0/1) of the test partition.
        training_time_minutes: Training duration, copied into the result.
        batch_size: Batch size used for every ``predict`` call.

    Returns:
        Tuple ``(metrics, y_pred_test, y_prob_test)``: the metrics dict, the
        thresholded test predictions and the raw test probabilities.
    """
    
    y_prob_train = model.predict(X_train_inputs, batch_size=batch_size, verbose=0).flatten()
    y_pred_train = (y_prob_train > 0.5).astype(int)
    
    y_prob_test = model.predict(X_test_inputs, batch_size=batch_size, verbose=0).flatten()
    y_pred_test = (y_prob_test > 0.5).astype(int)
    
    # Timed on a separate, repeated predict call; reported in ms per test sample.
    inference_start = time.time()
    _ = model.predict(X_test_inputs, batch_size=batch_size, verbose=0)
    inference_time = (time.time() - inference_start) / len(y_test) * 1000
    
    # labels=[0, 1] guarantees a 2x2 matrix even when only one class appears in
    # the labels and predictions (e.g. a model that always predicts class 0);
    # without it the 4-way unpacking and the [0, 1] indexing below would fail.
    cm_train = confusion_matrix(y_train, y_pred_train, labels=[0, 1])
    cm_test = confusion_matrix(y_test, y_pred_test, labels=[0, 1])
    tn, fp, fn, tp = cm_test.ravel()
    
    train_accuracy = accuracy_score(y_train, y_pred_train)
    test_accuracy = accuracy_score(y_test, y_pred_test)
    train_precision = precision_score(y_train, y_pred_train, zero_division=0)
    test_precision = precision_score(y_test, y_pred_test, zero_division=0)
    train_recall = recall_score(y_train, y_pred_train, zero_division=0)
    test_recall = recall_score(y_test, y_pred_test, zero_division=0)
    train_f1 = f1_score(y_train, y_pred_train, zero_division=0)
    test_f1 = f1_score(y_test, y_pred_test, zero_division=0)
    
    # Specificity = TN / (TN + FP); defined as 0 when there are no negatives.
    train_specificity = cm_train[0, 0] / (cm_train[0, 0] + cm_train[0, 1]) if (cm_train[0, 0] + cm_train[0, 1]) > 0 else 0
    test_specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    
    # Geometric mean of recall and specificity.
    train_gmean = np.sqrt(train_recall * train_specificity)
    test_gmean = np.sqrt(test_recall * test_specificity)
    
    train_mcc = matthews_corrcoef(y_train, y_pred_train)
    test_mcc = matthews_corrcoef(y_test, y_pred_test)
    
    train_balanced_acc = balanced_accuracy_score(y_train, y_pred_train)
    test_balanced_acc = balanced_accuracy_score(y_test, y_pred_test)
    
    train_kappa = cohen_kappa_score(y_train, y_pred_train)
    test_kappa = cohen_kappa_score(y_test, y_pred_test)
    
    # Probability-based metrics are computed on the test partition only.
    test_roc_auc = roc_auc_score(y_test, y_prob_test)
    test_log_loss = log_loss(y_test, y_prob_test)
    test_brier_score = brier_score_loss(y_test, y_prob_test)
    
    # Size estimate that counts 4 bytes per weight value.
    model_size_mb = sum([np.prod(w.shape) * 4 for w in model.get_weights()]) / (1024 * 1024)
    
    total_params = model.count_params()
    trainable_params = sum([K.count_params(w) for w in model.trainable_weights])
    non_trainable_params = total_params - trainable_params
    
    # Train-minus-test gaps.
    accuracy_gap = train_accuracy - test_accuracy
    f1_gap = train_f1 - test_f1
    precision_gap = train_precision - test_precision
    recall_gap = train_recall - test_recall
    
    metrics = {
        'Model': model_name,
        'Architecture': arch_name,
        'Train_Accuracy': train_accuracy,
        'Train_Precision': train_precision,
        'Train_Recall': train_recall,
        'Train_F1': train_f1,
        'Train_Specificity': train_specificity,
        'Train_G_Mean': train_gmean,
        'Train_Balanced_Accuracy': train_balanced_acc,
        'Train_MCC': train_mcc,
        'Train_Kappa': train_kappa,
        'Test_Accuracy': test_accuracy,
        'Test_Precision': test_precision,
        'Test_Recall': test_recall,
        'Test_F1': test_f1,
        'Test_Specificity': test_specificity,
        'Test_G_Mean': test_gmean,
        'Test_Balanced_Accuracy': test_balanced_acc,
        'Test_MCC': test_mcc,
        'Test_Kappa': test_kappa,
        'Test_ROC_AUC': test_roc_auc,
        'Test_Log_Loss': test_log_loss,
        'Test_Brier_Score': test_brier_score,
        'Test_True_Negatives': int(tn),
        'Test_False_Positives': int(fp),
        'Test_False_Negatives': int(fn),
        'Test_True_Positives': int(tp),
        'Test_TN_Percentage': (tn / len(y_test)) * 100,
        'Test_FP_Percentage': (fp / len(y_test)) * 100,
        'Test_FN_Percentage': (fn / len(y_test)) * 100,
        'Test_TP_Percentage': (tp / len(y_test)) * 100,
        'Accuracy_Gap_Train_Test': accuracy_gap,
        'F1_Gap_Train_Test': f1_gap,
        'Precision_Gap_Train_Test': precision_gap,
        'Recall_Gap_Train_Test': recall_gap,
        'Training_Time_Minutes': training_time_minutes,
        'Inference_Time_ms_per_sample': inference_time,
        'Model_Size_MB': model_size_mb,
        'Total_Parameters': int(total_params),
        'Trainable_Parameters': int(trainable_params),
        'Non_Trainable_Parameters': int(non_trainable_params),
        'Total_Train_Samples': len(y_train),
        'Total_Test_Samples': len(y_test)
    }
    
    return metrics, y_pred_test, y_prob_test


def build_tf_datasets(X_cat_dict_train, X_num_train, y_train,
                      X_cat_dict_val, X_num_val, y_val,
                      predictor_cat_cols,
                      batch_size=1024, shuffle_buffer=100000):
    """Create the batched training and validation ``tf.data`` pipelines.

    Each element is ``(inputs, label)`` where ``inputs`` is a tuple holding one
    int32 array per categorical column (in ``predictor_cat_cols`` order)
    followed, when it is present and non-empty, by the float32 numeric matrix.
    Labels are cast to float32. Only the training pipeline is shuffled.

    Returns:
        Tuple ``(train_ds, val_ds)``.
    """

    def _pack_inputs(cat_dict, num_array):
        # Categorical codes first, numeric block (if any) last.
        packed = [cat_dict[name].astype('int32') for name in predictor_cat_cols]
        has_numeric = (num_array is not None) and (num_array.size > 0)
        if has_numeric:
            packed.append(num_array.astype('float32'))
        return tuple(packed)

    train_features = _pack_inputs(X_cat_dict_train, X_num_train)
    val_features = _pack_inputs(X_cat_dict_val, X_num_val)

    train_ds = (
        Dataset.from_tensor_slices((train_features, y_train.astype('float32')))
        .shuffle(shuffle_buffer)
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )
    val_ds = (
        Dataset.from_tensor_slices((val_features, y_val.astype('float32')))
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )

    return train_ds, val_ds

# 9. NETWORK ARCHITECTURES

In [None]:
def _build_feature_inputs(embedding_info, num_numeric_feats, predictor_cat_cols):
    """Create the Keras inputs and the feature tensors shared by both builders.

    One ``Input`` of shape ``(1,)`` plus a flattened ``Embedding`` is created per
    categorical column; when ``num_numeric_feats > 0`` a numeric ``Input`` is
    appended and used as-is.

    Returns:
        Tuple ``(inputs, feature_tensors)``, both ordered as
        ``predictor_cat_cols`` followed by the numeric input.
    """
    inputs = []
    feature_tensors = []

    for col in predictor_cat_cols:
        info = embedding_info[col]

        inp = Input(shape=(1,), name=f"inp_{col}")
        emb = Embedding(input_dim=info['n_categories'], output_dim=info['embed_dim'],
                        name=f"emb_{col}")(inp)
        inputs.append(inp)
        feature_tensors.append(Flatten()(emb))

    if num_numeric_feats > 0:
        num_inp = Input(shape=(num_numeric_feats,), name="inp_numeric")
        inputs.append(num_inp)
        feature_tensors.append(num_inp)

    return inputs, feature_tensors


def _compile_binary_classifier(model, learning_rate):
    """Compile ``model`` with Adam, the focal loss and the shared metric set."""
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss=focal_loss(gamma=2.0, alpha=0.25),
        metrics=[
            tf.keras.metrics.AUC(name='auc_roc'),
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall'),
            tf.keras.metrics.BinaryAccuracy(name='accuracy')
        ]
    )
    return model


def build_resnet_style(embedding_info, num_numeric_feats, predictor_cat_cols,
                       num_blocks=3, base_units=256, dropout_rate=0.3,
                       learning_rate=1e-3):
    """Build and compile a dense network with residual (skip) connections.

    Args:
        embedding_info: Mapping ``column -> {'n_categories', 'embed_dim'}``.
        num_numeric_feats: Width of the numeric input; 0 disables that input.
        predictor_cat_cols: Categorical columns, in model-input order.
        num_blocks: Number of residual blocks.
        base_units: Units of every dense layer (kept constant so the skip
            connection can be added to the block output).
        dropout_rate: Dropout rate used inside and after every block.
        learning_rate: Adam learning rate.

    Returns:
        The compiled Keras ``Model`` with a single sigmoid output.
    """
    inputs, features = _build_feature_inputs(
        embedding_info, num_numeric_feats, predictor_cat_cols
    )

    x = Concatenate()(features) if len(features) > 1 else features[0]

    # Project to base_units so the first shortcut has the right width.
    x = Dense(base_units, activation='relu', name='initial_dense')(x)
    x = BatchNormalization(name='initial_bn')(x)

    for i in range(num_blocks):
        shortcut = x
        
        x = Dense(base_units, activation='relu', name=f'block_{i+1}_dense_1')(x)
        x = BatchNormalization(name=f'block_{i+1}_bn_1')(x)
        x = Dropout(dropout_rate, name=f'block_{i+1}_dropout_1')(x)
        
        x = Dense(base_units, activation='relu', name=f'block_{i+1}_dense_2')(x)
        x = BatchNormalization(name=f'block_{i+1}_bn_2')(x)
        
        # Residual connection: add the block input back before the final dropout.
        x = Add(name=f'block_{i+1}_add')([x, shortcut])
        x = Dropout(dropout_rate, name=f'block_{i+1}_dropout_2')(x)

    output = Dense(1, activation='sigmoid', name='output')(x)

    model = Model(inputs=inputs, outputs=output)
    return _compile_binary_classifier(model, learning_rate)


def build_wide_and_deep(embedding_info, num_numeric_feats, predictor_cat_cols,
                        deep_layers=(512, 256, 128), dropout_rate=0.3,
                        learning_rate=1e-3):
    """Build and compile a wide-and-deep binary classifier.

    The deep branch stacks Dense/BatchNorm/Dropout layers on the embeddings and
    numeric features; the wide branch concatenates the raw model inputs. Both
    branches are concatenated in front of the sigmoid output.

    Args:
        embedding_info: Mapping ``column -> {'n_categories', 'embed_dim'}``.
        num_numeric_feats: Width of the numeric input; 0 disables that input.
        predictor_cat_cols: Categorical columns, in model-input order.
        deep_layers: Units of each deep layer, in order (any iterable of ints;
            the default is a tuple so it is not a shared mutable object).
        dropout_rate: Dropout rate applied after every deep layer.
        learning_rate: Adam learning rate.

    Returns:
        The compiled Keras ``Model`` with a single sigmoid output.
    """
    inputs, features = _build_feature_inputs(
        embedding_info, num_numeric_feats, predictor_cat_cols
    )
    # The wide branch consumes exactly the model inputs (categorical inputs first,
    # numeric input last), so the same list is reused.
    wide_inputs = inputs

    deep = Concatenate(name='deep_concat')(features) if len(features) > 1 else features[0]
    
    for i, units in enumerate(deep_layers):
        deep = Dense(units, activation='relu', name=f"deep_dense_{i+1}")(deep)
        deep = BatchNormalization(name=f"deep_bn_{i+1}")(deep)
        deep = Dropout(dropout_rate, name=f"deep_dropout_{i+1}")(deep)

    wide = Concatenate(name='wide_concat')(wide_inputs) if len(wide_inputs) > 1 else wide_inputs[0]
    wide = Flatten(name='wide_flatten')(wide)

    combined = Concatenate(name='wide_deep_concat')([wide, deep])
    output = Dense(1, activation='sigmoid', name='output')(combined)

    model = Model(inputs=inputs, outputs=output)
    return _compile_binary_classifier(model, learning_rate)

# 8. DEFINING MODELS AND ARCHITECTURES

Because the machine used for training cannot train every network in a single run, a comment/uncomment strategy is used: exactly one network (with its three architectures) is left uncommented per run, and every other network stays commented out. To execute, uncomment the network to be trained and comment out the rest. All results are saved to disk and accumulated, so nothing is lost when other networks are trained later.

In [None]:
# Width of the numeric feature matrix; passed to the model builders.
num_num_feats = X_train_num.shape[1]

# One entry per network family. Each entry names the builder to use
# ('network_type') and lists the architectures to train; every architecture's
# 'params' holds the builder arguments plus 'batch_size' and 'epochs'.
# Only one family may be active per run: the others are kept commented out
# (see the check right after this dictionary).
network_configs = {


    # 'ResNet_Style': {
    #     'network_type': 'resnet_style',
    #     'architectures': [
    #         {
    #             'name': 'ResNet_Style_Architecture_1',
    #             'params': {
    #                 'num_blocks': 2,
    #                 'base_units': 256,
    #                 'dropout_rate': 0.3,
    #                 'learning_rate': 1e-3,
    #                 'batch_size': 2048,
    #                 'epochs': 200
    #             }
    #         },
    #         {
    #             'name': 'ResNet_Style_Architecture_2',
    #             'params': {
    #                 'num_blocks': 3,
    #                 'base_units': 384,
    #                 'dropout_rate': 0.3,
    #                 'learning_rate': 5e-4,
    #                 'batch_size': 2048,
    #                 'epochs': 200
    #             }
    #         },
    #         {
    #             'name': 'ResNet_Style_Architecture_3',
    #             'params': {
    #                 'num_blocks': 4,
    #                 'base_units': 512,
    #                 'dropout_rate': 0.4,
    #                 'learning_rate': 1e-4,
    #                 'batch_size': 4096,
    #                 'epochs': 200
    #             }
    #         }
    #     ]
    # },
    
    'Deep': {
        'network_type': 'wide_and_deep',
        'architectures': [
            {
                'name': 'Deep_Architecture_1',
                'params': {
                    'deep_layers': [256, 128],
                    'dropout_rate': 0.3,
                    'learning_rate': 1e-3,
                    'batch_size': 2048,
                    'epochs': 200
                }
            },
            {
                'name': 'Deep_Architecture_2',
                'params': {
                    'deep_layers': [512, 256, 128],
                    'dropout_rate': 0.3,
                    'learning_rate': 5e-4,
                    'batch_size': 2048,
                    'epochs': 200
                }
            },
            {
                'name': 'Deep_Architecture_3',
                'params': {
                    'deep_layers': [1024, 512, 256, 128],
                    'dropout_rate': 0.4,
                    'learning_rate': 1e-4,
                    'batch_size': 4096,
                    'epochs': 200
                }
            }
        ]
    }
}

# Guard for the comment/uncomment workflow: abort unless exactly one network
# family is uncommented in the dictionary above.
active_networks = list(network_configs.keys())
if len(active_networks) != 1:
    raise ValueError(f"ERROR: {len(active_networks)} networks active, need exactly 1")

print(f"\nActive: {active_networks[0]}")

# 9. TRAIN ARCHITECTURES WITH ACCUMULATION

In [None]:
# Train every architecture of each configured network, save the per-architecture
# artefacts, pick the best architecture and merge its results into the cumulative
# comparison files kept on disk.
# NOTE(review): the best architecture is chosen by Test_F1, so the test set also
# drives model selection — confirm this is acceptable for the reported test metrics.
for network_name, config in network_configs.items():
    print(f"\n{'='*80}")
    print(f"NETWORK: {network_name}")
    print(f"{'='*80}")
    
    network_type = config['network_type']
    # Used in the progress label instead of a hard-coded architecture count.
    n_architectures = len(config['architectures'])
    architecture_results = []
    trained_models = []
    
    for arch_idx, architecture in enumerate(config['architectures'], 1):
        arch_name = architecture['name']
        arch_params = architecture['params']
        
        print(f"\n{'-'*80}")
        print(f"[{arch_idx}/{n_architectures}] {arch_name}")
        print(f"{'-'*80}")
        
        # Datasets are rebuilt per architecture because the batch size may differ.
        batch_size = arch_params['batch_size']
        train_ds, val_ds = build_tf_datasets(
            X_cat_subtrain, X_num_subtrain, y_subtrain,
            X_cat_val, X_num_val, y_val,
            predictor_cat_cols,
            batch_size=batch_size
        )
        
        print(f"Building {network_type}...")
        
        if network_type == 'resnet_style':
            model = build_resnet_style(
                embedding_info, num_num_feats, predictor_cat_cols,
                num_blocks=arch_params['num_blocks'],
                base_units=arch_params['base_units'],
                dropout_rate=arch_params['dropout_rate'],
                learning_rate=arch_params['learning_rate']
            )
        elif network_type == 'wide_and_deep':
            model = build_wide_and_deep(
                embedding_info, num_num_feats, predictor_cat_cols,
                deep_layers=arch_params['deep_layers'],
                dropout_rate=arch_params['dropout_rate'],
                learning_rate=arch_params['learning_rate']
            )
        else:
            # Fail loudly instead of silently reusing a model left over from a
            # previous iteration (or hitting a NameError on the first one).
            raise ValueError(f"Unknown network_type: {network_type!r}")
        
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=7, min_lr=1e-7, verbose=1)
        ]
        
        print(f"  Training up to {arch_params['epochs']} epochs...")
        start_time = datetime.now()
        
        history = model.fit(
            train_ds,
            validation_data=val_ds,
            epochs=arch_params['epochs'],
            callbacks=callbacks,
            class_weight=class_weight_dict,
            verbose=1
        )
        
        training_time = (datetime.now() - start_time).total_seconds() / 60
        
        # Metrics are computed on the full training partition (subtrain +
        # validation) and on the held-out test partition.
        X_train_inputs = [X_train_cat[col].astype('int32') for col in predictor_cat_cols]
        X_train_inputs.append(X_train_num.astype('float32'))
        X_train_inputs = tuple(X_train_inputs)
        
        X_test_inputs = [X_test_cat[col].astype('int32') for col in predictor_cat_cols]
        X_test_inputs.append(X_test_num.astype('float32'))
        X_test_inputs = tuple(X_test_inputs)
        
        print(f"  Calculating metrics...")
        metrics, y_pred_test, y_prob_test = calculate_nn_comprehensive_metrics(
            model, network_name, arch_name,
            X_train_inputs, X_test_inputs,
            y_train, y_test,
            training_time,
            batch_size=batch_size
        )
        print(f"\nRESULTS:")
        print(f"Training time: {metrics['Training_Time_Minutes']:.2f} min \n")

        print(f"Train Precision: {metrics['Train_Precision']:.4f}")
        print(f"Train Recall: {metrics['Train_Recall']:.4f}")
        print(f"Train Accuracy: {metrics['Train_Accuracy']:.4f}")
        print(f"Train F1: {metrics['Train_F1']:.4f}")
        print(f"Train MCC: {metrics['Train_MCC']:.4f}\n")


        print(f"Test Precision: {metrics['Test_Precision']:.4f}")
        print(f"Test Recall: {metrics['Test_Recall']:.4f}")
        print(f"Test Accuracy: {metrics['Test_Accuracy']:.4f}")
        print(f"Test F1: {metrics['Test_F1']:.4f}")
        print(f"Test MCC: {metrics['Test_MCC']:.4f}")
        print(f"Test ROC AUC: {metrics['Test_ROC_AUC']:.4f}\n")
        
        architecture_results.append(metrics)
        trained_models.append({
            'architecture': arch_name,
            'model': model,
            'metrics': metrics,
            'y_pred_test': y_pred_test,
            'y_prob_test': y_prob_test,
            'history': history
        })
        
        # Save individual
        pd.DataFrame([metrics]).to_excel(
            f'db/02b_neural_networks/metrics/by_architecture/{network_name}_{arch_name}_comprehensive_metrics.xlsx', 
            index=False, engine='openpyxl')
        
        cm = confusion_matrix(y_test, y_pred_test)
        pd.DataFrame(cm, index=['True_Others', 'True_Displacement'],
                     columns=['Pred_Others', 'Pred_Displacement']).to_excel(
            f'db/02b_neural_networks/model_data/confusion_matrices/by_architecture/{network_name}_{arch_name}_confusion_matrix.xlsx', 
            engine='openpyxl')
        
        # The scalar AUC is repeated on every row of the ROC table.
        fpr, tpr, thresholds = roc_curve(y_test, y_prob_test)
        pd.DataFrame({'FPR': fpr, 'TPR': tpr, 'Thresholds': thresholds, 'AUC': auc(fpr, tpr)}).to_excel(
            f'db/02b_neural_networks/model_data/roc_data/by_architecture/{network_name}_{arch_name}_roc_curve.xlsx', 
            index=False, engine='openpyxl')
        
        report = classification_report(y_test, y_pred_test, target_names=['Others', 'Displacement'], output_dict=True)
        pd.DataFrame(report).transpose().to_excel(
            f'db/02b_neural_networks/metrics/by_architecture/{network_name}_{arch_name}_classification_report.xlsx', 
            engine='openpyxl')
        
        pd.DataFrame(history.history).to_excel(
            f'db/02b_neural_networks/model_data/learning_curves/by_architecture/{network_name}_{arch_name}_learning_curves.xlsx', 
            index=False, engine='openpyxl')
        
        pd.DataFrame([{'network_type': network_type, **arch_params}]).to_excel(
            f'db/02b_neural_networks/model_data/hyperparameters/{network_name}_{arch_name}_hyperparameters.xlsx', 
            index=False, engine='openpyxl')
        
        print(f"Saved")
    
    # Compare
    print(f"\n{'='*80}")
    print(f"COMPARING")
    print(f"{'='*80}")
    
    comparison_df = pd.DataFrame(architecture_results).sort_values('Test_F1', ascending=False)
    comparison_df.to_excel(
        f'db/02b_neural_networks/model_data/architecture_comparisons/{network_name}_architectures_comparison.xlsx', 
        index=False, engine='openpyxl')
    
    print(f"\nRanked:")
    # Number the rows by their position in the sorted frame. The index labels
    # still refer to the original training order, so they are not ranks.
    for rank, (_row_label, row) in enumerate(comparison_df.iterrows(), 1):
        print(f"  {rank}. {row['Architecture']}: F1={row['Test_F1']:.4f}")
    
    best_arch_name = comparison_df.iloc[0]['Architecture']
    print(f"\nBEST: {best_arch_name}")
    
    # Recover the in-memory model and predictions of the winning architecture.
    best_model_data = None
    for trained_data in trained_models:
        if trained_data['architecture'] == best_arch_name:
            best_model_data = trained_data
            break
    
    # Save best
    print(f"\n{'='*80}")
    print(f"SAVING BEST")
    print(f"{'='*80}")
    
    best_model_data['model'].save(f'db/02b_neural_networks/saved_models/{network_name}_best_model.keras')
    
    pd.DataFrame({
        'Sample_Index': range(len(y_test)),
        'True_Label': y_test,
        'Predicted_Label': best_model_data['y_pred_test'],
        'Probability_Class_1': best_model_data['y_prob_test'],
        'Correct': y_test == best_model_data['y_pred_test']
    }).to_csv(f'db/02b_neural_networks/predictions/{network_name}_best_predictions.csv', index=False)
    
    cm = confusion_matrix(y_test, best_model_data['y_pred_test'])
    pd.DataFrame(cm, index=['True_Others', 'True_Displacement'],
                 columns=['Pred_Others', 'Pred_Displacement']).to_excel(
        f'db/02b_neural_networks/model_data/confusion_matrices/{network_name}_best_confusion_matrix.xlsx', 
        engine='openpyxl')
    
    fpr, tpr, thresholds = roc_curve(y_test, best_model_data['y_prob_test'])
    pd.DataFrame({'FPR': fpr, 'TPR': tpr, 'Thresholds': thresholds, 'AUC': auc(fpr, tpr)}).to_excel(
        f'db/02b_neural_networks/model_data/roc_data/{network_name}_best_roc_curve.xlsx', 
        index=False, engine='openpyxl')
    
    pd.DataFrame([best_model_data['metrics']]).to_excel(
        f'db/02b_neural_networks/metrics/{network_name}_best_comprehensive_metrics.xlsx', 
        index=False, engine='openpyxl')
    
    report = classification_report(y_test, best_model_data['y_pred_test'], 
                                   target_names=['Others', 'Displacement'], output_dict=True)
    pd.DataFrame(report).transpose().to_excel(
        f'db/02b_neural_networks/metrics/{network_name}_best_classification_report.xlsx', 
        engine='openpyxl')
    
    pd.DataFrame(best_model_data['history'].history).to_excel(
        f'db/02b_neural_networks/model_data/learning_curves/{network_name}_best_learning_curves.xlsx', 
        index=False, engine='openpyxl')
    
    best_params = None
    for arch in config['architectures']:
        if arch['name'] == best_arch_name:
            best_params = {'network_type': network_type, **arch['params']}
            break
    if best_params:
        pd.DataFrame([best_params]).to_excel(
            f'db/02b_neural_networks/model_data/hyperparameters/{network_name}_best_hyperparameters.xlsx', 
            index=False, engine='openpyxl')
    
    print(f"Best saved")
    
    # ACCUMULATION
    print(f"\n{'='*80}")
    print(f"ACCUMULATION")
    print(f"{'='*80}")
    
    # All architectures
    all_arch_path = 'db/02b_neural_networks/metrics/all_architectures_tested_comprehensive.xlsx'
    new_arch_results = pd.DataFrame(architecture_results)
    
    if os.path.exists(all_arch_path):
        print(f"Loading previous...")
        previous_arch = pd.read_excel(all_arch_path)
        # Drop earlier rows of this network so a re-run replaces them.
        previous_arch = previous_arch[previous_arch['Model'] != network_name]
        all_arch_combined = pd.concat([previous_arch, new_arch_results], ignore_index=True)
        print(f"Previous: {len(previous_arch)} | New: {len(new_arch_results)} | Total: {len(all_arch_combined)}")
    else:
        print(f"Creating new...")
        all_arch_combined = new_arch_results
        print(f"Total: {len(all_arch_combined)}")
    
    all_arch_combined.to_excel(all_arch_path, index=False, engine='openpyxl')
    print(f"Saved")
    
    # Best models
    best_models_path = 'db/02b_neural_networks/comparative_tables/best_models_comparison_complete.xlsx'
    new_best_model = pd.DataFrame([best_model_data['metrics']])
    
    if os.path.exists(best_models_path):
        print(f"Loading previous best...")
        previous_best = pd.read_excel(best_models_path)
        # Same replacement rule as above: one row per network.
        previous_best = previous_best[previous_best['Model'] != network_name]
        best_combined = pd.concat([previous_best, new_best_model], ignore_index=True)
        best_combined = best_combined.sort_values('Test_F1', ascending=False).reset_index(drop=True)
        print(f"Previous: {len(previous_best)} | New: 1 | Total: {len(best_combined)}")
    else:
        print(f"Creating new...")
        best_combined = new_best_model
        print(f"Total: {len(best_combined)}")
    
    best_combined.to_excel(best_models_path, index=False, engine='openpyxl')
    print(f"Saved")
    
    # Publication-ready
    pub_cols = [
        'Model', 'Architecture', 'Test_Accuracy', 'Test_Precision', 'Test_Recall', 'Test_F1',
        'Test_Specificity', 'Test_G_Mean', 'Test_MCC', 'Test_Balanced_Accuracy',
        'Test_ROC_AUC', 'Test_Kappa', 'Test_Log_Loss',
        'Training_Time_Minutes', 'Inference_Time_ms_per_sample',
        'Model_Size_MB', 'Total_Parameters', 'F1_Gap_Train_Test'
    ]
    pub_table = best_combined[pub_cols].copy()
    numeric_cols = pub_table.select_dtypes(include=[np.number]).columns
    pub_table[numeric_cols] = pub_table[numeric_cols].round(4)
    pub_table.to_excel('db/02b_neural_networks/comparative_tables/best_models_comparison_publication_ready.xlsx', 
                       index=False, engine='openpyxl')
    
    print(f"\nAccumulation completed")
    print(f"Network added: {network_name}")
    print(f"Total networks: {len(best_combined)}")

# 10. TRAINING COMPLETED

In [None]:
# Summarise the cumulative best-model table stored on disk, if it exists.
best_models_path = 'db/02b_neural_networks/comparative_tables/best_models_comparison_complete.xlsx'
if os.path.exists(best_models_path):
    current_best = pd.read_excel(best_models_path).sort_values('Test_F1', ascending=False)
    
    print(f"\nCURRENT STATE:")
    print(f"Total networks: {len(current_best)}")
    print(f"\nTop 3:")
    # Rank by position in the sorted frame. The index labels come from the file's
    # row order and are only ranks while that file happens to be stored sorted.
    for rank, (_row_label, row) in enumerate(current_best.head(3).iterrows(), 1):
        print(f"    {rank}. {row['Model']}: F1={row['Test_F1']:.4f}")
    
    print(f"\nAll trained:")
    for network in current_best['Model'].tolist():
        print(f"{network}")
else:
    print("\nNo networks found")

print("NEXT: Comment current network, uncomment next, run again")

In [27]:
# Record the versions of the main libraries used by this notebook.
import numpy
import tensorflow
import sklearn

for _library in (numpy, tensorflow, sklearn):
    print(f"{_library.__name__}: {_library.__version__}")

numpy: 2.1.3
tensorflow: 2.20.0
sklearn: 1.7.2
