# Chicago Crime Prediction Project - 05Model Development Strategy (PyTorch)

In [15]:
import pandas as pd
import numpy as np
import os
from datetime import datetime
import pickle
import warnings
from tqdm import tqdm
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, precision_recall_curve
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline as ImbPipeline
import joblib
import json

In [16]:
# Random Seed
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)
    
# Data Path
DATA_DIR = "data/"
PROCESSED_DIR = "data/processed/"
FEATURES_DIR = "data/features/"
MODELS_DIR = "models/"
RESULTS_DIR = "results/"

os.makedirs(MODELS_DIR, exist_ok=True)
os.makedirs(RESULTS_DIR, exist_ok=True)

# Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


## Some Useful Functions

In [18]:
# Load Data
def load_feature_data(filename):
    filepath = os.path.join(FEATURES_DIR, filename)
    
    if os.path.exists(filepath):
        if filename.endswith('.csv'):
            return pd.read_csv(filepath, parse_dates=['date'])
        elif filename.endswith('.parquet'):
            return pd.read_parquet(filepath)
        elif filename.endswith('.pkl'):
            with open(filepath, 'rb') as f:
                return pickle.load(f)
        else:
            print(f"Unsupported file format: {filename}")
            return None
    else:
        print(f"File not found: {filepath}")
        return None

# Save Model
def save_model(model, model_name, info=None):

    filepath = os.path.join(MODELS_DIR, f"{model_name}.pkl")

    try:
        if isinstance(model, nn.Module):
            torch.save(model.state_dict(), os.path.join(MODELS_DIR, f"{model_name}.pt"))
        else:
            joblib.dump(model, filepath)
        
        print(f"Model saved to {filepath}")
        
        if info:
            info_filepath = os.path.join(MODELS_DIR, f"{model_name}_info.json")
            info_df = pd.DataFrame([info])
            info_df.to_json(info_filepath, orient='records')
            print(f"Model info saved to {info_filepath}")
        
        return True
    except Exception as e:
        print(f"Error saving model: {e}")
        return False

# Save Evaluation Results
def save_evaluation_results(results, model_name):
    filepath = os.path.join(RESULTS_DIR, f"{model_name}_evaluation.txt")
    
    with open(filepath, 'w') as f:
        f.write(f"Evaluation Results for {model_name}\n")
        f.write("="*50 + "\n\n")
        
        for key, value in results.items():
            if isinstance(value, dict):
                f.write(f"{key}:\n")
                for k, v in value.items():
                    f.write(f"  {k}: {v}\n")
            else:
                f.write(f"{key}: {value}\n")
        
    print(f"Evaluation results saved to {filepath}")
    
#------ Prepare Datasets ------#

def prepare_datasets(df, target_col, test_size=0.3, random_state=42, features_to_drop=None):
    
    data = df.copy()
    
    if target_col not in data.columns:
        print(f"Error: Target column '{target_col}' not found in the dataset")
        return None
    
    if features_to_drop:
        for col in features_to_drop:
            if col in data.columns and col != target_col:
                data = data.drop(col, axis=1)
    
    # Target Variable
    y = data[target_col]
    X = data.drop(target_col, axis=1)

    categorical_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()
    numerical_cols = X.select_dtypes(include=['int', 'float']).columns.tolist()
    
    print(f"Features: {len(X.columns)} total, {len(categorical_cols)} categorical, {len(numerical_cols)} numerical")
    
    # Split Data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y if len(np.unique(y)) < 50 else None
    )
    
    print(f"Training set: {X_train.shape[0]} samples")
    print(f"Test set: {X_test.shape[0]} samples")
    
    # Check Target Distribution
    target_distribution = pd.Series(y_train).value_counts(normalize=True)
    print(f"Target distribution in training set:")
    for target, percentage in target_distribution.items():
        print(f"  {target}: {percentage:.2%}")
    
    return {
        'X_train': X_train,
        'y_train': y_train,
        'X_test': X_test,
        'y_test': y_test,
        'categorical_cols': categorical_cols,
        'numerical_cols': numerical_cols,
        'target_col': target_col
    }
    
# Handle Class Imbalance
def handle_class_imbalance(X_train, y_train, method='smote', sampling_strategy='auto', random_state=42):

    print(f"Handling class imbalance using {method}...")
    
    if method == 'smote':
        smote = SMOTE(sampling_strategy=sampling_strategy, random_state=random_state)
        X_resampled, y_resampled = smote.fit_resample(X_train, y_train)
        
    elif method == 'undersampling':
        under_sampler = RandomUnderSampler(sampling_strategy=sampling_strategy, random_state=random_state)
        X_resampled, y_resampled = under_sampler.fit_resample(X_train, y_train)
        
    elif method == 'both':
        over_sampler = SMOTE(sampling_strategy=sampling_strategy, random_state=random_state)
        under_sampler = RandomUnderSampler(sampling_strategy=sampling_strategy, random_state=random_state)
        

        X_over, y_over = over_sampler.fit_resample(X_train, y_train)
        X_resampled, y_resampled = under_sampler.fit_resample(X_over, y_over)
        
    else:
        print("Invalid method. Using original data.")
        return X_train, y_train
    
    # Calculate Class Distribution
    resampled_dist = pd.Series(y_resampled).value_counts(normalize=True)
    print("Class distribution after resampling:")
    for cls, pct in resampled_dist.items():
        print(f"  {cls}: {pct:.2%}")
    
    return X_resampled, y_resampled

# Create Preprocessing Pipeline
def create_preprocessing_pipeline(categorical_cols, numerical_cols):
    numerical_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ])
    
    categorical_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False))  
    ])
    
    preprocessor = ColumnTransformer([
        ('numerical', numerical_pipeline, numerical_cols),
        ('categorical', categorical_pipeline, categorical_cols)
    ])
    
    return preprocessor

def get_data_sample(df):

    sample_size = min(100000, len(df))
    use_full = input(f"Use full dataset ({len(df)} records) for model development? (y/n, default: n): ")
    
    if use_full.lower() == 'y':
        print(f"Using full dataset with {len(df)} records.")
        return df
    else:
        print(f"Using {sample_size} records for model development.")
        return df.sample(n=sample_size, random_state=42)

def get_problem_config(df):
    # Show available targets
    potential_targets = ['crime_category', 'crime_type', 'is_arrest', 'is_domestic', 'threat_level']
    available_targets = [col for col in potential_targets if col in df.columns]
    
    print("\nAvailable target variables:")
    for i, col in enumerate(available_targets, 1):
        unique_vals = df[col].nunique()
        print(f"{i}. {col} ({unique_vals} unique values)")
    
    target_idx = 0
    target_choice = input(f"Choose target variable (1-{len(available_targets)}, default: 1): ")
    try:
        target_idx = int(target_choice) - 1 if target_choice else 0
    except ValueError:
        pass
        
    target_col = available_targets[target_idx]
    
    # Determine problem type
    unique_vals = df[target_col].nunique()
    class_problem = 'binary' if unique_vals == 2 else 'multiclass' if unique_vals < 100 else 'regression'

    problem_choice = input(f"Confirm problem type (binary/multiclass/regression, default: {class_problem}): ")
    if problem_choice in ['binary', 'multiclass', 'regression']:
        class_problem = problem_choice
        
    print(f"Using {target_col} as target with {class_problem} problem type")
    return target_col, class_problem

def save_final_results(performance, target_col, class_problem):

    results_file = os.path.join(RESULTS_DIR, 'final_model_performance.json')
    
    results = {
        'target_variable': target_col,
        'problem_type': class_problem,
        'model_performance': performance,
        'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    }
    
    with open(results_file, 'w') as f:
        json.dump(results, f, indent=4)
    print(f"Final results saved to {results_file}")

def handle_class_imbalance_if_needed(datasets, class_problem):

    if class_problem == 'regression':
        return datasets
        
    y_train = datasets['y_train']
    class_distribution = pd.Series(y_train).value_counts(normalize=True)
    
    imbalance_threshold = 0.2 
    min_class_ratio = class_distribution.min()
    
    if min_class_ratio < imbalance_threshold:
        print(f"\nDetected class imbalance (minimum class ratio: {min_class_ratio:.2%})")
        print("Current class distribution:")
        for cls, ratio in class_distribution.items():
            print(f"  Class {cls}: {ratio:.2%}")
            
        # ask user if they want to handle class imbalance
        handle_imbalance = input("\nWould you like to handle class imbalance? (y/n, default: y): ")
        
        if handle_imbalance.lower() != 'n':
            print("\nAvailable methods:")
            print("1. SMOTE (over-sampling)")
            print("2. Random under-sampling")
            print("3. Combined (SMOTE + under-sampling)")
            
            method_choice = input("Choose method (1-3, default: 1): ")
            
            method_map = {
                '1': 'smote',
                '2': 'undersampling',
                '3': 'both'
            }
            
            method = method_map.get(method_choice, 'smote')

            X_resampled, y_resampled = handle_class_imbalance(
                datasets['X_train'], 
                datasets['y_train'],
                method=method
            )

            datasets['X_train'] = X_resampled
            datasets['y_train'] = y_resampled
            
            print("\nClass balance has been handled.")
            new_distribution = pd.Series(y_resampled).value_counts(normalize=True)
            print("New class distribution:")
            for cls, ratio in new_distribution.items():
                print(f"  Class {cls}: {ratio:.2%}")
    
    return datasets

## Fine-Tuning models

In [20]:
def optimize_hyperparameters(datasets, model_type='random_forest', class_problem='multiclass'):

    print(f"\n--- Optimizing Hyperparameters for {model_type} ---")
    
    X_train, y_train = datasets['X_train'], datasets['y_train']
    categorical_cols = datasets['categorical_cols']
    numerical_cols = datasets['numerical_cols']

    preprocessor = create_preprocessing_pipeline(categorical_cols, numerical_cols)
    
    param_grid = {}
    
    if model_type == 'random_forest':
        model = RandomForestClassifier(random_state=42)
        param_grid = {
            'classifier__n_estimators': [50, 100, 200],
            'classifier__max_depth': [None, 10, 20, 30],
            'classifier__min_samples_split': [2, 5, 10],
            'classifier__min_samples_leaf': [1, 2, 4]
        }
    elif model_type == 'gradient_boosting':
        model = GradientBoostingClassifier(random_state=42)
        param_grid = {
            'classifier__n_estimators': [50, 100, 200],
            'classifier__learning_rate': [0.01, 0.1, 0.2],
            'classifier__max_depth': [3, 5, 7],
            'classifier__min_samples_split': [2, 5, 10],
            'classifier__min_samples_leaf': [1, 2, 4]
        }
    else:
        print(f"Unsupported model type: {model_type}")
        return None
    
    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', model)
    ])

    if class_problem in ['binary', 'multiclass'] and len(np.unique(y_train)) < 10:
        cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    else:
        cv = 5
    
    # Grid Search 
    grid_search = GridSearchCV(
        pipeline, param_grid,
        cv=cv, 
        scoring='accuracy' if class_problem in ['binary', 'multiclass'] else 'neg_mean_squared_error',
        verbose=1, n_jobs=-1
    )
    
    grid_search.fit(X_train, y_train)
    
    # Print Best Parameters
    print(f"Best parameters: {grid_search.best_params_}")
    print(f"Best cross-validation score: {grid_search.best_score_:.4f}")

    best_model = grid_search.best_estimator_
    save_model(best_model, f"optimized_{model_type}", {
        'model_type': model_type,
        'best_params': grid_search.best_params_,
        'best_score': grid_search.best_score_,
        'date_trained': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    })
    
    return {
        'best_model': best_model,
        'best_params': grid_search.best_params_,
        'best_score': grid_search.best_score_,
        'cv_results': grid_search.cv_results_
    }


def optimize_if_requested(datasets, ml_results, class_problem):

    print("\nModel Performance Summary:")
    for model_name, results in ml_results.items():
        if class_problem in ['binary', 'multiclass']:
            print(f"{model_name}:")
            print(f"  Accuracy: {results['accuracy']:.4f}")
            print(f"  F1 Score: {results['f1_score']:.4f}")
        else:
            print(f"{model_name}:")
            print(f"  RMSE: {results['rmse']:.4f}")
            print(f"  R²: {results['r2']:.4f}")
    
    optimize = input("\nWould you like to optimize hyperparameters for any model? (y/n, default: n): ")
    
    if optimize.lower() == 'y':
        print("\nAvailable models for optimization:")
        print("1. Random Forest")
        print("2. Gradient Boosting")
        
        model_choice = input("Choose model to optimize (1-2, default: 1): ")
        
        model_map = {
            '1': 'random_forest',
            '2': 'gradient_boosting'
        }
        
        model_type = model_map.get(model_choice, 'random_forest')
        
        print(f"\nOptimizing {model_type}...")
        optimization_results = optimize_hyperparameters(
            datasets,
            model_type=model_type,
            class_problem=class_problem
        )
        
        if optimization_results:
            ml_results[model_type] = {
                'model': optimization_results['best_model'],
                'accuracy': optimization_results['best_score'],
                'parameters': optimization_results['best_params']
            }
            
            print(f"\n{model_type} has been optimized.")
            print(f"Best score: {optimization_results['best_score']:.4f}")
            print("Best parameters:")
            for param, value in optimization_results['best_params'].items():
                print(f"  {param}: {value}")
    
    return ml_results

## Traditional Models

In [22]:
#------ Train Traditional ML Models ------#

def train_traditional_models(datasets, class_problem='multiclass'):
    print("\n--- Training Traditional ML Models ---")
    
    X_train, y_train = datasets['X_train'], datasets['y_train']
    X_test, y_test = datasets['X_test'], datasets['y_test']
    categorical_cols = datasets['categorical_cols']
    numerical_cols = datasets['numerical_cols']
    
    preprocessor = create_preprocessing_pipeline(categorical_cols, numerical_cols)
    
    models = {}
    
    if class_problem in ['binary', 'multiclass']:
        
        # Random Forest
        rf_model = Pipeline([
            ('preprocessor', preprocessor),
            ('classifier', RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1))
        ])
        models['random_forest'] = rf_model
        
        # Gradient Boosting
        gb_model = Pipeline([
            ('preprocessor', preprocessor),
            ('classifier', GradientBoostingClassifier(n_estimators=100, random_state=42))
        ])
        models['gradient_boosting'] = gb_model
    
    else:
        print("Regression models are not implemented in this version.")
        return {}
    
    # Train and Evaluate Models
    results = {}
    
    for name, model in models.items():
        print(f"\nTraining {name}...")
        
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)

        if class_problem in ['binary', 'multiclass']:

            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred, average='weighted')
            recall = recall_score(y_test, y_pred, average='weighted')
            f1 = f1_score(y_test, y_pred, average='weighted')
            
            try:
                y_prob = model.predict_proba(X_test)
                roc_auc = roc_auc_score(y_test, y_prob, multi_class='ovr', average='weighted')
            except:
                roc_auc = None
            
            print(f"Accuracy: {accuracy:.4f}")
            print(f"Precision: {precision:.4f}")
            print(f"Recall: {recall:.4f}")
            print(f"F1 Score: {f1:.4f}")
            if roc_auc:
                print(f"ROC AUC: {roc_auc:.4f}")
            
            save_model(model, f"ml_{name}", {
                'model_type': name,
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1_score': f1,
                'roc_auc': roc_auc
                if roc_auc else None,
                'date_trained': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            })

            report = classification_report(y_test, y_pred, output_dict=True)
            
            # Save Evaluation Results
            evaluation_results = {
                'Model': name,
                'Accuracy': accuracy,
                'Precision': precision,
                'Recall': recall,
                'F1 Score': f1,
                'ROC AUC': roc_auc if roc_auc else None,
                'Classification Report': report,
                'Training Date': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            }
            save_evaluation_results(evaluation_results, f"ml_{name}")
            
            # Save Results
            results[name] = {
                'model': model,
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1_score': f1,
                'roc_auc': roc_auc if roc_auc else None,
                'predictions': y_pred
            }
        else:
            print("Regression evaluation not implemented in this version.")
    
    return results

## Neural Network Models

In [24]:
class DenseNeuralNetwork(nn.Module):

    def __init__(self, input_dim, hidden_dims, output_dim, dropout_rate=0.3):
        super(DenseNeuralNetwork, self).__init__()
        
        layers = []
        layers.append(nn.Linear(input_dim, hidden_dims[0]))
        layers.append(nn.BatchNorm1d(hidden_dims[0]))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout(dropout_rate))
        for i in range(len(hidden_dims) - 1):
            layers.append(nn.Linear(hidden_dims[i], hidden_dims[i+1]))
            layers.append(nn.BatchNorm1d(hidden_dims[i+1]))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))
        layers.append(nn.Linear(hidden_dims[-1], output_dim))
        
        self.model = nn.Sequential(*layers)
    
    def forward(self, x):
        return self.model(x)

class TransformerModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, nhead=4, num_layers=2, dropout_rate=0.1):
        super(TransformerModel, self).__init__()
        
        self.embedding = nn.Linear(input_dim, hidden_dim)
        
        # Positional Encoding
        # self.pos_encoder = PositionalEncoding(hidden_dim, dropout_rate)
        
        encoder_layers = nn.TransformerEncoderLayer(
            d_model=hidden_dim, 
            nhead=nhead, 
            dim_feedforward=hidden_dim*4,
            dropout=dropout_rate
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)
        
        self.output_layer = nn.Linear(hidden_dim, output_dim)
        
        self._init_weights()
    
    def _init_weights(self):
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
    
    def forward(self, x):

        x = x.unsqueeze(1)  
        x = self.embedding(x)
        x = x.permute(1, 0, 2)  # [1, batch_size, hidden_dim]
        x = self.transformer_encoder(x)  # [1, batch_size, hidden_dim]
        
        x = x[0]
        x = self.output_layer(x)  # [batch_size, output_dim]
        
        return x

def train_pytorch_model(model, X_train, y_train, X_val, y_val, 
                      model_name, class_problem='multiclass', batch_size=64, 
                      epochs=50, learning_rate=0.001):

    model.to(device)
    X_train_tensor = torch.FloatTensor(X_train).to(device)
    X_val_tensor = torch.FloatTensor(X_val).to(device)
    
    if class_problem == 'binary':
        y_train_tensor = torch.FloatTensor(y_train).to(device)
        y_val_tensor = torch.FloatTensor(y_val).to(device)
        criterion = nn.BCEWithLogitsLoss()
        output_activation = torch.sigmoid
    elif class_problem == 'multiclass':
        y_train_tensor = torch.LongTensor(y_train).to(device)
        y_val_tensor = torch.LongTensor(y_val).to(device)
        criterion = nn.CrossEntropyLoss()
        output_activation = lambda x: torch.softmax(x, dim=1)
    else:  # Regression
        y_train_tensor = torch.FloatTensor(y_train).to(device)
        y_val_tensor = torch.FloatTensor(y_val).to(device)
        criterion = nn.MSELoss()
        output_activation = lambda x: x

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.1, patience=5, verbose=True)
    
    # Early Stopping
    best_val_loss = float('inf')
    patience = 10
    patience_counter = 0
    best_model_state = None
    
    history = {
        'train_loss': [],
        'val_loss': [],
        'train_acc': [],
        'val_acc': []
    }
    
    for epoch in range(epochs):
        # Training
        model.train()
        train_loss = 0
        correct_train = 0
        total_train = 0
        
        for inputs, targets in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} [Train]"):
            optimizer.zero_grad()
            
            outputs = model(inputs)
            if class_problem == 'binary':
                loss = criterion(outputs.squeeze(), targets)
            else:
                loss = criterion(outputs, targets)
            
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item() * inputs.size(0)
            
            # Calculate Accuracy
            if class_problem in ['binary', 'multiclass']:
                if class_problem == 'binary':
                    predicted = (torch.sigmoid(outputs.squeeze()) > 0.5).float()
                else:
                    _, predicted = torch.max(outputs, 1)
                
                correct_train += (predicted == targets).sum().item()
                total_train += targets.size(0)
        
        model.eval()
        val_loss = 0
        correct_val = 0
        total_val = 0
        
        with torch.no_grad():
            for inputs, targets in tqdm(val_loader, desc=f"Epoch {epoch+1}/{epochs} [Val]"):
                outputs = model(inputs)
                
                if class_problem == 'binary':
                    loss = criterion(outputs.squeeze(), targets)
                else:
                    loss = criterion(outputs, targets)
                
                val_loss += loss.item() * inputs.size(0)
                
                if class_problem in ['binary', 'multiclass']:
                    if class_problem == 'binary':
                        predicted = (torch.sigmoid(outputs.squeeze()) > 0.5).float()
                    else:
                        _, predicted = torch.max(outputs, 1)
                    
                    correct_val += (predicted == targets).sum().item()
                    total_val += targets.size(0)
    
        avg_train_loss = train_loss / len(train_loader.dataset)
        avg_val_loss = val_loss / len(val_loader.dataset)

        history['train_loss'].append(avg_train_loss)
        history['val_loss'].append(avg_val_loss)
        
        if class_problem in ['binary', 'multiclass']:
            train_acc = correct_train / total_train if total_train > 0 else 0
            val_acc = correct_val / total_val if total_val > 0 else 0
            history['train_acc'].append(train_acc)
            history['val_acc'].append(val_acc)
            
            print(f"Epoch {epoch+1}/{epochs} - "
                  f"Train Loss: {avg_train_loss:.4f} - "
                  f"Val Loss: {avg_val_loss:.4f} - "
                  f"Train Acc: {train_acc:.4f} - "
                  f"Val Acc: {val_acc:.4f}")
        else:
            print(f"Epoch {epoch+1}/{epochs} - "
                  f"Train Loss: {avg_train_loss:.4f} - "
                  f"Val Loss: {avg_val_loss:.4f}")
        
        # Adjust Learning Rate
        scheduler.step(avg_val_loss)

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            best_model_state = model.state_dict().copy()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    save_model(model, f"pytorch_{model_name}", {
        'model_type': model_name,
        'best_val_loss': best_val_loss,
        'epochs_trained': epoch + 1,
        'date_trained': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'input_dim': next(model.parameters()).shape[1] if len(list(model.parameters())) > 0 else None,
        'output_dim': list(model.parameters())[-1].shape[0] if len(list(model.parameters())) > 0 else None,
    })
    
    return {
        'model': model,
        'history': history,
        'best_val_loss': best_val_loss
    }

def train_neural_networks(datasets, class_problem='multiclass'):

    
    X_train, y_train = datasets['X_train'], datasets['y_train']
    X_test, y_test = datasets['X_test'], datasets['y_test']
    categorical_cols = datasets['categorical_cols']
    numerical_cols = datasets['numerical_cols']
    
    preprocessor = create_preprocessing_pipeline(categorical_cols, numerical_cols)
    

    X_train_processed = preprocessor.fit_transform(X_train)
    X_test_processed = preprocessor.transform(X_test)

    

    joblib.dump(preprocessor, os.path.join(MODELS_DIR, "preprocessor.pkl"))
    

    if class_problem == 'binary':

        label_encoder = LabelEncoder()
        y_train_encoded = label_encoder.fit_transform(y_train)
        y_test_encoded = label_encoder.transform(y_test)
        

        n_classes = 1
        output_activation = 'sigmoid'
        
    elif class_problem == 'multiclass':

        label_encoder = LabelEncoder()
        y_train_encoded = label_encoder.fit_transform(y_train)
        y_test_encoded = label_encoder.transform(y_test)
        

        n_classes = len(np.unique(y_train_encoded))
        output_activation = 'softmax'
        

        joblib.dump(label_encoder, os.path.join(MODELS_DIR, "label_encoder.pkl"))
        
    else:

        y_train_encoded = y_train
        y_test_encoded = y_test
        n_classes = 1
        output_activation = 'linear'
    

    X_train_final, X_val, y_train_final, y_val = train_test_split(
        X_train_processed, y_train_encoded, test_size=0.2, random_state=42
    )
    

    input_dim = X_train_processed.shape[1]
    

    nn_results = {}
    
    print("\nTraining Dense Neural Network...")
    hidden_dims = [256, 128, 64]  
    
    dnn_model = DenseNeuralNetwork(
        input_dim=input_dim,
        hidden_dims=hidden_dims,
        output_dim=n_classes if class_problem == 'multiclass' else 1,
        dropout_rate=0.3
    )
    
    dnn_results = train_pytorch_model(
        dnn_model, 
        X_train_final, y_train_final, 
        X_val, y_val, 
        'dnn',
        class_problem
    )
    
    dnn_model.eval()
    with torch.no_grad():
        X_test_tensor = torch.FloatTensor(X_test_processed).to(device)
        outputs = dnn_model(X_test_tensor)
        
        if class_problem == 'binary':
            y_pred_dnn = (torch.sigmoid(outputs.squeeze()) > 0.5).cpu().numpy().astype(int)
        elif class_problem == 'multiclass':
            _, y_pred_dnn = torch.max(outputs, 1)
            y_pred_dnn = y_pred_dnn.cpu().numpy()
        else:
            y_pred_dnn = outputs.cpu().numpy()
    
    # Evaluate DNN
    if class_problem in ['binary', 'multiclass']:
        accuracy = accuracy_score(y_test_encoded, y_pred_dnn)
        precision = precision_score(y_test_encoded, y_pred_dnn, average='weighted')
        recall = recall_score(y_test_encoded, y_pred_dnn, average='weighted')
        f1 = f1_score(y_test_encoded, y_pred_dnn, average='weighted')
        
        print(f"DNN - Accuracy: {accuracy:.4f}")
        print(f"DNN - Precision: {precision:.4f}")
        print(f"DNN - Recall: {recall:.4f}")
        print(f"DNN - F1 Score: {f1:.4f}")
        
        report = classification_report(y_test_encoded, y_pred_dnn, output_dict=True)
        
        evaluation_results = {
            'Model': 'Dense Neural Network',
            'Accuracy': accuracy,
            'Precision': precision,
            'Recall': recall,
            'F1 Score': f1,
            'Classification Report': report,
            'Training History': {
                'train_loss': dnn_results['history']['train_loss'],
                'val_loss': dnn_results['history']['val_loss'],
                'train_acc': dnn_results['history']['train_acc'],
                'val_acc': dnn_results['history']['val_acc'],
            },
            'Training Date': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        }
        save_evaluation_results(evaluation_results, "pytorch_dnn")
        
        nn_results['dnn'] = {
            'model': dnn_model,
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1_score': f1,
            'history': dnn_results['history']
        }
    else:
        mae = mean_absolute_error(y_test_encoded, y_pred_dnn)
        rmse = np.sqrt(mean_squared_error(y_test_encoded, y_pred_dnn))
        r2 = r2_score(y_test_encoded, y_pred_dnn)
        
        print(f"DNN - MAE: {mae:.4f}")
        print(f"DNN - RMSE: {rmse:.4f}")
        print(f"DNN - R²: {r2:.4f}")
        
        nn_results['dnn'] = {
            'model': dnn_model,
            'mae': mae,
            'rmse': rmse,
            'r2': r2,
            'history': dnn_results['history']
        }
    
    print("\nTraining Transformer Model...")
    hidden_dim = 128  

    transformer_model = TransformerModel(
        input_dim=input_dim,
        hidden_dim=hidden_dim,
        output_dim=n_classes if class_problem == 'multiclass' else 1,
        nhead=4, 
        num_layers=2  
    )
    
    transformer_results = train_pytorch_model(
        transformer_model, 
        X_train_final, y_train_final, 
        X_val, y_val, 
        'transformer',
        class_problem
    )
    
    # Best Model
    transformer_model.eval()
    with torch.no_grad():
        X_test_tensor = torch.FloatTensor(X_test_processed).to(device)
        outputs = transformer_model(X_test_tensor)
        
        if class_problem == 'binary':
            y_pred_transformer = (torch.sigmoid(outputs.squeeze()) > 0.5).cpu().numpy().astype(int)
        elif class_problem == 'multiclass':
            _, y_pred_transformer = torch.max(outputs, 1)
            y_pred_transformer = y_pred_transformer.cpu().numpy()
        else:
            y_pred_transformer = outputs.cpu().numpy()
    

    if class_problem in ['binary', 'multiclass']:
        accuracy = accuracy_score(y_test_encoded, y_pred_transformer)
        precision = precision_score(y_test_encoded, y_pred_transformer, average='weighted')
        recall = recall_score(y_test_encoded, y_pred_transformer, average='weighted')
        f1 = f1_score(y_test_encoded, y_pred_transformer, average='weighted')
        
        print(f"Transformer - Accuracy: {accuracy:.4f}")
        print(f"Transformer - Precision: {precision:.4f}")
        print(f"Transformer - Recall: {recall:.4f}")
        print(f"Transformer - F1 Score: {f1:.4f}")

        report = classification_report(y_test_encoded, y_pred_transformer, output_dict=True)

        evaluation_results = {
            'Model': 'Transformer Model (PyTorch)',
            'Accuracy': accuracy,
            'Precision': precision,
            'Recall': recall,
            'F1 Score': f1,
            'Classification Report': report,
            'Training History': {
                'train_loss': transformer_results['history']['train_loss'],
                'val_loss': transformer_results['history']['val_loss'],
                'train_acc': transformer_results['history']['train_acc'],
                'val_acc': transformer_results['history']['val_acc'],
            },
            'Training Date': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        }
        save_evaluation_results(evaluation_results, "pytorch_transformer")
        
        nn_results['transformer'] = {
            'model': transformer_model,
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1_score': f1,
            'history': transformer_results['history']
        }
    else:
        mae = mean_absolute_error(y_test_encoded, y_pred_transformer)
        rmse = np.sqrt(mean_squared_error(y_test_encoded, y_pred_transformer))
        r2 = r2_score(y_test_encoded, y_pred_transformer)
        
        print(f"Transformer - MAE: {mae:.4f}")
        print(f"Transformer - RMSE: {rmse:.4f}")
        print(f"Transformer - R²: {r2:.4f}")
        
        nn_results['transformer'] = {
            'model': transformer_model,
            'mae': mae,
            'rmse': rmse,
            'r2': r2,
            'history': transformer_results['history']
        }
    
    return nn_results

## Optimize And Evaluate

In [26]:
def evaluate_model_performance(X_test, y_test, models, class_problem='multiclass'):
    print("\n--- Evaluating Model Performance ---")
    
    # Load the preprocessor
    try:
        preprocessor = joblib.load(os.path.join(MODELS_DIR, "preprocessor.pkl"))
        X_test_processed = preprocessor.transform(X_test)
    except:
        print("Error: Could not load preprocessor. Make sure it was saved during training.")
        return None
    
    performance = {}
    
    if class_problem in ['binary', 'multiclass']:
        accuracies = []
        precisions = []
        recalls = []
        f1_scores = []
        model_names = []

        for name, model_info in models.items():
            model = model_info['model']
            
            if isinstance(model, nn.Module):
                # For Neural Network
                model.eval()
                with torch.no_grad():
                    X_test_tensor = torch.FloatTensor(X_test_processed).to(device)
                    outputs = model(X_test_tensor)
                    
                    if class_problem == 'binary':
                        y_pred = (torch.sigmoid(outputs.squeeze()) > 0.5).cpu().numpy().astype(int)
                    else:
                        _, y_pred = torch.max(outputs, 1)
                        y_pred = y_pred.cpu().numpy()
            else:
                # For scikit-learn models
                y_pred = model.predict(X_test)  # sklearn models handle preprocessing internally
            
            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred, average='weighted')
            recall = recall_score(y_test, y_pred, average='weighted')
            f1 = f1_score(y_test, y_pred, average='weighted')

            accuracies.append(accuracy)
            precisions.append(precision)
            recalls.append(recall)
            f1_scores.append(f1)
            model_names.append(name)

            performance[name] = {
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1_score': f1
            }
            
            print(f"Model: {name}")
            print(f"  Accuracy: {accuracy:.4f}")
            print(f"  Precision: {precision:.4f}")
            print(f"  Recall: {recall:.4f}")
            print(f"  F1 Score: {f1:.4f}")
            
        best_model_idx = np.argmax(f1_scores)
        best_model_name = model_names[best_model_idx]
        
        print(f"\nBest model based on F1 Score: {best_model_name}")
        print(f"  Accuracy: {accuracies[best_model_idx]:.4f}")
        print(f"  Precision: {precisions[best_model_idx]:.4f}")
        print(f"  Recall: {recalls[best_model_idx]:.4f}")
        print(f"  F1 Score: {f1_scores[best_model_idx]:.4f}")
    
    # Regression
    else:
        maes = []
        rmses = []
        r2s = []
        model_names = []

        for name, model_info in models.items():
            model = model_info['model']

            if isinstance(model, nn.Module):
                model.eval()
                with torch.no_grad():
                    X_test_tensor = torch.FloatTensor(X_test).to(device)
                    y_pred = model(X_test_tensor).cpu().numpy()
            else:

                y_pred = model.predict(X_test)
            

            mae = mean_absolute_error(y_test, y_pred)
            rmse = np.sqrt(mean_squared_error(y_test, y_pred))
            r2 = r2_score(y_test, y_pred)
            
            maes.append(mae)
            rmses.append(rmse)
            r2s.append(r2)
            model_names.append(name)
            
            performance[name] = {
                'mae': mae,
                'rmse': rmse,
                'r2': r2
            }
            
            print(f"Model: {name}")
            print(f"  MAE: {mae:.4f}")
            print(f"  RMSE: {rmse:.4f}")
            print(f"  R²: {r2:.4f}")

        best_model_idx = np.argmin(rmses)
        best_model_name = model_names[best_model_idx]
        
        print(f"\nBest model based on RMSE: {best_model_name}")
        print(f"  MAE: {maes[best_model_idx]:.4f}")
        print(f"  RMSE: {rmses[best_model_idx]:.4f}")
        print(f"  R²: {r2s[best_model_idx]:.4f}")
    
    return performance

def analyze_feature_importance(X_train, model, feature_names, model_name):

    print(f"\n--- Analyzing Feature Importance for {model_name} ---")
    
    if hasattr(model, 'feature_importances_'):
        feature_importances = model.feature_importances_
    elif hasattr(model, 'coef_'):
        feature_importances = np.abs(model.coef_[0]) if len(model.coef_.shape) > 1 else np.abs(model.coef_)
    else:
        print(f"Model {model_name} does not provide feature importance information")
        return
    
    if len(feature_names) != len(feature_importances):
        print(f"Length mismatch: {len(feature_names)} feature names vs {len(feature_importances)} importance values")
        if len(feature_names) > len(feature_importances):
            feature_names = feature_names[:len(feature_importances)]
        else:
            feature_importances = feature_importances[:len(feature_names)]
    
    importance_df = pd.DataFrame({
        'Feature': feature_names,
        'Importance': feature_importances
    })
    
    # Sort by Importance
    importance_df = importance_df.sort_values('Importance', ascending=False)
    
    importance_df.to_csv(os.path.join(RESULTS_DIR, f'{model_name}_feature_importance.csv'), index=False)

    # Output Top 10 Important Features
    top_features = importance_df.head(10)
    print("Top 10 most important features:")
    for i, (feature, importance) in enumerate(zip(top_features['Feature'], top_features['Importance'])):
        print(f"  {i+1}. {feature}: {importance:.4f}")
    
    return importance_df

## Load Data

In [28]:
# Load data
df = load_feature_data('all_features.parquet')

# Sample
df_sample = get_data_sample(df)

Use full dataset (100000 records) for model development? (y/n, default: n):  n


Using 100000 records for model development.


## Datasets

In [30]:
target_col, class_problem = get_problem_config(df_sample)

# Drop features
features_to_drop = [
    'id', 'case_number', 'date', 'block', 'location', 'last_updated',
    'hour_sin', 'hour_cos', 'day_of_week_sin', 'day_of_week_cos', 
    'month_sin', 'month_cos'
]

datasets = prepare_datasets(
    df_sample, target_col, test_size=0.3, 
    random_state=42, features_to_drop=features_to_drop
)

if datasets is None:
    raise ValueError("Error preparing datasets")
    
# datasets = handle_class_imbalance_if_needed(datasets, class_problem)


Available target variables:
1. crime_category (6 unique values)
2. crime_type (31 unique values)
3. is_arrest (2 unique values)
4. is_domestic (2 unique values)
5. threat_level (3 unique values)


Choose target variable (1-5, default: 1):  4
Confirm problem type (binary/multiclass/regression, default: binary):  binary


Using is_domestic as target with binary problem type
Features: 66 total, 9 categorical, 32 numerical
Training set: 70000 samples
Test set: 30000 samples
Target distribution in training set:
  False: 84.83%
  True: 15.17%


## Train Models

In [32]:
# Train models
ml_results = train_traditional_models(datasets, class_problem)
nn_results = train_neural_networks(datasets, class_problem)

# ml_results = optimize_if_requested(datasets, ml_results, class_problem)

# Evaluate all models
all_models = {**ml_results, **nn_results} if nn_results else ml_results

performance = evaluate_model_performance(
    datasets['X_test'], 
    datasets['y_test'],
    all_models,
    class_problem
)

# Save
save_final_results(performance, target_col, class_problem)


--- Training Traditional ML Models ---

Training random_forest...
Accuracy: 0.9308
Precision: 0.9277
Recall: 0.9308
F1 Score: 0.9274
Model saved to models/ml_random_forest.pkl
Model info saved to models/ml_random_forest_info.json
Evaluation results saved to results/ml_random_forest_evaluation.txt

Training gradient_boosting...
Accuracy: 0.9317
Precision: 0.9287
Recall: 0.9317
F1 Score: 0.9287
Model saved to models/ml_gradient_boosting.pkl
Model info saved to models/ml_gradient_boosting_info.json
Evaluation results saved to results/ml_gradient_boosting_evaluation.txt





Training Dense Neural Network...


Epoch 1/50 [Train]: 100%|████████████████████| 875/875 [00:01<00:00, 577.04it/s]
Epoch 1/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 2032.27it/s]


Epoch 1/50 - Train Loss: 0.2315 - Val Loss: 0.1905 - Train Acc: 0.9130 - Val Acc: 0.9239


Epoch 2/50 [Train]: 100%|████████████████████| 875/875 [00:01<00:00, 645.39it/s]
Epoch 2/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 2083.75it/s]


Epoch 2/50 - Train Loss: 0.1886 - Val Loss: 0.1836 - Train Acc: 0.9293 - Val Acc: 0.9299


Epoch 3/50 [Train]: 100%|████████████████████| 875/875 [00:01<00:00, 674.05it/s]
Epoch 3/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 2342.73it/s]


Epoch 3/50 - Train Loss: 0.1831 - Val Loss: 0.1830 - Train Acc: 0.9302 - Val Acc: 0.9302


Epoch 4/50 [Train]: 100%|████████████████████| 875/875 [00:01<00:00, 683.43it/s]
Epoch 4/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 2390.50it/s]


Epoch 4/50 - Train Loss: 0.1817 - Val Loss: 0.1833 - Train Acc: 0.9310 - Val Acc: 0.9301


Epoch 5/50 [Train]: 100%|████████████████████| 875/875 [00:01<00:00, 695.99it/s]
Epoch 5/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 2535.90it/s]


Epoch 5/50 - Train Loss: 0.1800 - Val Loss: 0.1819 - Train Acc: 0.9316 - Val Acc: 0.9303


Epoch 6/50 [Train]: 100%|████████████████████| 875/875 [00:01<00:00, 707.31it/s]
Epoch 6/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 2550.36it/s]


Epoch 6/50 - Train Loss: 0.1773 - Val Loss: 0.1826 - Train Acc: 0.9317 - Val Acc: 0.9304


Epoch 7/50 [Train]: 100%|████████████████████| 875/875 [00:01<00:00, 648.32it/s]
Epoch 7/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 2571.27it/s]


Epoch 7/50 - Train Loss: 0.1765 - Val Loss: 0.1838 - Train Acc: 0.9325 - Val Acc: 0.9299


Epoch 8/50 [Train]: 100%|████████████████████| 875/875 [00:01<00:00, 708.17it/s]
Epoch 8/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 2430.12it/s]


Epoch 8/50 - Train Loss: 0.1740 - Val Loss: 0.1837 - Train Acc: 0.9323 - Val Acc: 0.9302


Epoch 9/50 [Train]: 100%|████████████████████| 875/875 [00:01<00:00, 616.88it/s]
Epoch 9/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 1958.40it/s]


Epoch 9/50 - Train Loss: 0.1731 - Val Loss: 0.1847 - Train Acc: 0.9325 - Val Acc: 0.9300


Epoch 10/50 [Train]: 100%|███████████████████| 875/875 [00:01<00:00, 735.01it/s]
Epoch 10/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 2557.30it/s]


Epoch 10/50 - Train Loss: 0.1720 - Val Loss: 0.1843 - Train Acc: 0.9328 - Val Acc: 0.9296


Epoch 11/50 [Train]: 100%|███████████████████| 875/875 [00:01<00:00, 693.27it/s]
Epoch 11/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 2504.66it/s]


Epoch 11/50 - Train Loss: 0.1707 - Val Loss: 0.1842 - Train Acc: 0.9338 - Val Acc: 0.9296


Epoch 12/50 [Train]: 100%|███████████████████| 875/875 [00:01<00:00, 711.53it/s]
Epoch 12/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 2552.63it/s]


Epoch 12/50 - Train Loss: 0.1661 - Val Loss: 0.1839 - Train Acc: 0.9349 - Val Acc: 0.9293


Epoch 13/50 [Train]: 100%|███████████████████| 875/875 [00:01<00:00, 735.24it/s]
Epoch 13/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 2557.81it/s]


Epoch 13/50 - Train Loss: 0.1647 - Val Loss: 0.1847 - Train Acc: 0.9345 - Val Acc: 0.9296


Epoch 14/50 [Train]: 100%|███████████████████| 875/875 [00:01<00:00, 700.94it/s]
Epoch 14/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 2524.63it/s]


Epoch 14/50 - Train Loss: 0.1636 - Val Loss: 0.1857 - Train Acc: 0.9349 - Val Acc: 0.9296


Epoch 15/50 [Train]: 100%|███████████████████| 875/875 [00:01<00:00, 704.46it/s]
Epoch 15/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 2071.37it/s]


Epoch 15/50 - Train Loss: 0.1623 - Val Loss: 0.1849 - Train Acc: 0.9350 - Val Acc: 0.9296
Early stopping at epoch 15
Model saved to models/pytorch_dnn.pkl
Model info saved to models/pytorch_dnn_info.json
DNN - Accuracy: 0.9315
DNN - Precision: 0.9285
DNN - Recall: 0.9315
DNN - F1 Score: 0.9285
Evaluation results saved to results/pytorch_dnn_evaluation.txt

Training Transformer Model...


Epoch 1/50 [Train]: 100%|████████████████████| 875/875 [00:03<00:00, 241.33it/s]
Epoch 1/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 1148.89it/s]


Epoch 1/50 - Train Loss: 0.2021 - Val Loss: 0.1903 - Train Acc: 0.9240 - Val Acc: 0.9272


Epoch 2/50 [Train]: 100%|████████████████████| 875/875 [00:03<00:00, 257.47it/s]
Epoch 2/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 1346.32it/s]


Epoch 2/50 - Train Loss: 0.1857 - Val Loss: 0.1912 - Train Acc: 0.9296 - Val Acc: 0.9295


Epoch 3/50 [Train]: 100%|████████████████████| 875/875 [00:03<00:00, 269.29it/s]
Epoch 3/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 1262.23it/s]


Epoch 3/50 - Train Loss: 0.1822 - Val Loss: 0.1881 - Train Acc: 0.9299 - Val Acc: 0.9287


Epoch 4/50 [Train]: 100%|████████████████████| 875/875 [00:03<00:00, 285.95it/s]
Epoch 4/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 1359.51it/s]


Epoch 4/50 - Train Loss: 0.1805 - Val Loss: 0.1882 - Train Acc: 0.9307 - Val Acc: 0.9289


Epoch 5/50 [Train]: 100%|████████████████████| 875/875 [00:03<00:00, 285.28it/s]
Epoch 5/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 1180.82it/s]


Epoch 5/50 - Train Loss: 0.1807 - Val Loss: 0.1893 - Train Acc: 0.9303 - Val Acc: 0.9287


Epoch 6/50 [Train]: 100%|████████████████████| 875/875 [00:03<00:00, 286.78it/s]
Epoch 6/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 1260.35it/s]


Epoch 6/50 - Train Loss: 0.1804 - Val Loss: 0.1867 - Train Acc: 0.9304 - Val Acc: 0.9290


Epoch 7/50 [Train]: 100%|████████████████████| 875/875 [00:03<00:00, 277.94it/s]
Epoch 7/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 1221.24it/s]


Epoch 7/50 - Train Loss: 0.1790 - Val Loss: 0.1875 - Train Acc: 0.9309 - Val Acc: 0.9291


Epoch 8/50 [Train]: 100%|████████████████████| 875/875 [00:03<00:00, 283.81it/s]
Epoch 8/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 1369.34it/s]


Epoch 8/50 - Train Loss: 0.1774 - Val Loss: 0.1886 - Train Acc: 0.9311 - Val Acc: 0.9303


Epoch 9/50 [Train]: 100%|████████████████████| 875/875 [00:03<00:00, 281.88it/s]
Epoch 9/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 1400.17it/s]


Epoch 9/50 - Train Loss: 0.1768 - Val Loss: 0.1964 - Train Acc: 0.9314 - Val Acc: 0.9286


Epoch 10/50 [Train]: 100%|███████████████████| 875/875 [00:03<00:00, 283.08it/s]
Epoch 10/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1159.36it/s]


Epoch 10/50 - Train Loss: 0.1769 - Val Loss: 0.1860 - Train Acc: 0.9311 - Val Acc: 0.9289


Epoch 11/50 [Train]: 100%|███████████████████| 875/875 [00:02<00:00, 291.97it/s]
Epoch 11/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1389.86it/s]


Epoch 11/50 - Train Loss: 0.1760 - Val Loss: 0.1897 - Train Acc: 0.9313 - Val Acc: 0.9269


Epoch 12/50 [Train]: 100%|███████████████████| 875/875 [00:03<00:00, 281.11it/s]
Epoch 12/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1282.11it/s]


Epoch 12/50 - Train Loss: 0.1757 - Val Loss: 0.1901 - Train Acc: 0.9319 - Val Acc: 0.9286


Epoch 13/50 [Train]: 100%|███████████████████| 875/875 [00:04<00:00, 210.08it/s]
Epoch 13/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1110.24it/s]


Epoch 13/50 - Train Loss: 0.1748 - Val Loss: 0.1866 - Train Acc: 0.9323 - Val Acc: 0.9295


Epoch 14/50 [Train]: 100%|███████████████████| 875/875 [00:03<00:00, 251.69it/s]
Epoch 14/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1228.38it/s]


Epoch 14/50 - Train Loss: 0.1747 - Val Loss: 0.1873 - Train Acc: 0.9319 - Val Acc: 0.9289


Epoch 15/50 [Train]: 100%|███████████████████| 875/875 [00:03<00:00, 270.99it/s]
Epoch 15/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1369.41it/s]


Epoch 15/50 - Train Loss: 0.1745 - Val Loss: 0.1904 - Train Acc: 0.9321 - Val Acc: 0.9289


Epoch 16/50 [Train]: 100%|███████████████████| 875/875 [00:03<00:00, 265.69it/s]
Epoch 16/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1006.50it/s]


Epoch 16/50 - Train Loss: 0.1745 - Val Loss: 0.1845 - Train Acc: 0.9317 - Val Acc: 0.9299


Epoch 17/50 [Train]: 100%|███████████████████| 875/875 [00:03<00:00, 250.87it/s]
Epoch 17/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1142.40it/s]


Epoch 17/50 - Train Loss: 0.1737 - Val Loss: 0.1862 - Train Acc: 0.9317 - Val Acc: 0.9293


Epoch 18/50 [Train]: 100%|███████████████████| 875/875 [00:03<00:00, 247.20it/s]
Epoch 18/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1303.25it/s]


Epoch 18/50 - Train Loss: 0.1737 - Val Loss: 0.1851 - Train Acc: 0.9322 - Val Acc: 0.9294


Epoch 19/50 [Train]: 100%|███████████████████| 875/875 [00:03<00:00, 265.53it/s]
Epoch 19/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1319.70it/s]


Epoch 19/50 - Train Loss: 0.1733 - Val Loss: 0.1840 - Train Acc: 0.9324 - Val Acc: 0.9286


Epoch 20/50 [Train]: 100%|███████████████████| 875/875 [00:03<00:00, 270.57it/s]
Epoch 20/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1015.88it/s]


Epoch 20/50 - Train Loss: 0.1727 - Val Loss: 0.1845 - Train Acc: 0.9311 - Val Acc: 0.9294


Epoch 21/50 [Train]: 100%|███████████████████| 875/875 [00:03<00:00, 253.42it/s]
Epoch 21/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1072.78it/s]


Epoch 21/50 - Train Loss: 0.1732 - Val Loss: 0.1863 - Train Acc: 0.9322 - Val Acc: 0.9284


Epoch 22/50 [Train]: 100%|███████████████████| 875/875 [00:03<00:00, 270.60it/s]
Epoch 22/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1287.22it/s]


Epoch 22/50 - Train Loss: 0.1721 - Val Loss: 0.1929 - Train Acc: 0.9324 - Val Acc: 0.9292


Epoch 23/50 [Train]: 100%|███████████████████| 875/875 [00:03<00:00, 252.02it/s]
Epoch 23/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1324.25it/s]


Epoch 23/50 - Train Loss: 0.1725 - Val Loss: 0.1850 - Train Acc: 0.9328 - Val Acc: 0.9299


Epoch 24/50 [Train]: 100%|███████████████████| 875/875 [00:03<00:00, 267.40it/s]
Epoch 24/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1338.92it/s]


Epoch 24/50 - Train Loss: 0.1719 - Val Loss: 0.1858 - Train Acc: 0.9323 - Val Acc: 0.9289


Epoch 25/50 [Train]: 100%|███████████████████| 875/875 [00:03<00:00, 258.35it/s]
Epoch 25/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1028.46it/s]


Epoch 25/50 - Train Loss: 0.1713 - Val Loss: 0.1857 - Train Acc: 0.9327 - Val Acc: 0.9301


Epoch 26/50 [Train]: 100%|███████████████████| 875/875 [00:03<00:00, 269.06it/s]
Epoch 26/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1222.28it/s]


Epoch 26/50 - Train Loss: 0.1687 - Val Loss: 0.1855 - Train Acc: 0.9338 - Val Acc: 0.9289


Epoch 27/50 [Train]: 100%|███████████████████| 875/875 [00:03<00:00, 252.53it/s]
Epoch 27/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1375.66it/s]


Epoch 27/50 - Train Loss: 0.1674 - Val Loss: 0.1865 - Train Acc: 0.9342 - Val Acc: 0.9290


Epoch 28/50 [Train]: 100%|███████████████████| 875/875 [00:03<00:00, 250.19it/s]
Epoch 28/50 [Val]: 100%|█████████████████████| 219/219 [00:00<00:00, 985.94it/s]


Epoch 28/50 - Train Loss: 0.1672 - Val Loss: 0.1850 - Train Acc: 0.9337 - Val Acc: 0.9296


Epoch 29/50 [Train]: 100%|███████████████████| 875/875 [00:03<00:00, 241.24it/s]
Epoch 29/50 [Val]: 100%|████████████████████| 219/219 [00:00<00:00, 1102.64it/s]


Epoch 29/50 - Train Loss: 0.1668 - Val Loss: 0.1856 - Train Acc: 0.9338 - Val Acc: 0.9291
Early stopping at epoch 29
Model saved to models/pytorch_transformer.pkl
Model info saved to models/pytorch_transformer_info.json
Transformer - Accuracy: 0.9312
Transformer - Precision: 0.9283
Transformer - Recall: 0.9312
Transformer - F1 Score: 0.9288
Evaluation results saved to results/pytorch_transformer_evaluation.txt

--- Evaluating Model Performance ---
Model: random_forest
  Accuracy: 0.9308
  Precision: 0.9277
  Recall: 0.9308
  F1 Score: 0.9274
Model: gradient_boosting
  Accuracy: 0.9317
  Precision: 0.9287
  Recall: 0.9317
  F1 Score: 0.9287
Model: dnn
  Accuracy: 0.9315
  Precision: 0.9285
  Recall: 0.9315
  F1 Score: 0.9285
Model: transformer
  Accuracy: 0.9312
  Precision: 0.9283
  Recall: 0.9312
  F1 Score: 0.9288

Best model based on F1 Score: transformer
  Accuracy: 0.9312
  Precision: 0.9283
  Recall: 0.9312
  F1 Score: 0.9288
Final results saved to results/final_model_performance