In [7]:
# =============================================================================
# BLOCK 1: SETUP, IMPORTS, AND DATA LOADING
# =============================================================================
import warnings
warnings.filterwarnings('ignore')
import time
import os
# --- Library Imports ---
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import OneCycleLR
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.cluster import KMeans
from sklearn.metrics import mean_squared_error
import joblib
import os
import gc
print("Libraries imported successfully.")
# --- Helper Function for Winkler Score ---
def winkler_score(y_true, lower, upper, alpha=0.1, return_coverage=False):
    width = upper - lower
    penalty_lower = np.where(y_true < lower, (2 / alpha) * (lower - y_true), 0)
    penalty_upper = np.where(y_true > upper, (2 / alpha) * (y_true - upper), 0)
    score = width + penalty_lower + penalty_upper
    if return_coverage:
        coverage = np.mean((y_true >= lower) & (y_true <= upper))
        return np.mean(score), coverage
    return np.mean(score)
# --- Global Constants ---
N_SPLITS = 5
RANDOM_STATE = 42
DATA_PATH = './'
N_OPTUNA_TRIALS = 30 # A strong number for a comprehensive search
COMPETITION_ALPHA = 0.1

# --- Load Raw Data ---
try:
    # We drop the low-variance columns they identified right away
    drop_cols=['id', 'golf', 'view_rainier', 'view_skyline', 'view_lakesamm','view_otherwater', 'view_other']
    df_train = pd.read_csv(DATA_PATH + 'dataset.csv').drop(columns=drop_cols)
    df_test = pd.read_csv(DATA_PATH + 'test.csv').drop(columns=drop_cols)
    print("Raw data loaded successfully.")
except FileNotFoundError:
    print("ERROR: Could not find 'dataset.csv' or 'test.csv'.")
    exit()
# --- Prepare Target Variable ---
y_true = df_train['sale_price'].copy()
grade_for_stratify = df_train['grade'].copy()
# The mean-error model works best when predicting the raw price directly
# So, we will NOT log-transform the target this time.
# df_train.drop('sale_price', axis=1, inplace=True) # We keep sale_price for FE
print("Setup complete.")


Libraries imported successfully.
Raw data loaded successfully.
Setup complete.


In [2]:
# Make sure to have these libraries installed
# pip install pandas numpy scikit-learn

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.cluster import KMeans
import gc

# Define a random state for reproducibility
RANDOM_STATE = 42

def create_comprehensive_features(df_train, df_test):
    """
    Combines original and new advanced feature engineering steps into a single pipeline.
    """
    print("--- Starting Comprehensive Feature Engineering ---")

    # Store original indices and target variable
    train_ids = df_train.index
    test_ids = df_test.index
    y_train = df_train['sale_price'].copy() # Keep the target separate

    # Combine for consistent processing
    df_train_temp = df_train.drop(columns=['sale_price'])
    all_data = pd.concat([df_train_temp, df_test], axis=0, ignore_index=True)

    # --- Original Feature Engineering ---

    # A) Brute-Force Numerical Interactions
    print("Step 1: Creating brute-force numerical interaction features...")
    NUMS = ['area', 'land_val', 'imp_val', 'sqft_lot', 'sqft', 'sqft_1', 'grade', 'year_built']
    # Ensure all columns exist and are numeric, fill missing with 0 for safety
    for col in NUMS:
        if col not in all_data.columns:
            all_data[col] = 0
        else:
            all_data[col] = pd.to_numeric(all_data[col], errors='coerce').fillna(0)
            
    for i in range(len(NUMS)):
        for j in range(i + 1, len(NUMS)):
            all_data[f'{NUMS[i]}_x_{NUMS[j]}'] = all_data[NUMS[i]] * all_data[NUMS[j]]

    # B) Date Features
    print("Step 2: Creating date features...")
    all_data['sale_date'] = pd.to_datetime(all_data['sale_date'])
    all_data['sale_year'] = all_data['sale_date'].dt.year
    all_data['sale_month'] = all_data['sale_date'].dt.month
    all_data['sale_dayofyear'] = all_data['sale_date'].dt.dayofyear
    all_data['age_at_sale'] = all_data['sale_year'] - all_data['year_built']

    # C) TF-IDF Text Features
    print("Step 3: Creating TF-IDF features for text columns...")
    text_cols = ['subdivision', 'zoning', 'city', 'sale_warning', 'join_status', 'submarket']
    all_data[text_cols] = all_data[text_cols].fillna('missing').astype(str)
    
    for col in text_cols:
        tfidf = TfidfVectorizer(analyzer='char', ngram_range=(3, 5), max_features=128, binary=True)
        svd = TruncatedSVD(n_components=8, random_state=RANDOM_STATE)
        
        tfidf_matrix = tfidf.fit_transform(all_data[col])
        tfidf_svd = svd.fit_transform(tfidf_matrix)
        
        tfidf_df = pd.DataFrame(tfidf_svd, columns=[f'{col}_tfidf_svd_{i}' for i in range(8)])
        all_data = pd.concat([all_data, tfidf_df], axis=1)

    # D) Log transform some interaction features
    for c in ['land_val_x_imp_val', 'land_val_x_sqft', 'imp_val_x_sqft']:
        if c in all_data.columns:
            all_data[c] = np.log1p(all_data[c].fillna(0))

    # --- New Feature Engineering Ideas ---

    # F) Group-By Aggregation Features
    print("Step 4: Creating group-by aggregation features...")
    group_cols = ['submarket', 'city', 'zoning']
    num_cols_for_agg = ['grade', 'sqft', 'imp_val', 'land_val', 'age_at_sale']

    for group_col in group_cols:
        for num_col in num_cols_for_agg:
            agg_stats = all_data.groupby(group_col)[num_col].agg(['mean', 'std', 'max', 'min']).reset_index()
            agg_stats.columns = [group_col] + [f'{group_col}_{num_col}_{stat}' for stat in ['mean', 'std', 'max', 'min']]
            all_data = pd.merge(all_data, agg_stats, on=group_col, how='left')
            all_data[f'{num_col}_minus_{group_col}_mean'] = all_data[num_col] - all_data[f'{group_col}_{num_col}_mean']

    # G) Ratio Features
    print("Step 5: Creating ratio features...")
    # Add a small epsilon to prevent division by zero
    epsilon = 1e-6 
    all_data['total_val'] = all_data['imp_val'] + all_data['land_val']
    all_data['imp_val_to_land_val_ratio'] = all_data['imp_val'] / (all_data['land_val'] + epsilon)
    all_data['land_val_ratio'] = all_data['land_val'] / (all_data['total_val'] + epsilon)
    all_data['sqft_to_lot_ratio'] = all_data['sqft'] / (all_data['sqft_lot'] + epsilon)
    all_data['was_renovated'] = (all_data['year_reno'] > 0).astype(int)
    all_data['reno_age_at_sale'] = np.where(all_data['was_renovated'] == 1, all_data['sale_year'] - all_data['year_reno'], -1)

    # H) Geospatial Clustering Features
    print("Step 6: Creating geospatial clustering features...")
    coords = all_data[['latitude', 'longitude']].copy()
    coords.fillna(coords.median(), inplace=True) # Simple imputation

    # KMeans is sensitive to feature scaling, but for lat/lon it's often okay without it.
    kmeans = KMeans(n_clusters=20, random_state=RANDOM_STATE, n_init=10) 
    all_data['location_cluster'] = kmeans.fit_predict(coords)
    
    # Calculate distance to each cluster center
    cluster_centers = kmeans.cluster_centers_
    for i in range(len(cluster_centers)):
        center = cluster_centers[i]
        all_data[f'dist_to_cluster_{i}'] = np.sqrt((coords['latitude'] - center[0])**2 + (coords['longitude'] - center[1])**2)

    # --- Final Cleanup ---
    print("Step 7: Finalizing feature set...")
    cols_to_drop = ['sale_date', 'subdivision', 'zoning', 'city', 'sale_warning', 'join_status', 'submarket']
    all_data = all_data.drop(columns=cols_to_drop)

    # One-hot encode the new cluster feature
    all_data = pd.get_dummies(all_data, columns=['location_cluster'], prefix='loc_cluster')
    
    # Final check for any remaining object columns to be safe (besides index)
    object_cols = all_data.select_dtypes(include='object').columns
    if len(object_cols) > 0:
        print(f"Warning: Found unexpected object columns: {object_cols}. Dropping them.")
        all_data = all_data.drop(columns=object_cols)
        
    all_data.fillna(0, inplace=True)

    # Separate back into train and test sets
    train_len = len(train_ids)
    X = all_data.iloc[:train_len].copy()
    X_test = all_data.iloc[train_len:].copy()
    
    # Restore original indices
    X.index = train_ids
    X_test.index = test_ids
    
    # Align columns - crucial for model prediction
    X_test = X_test[X.columns]
    
    print(f"\nComprehensive FE complete. Total features: {X.shape[1]}")
    gc.collect()
    
    return X, X_test, y_train
# =============================================================================
# BLOCK 2.5: EXECUTE FEATURE ENGINEERING
# =============================================================================
print("\n--- Starting Block 2.5: Executing Feature Engineering Pipeline ---")

# This is the crucial step that was missing.
# We call the function to create our training and testing dataframes.
X, X_test, y_train = create_comprehensive_features(df_train, df_test)

# Let's verify the output
print(f"Feature engineering complete. X shape: {X.shape}, X_test shape: {X_test.shape}")
gc.collect()


--- Starting Block 2.5: Executing Feature Engineering Pipeline ---
--- Starting Comprehensive Feature Engineering ---
Step 1: Creating brute-force numerical interaction features...
Step 2: Creating date features...
Step 3: Creating TF-IDF features for text columns...
Step 4: Creating group-by aggregation features...
Step 5: Creating ratio features...
Step 6: Creating geospatial clustering features...
Step 7: Finalizing feature set...

Comprehensive FE complete. Total features: 233
Feature engineering complete. X shape: (200000, 233), X_test shape: (200000, 233)


0

In [3]:
# =============================================================================
# BLOCK 3: PYTORCH SETUP & FULL DATA PREPARATION
# =============================================================================


print(f"--- Starting Block 3: PyTorch Setup & Data Preparation ---")
print(f"PyTorch version: {torch.__version__}")
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

# Data Scaling
print("\nScaling features and target variable for the Neural Network...")
feature_scaler = StandardScaler()
target_scaler = StandardScaler()
X_scaled = feature_scaler.fit_transform(X)
X_test_scaled = feature_scaler.transform(X_test)
y_true_scaled = target_scaler.fit_transform(y_true.to_numpy().reshape(-1, 1))

# Custom PyTorch Dataset
class HousePriceDataset(Dataset):
    def __init__(self, features, labels=None):
        self.features = features
        self.labels = labels
    def __len__(self): return len(self.features)
    def __getitem__(self, idx):
        features = torch.tensor(self.features[idx], dtype=torch.float32)
        if self.labels is not None:
            labels = torch.tensor(self.labels[idx], dtype=torch.float32)
            return features, labels
        return features
        
print("PyTorch setup and full data scaling complete.")

# =============================================================================
# BLOCK 3.5: CALCULATE THE SCALED ZERO THRESHOLD
# =============================================================================
print("\n--- Calculating the scaled value for a zero price ---")

# We transform the value 0 using the FITTED target_scaler.
# This gives us the exact value on the scaled distribution that corresponds to $0.
scaled_zero_threshold = target_scaler.transform(np.array([[0]]))[0, 0]

print(f"The scaled value of a $0 house price is: {scaled_zero_threshold:.4f}")
print("This will be used as the minimum clamp value in our neural network.")

--- Starting Block 3: PyTorch Setup & Data Preparation ---
PyTorch version: 2.7.1+cu126
Using device: cuda

Scaling features and target variable for the Neural Network...
PyTorch setup and full data scaling complete.

--- Calculating the scaled value for a zero price ---
The scaled value of a $0 house price is: -1.4006
This will be used as the minimum clamp value in our neural network.


In [4]:
# =============================================================================
# BLOCK 4: DEFINE THE STABILIZED RESIDUAL NEURAL NETWORK
# =============================================================================
class ResidualBlock(nn.Module):
    def __init__(self, input_size, output_size, dropout_rate):
        super(ResidualBlock, self).__init__()
        self.main_path = nn.Sequential(
            nn.Linear(input_size, output_size),
            nn.BatchNorm1d(output_size),
            nn.SiLU(),
            nn.Dropout(dropout_rate)
        )
        self.shortcut = nn.Identity() if input_size == output_size else nn.Linear(input_size, output_size)
    def forward(self, x): return self.main_path(x) + self.shortcut(x)

class ResidualNet(nn.Module):
    def __init__(self, input_shape, layer_sizes, dropout_rates):
        super(ResidualNet, self).__init__()
        layers = [nn.Linear(input_shape, layer_sizes[0]), nn.SiLU()]
        in_size = layer_sizes[0]
        for out_size, dropout in zip(layer_sizes, dropout_rates):
            layers.append(ResidualBlock(in_size, out_size, dropout))
            in_size = out_size
        layers.append(nn.Linear(in_size, 1))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        output = self.model(x)
        # CRITICAL FIX #1: OUTPUT CLAMPING
        # Prevents the model from predicting impossibly large or small scaled values.
        return torch.clamp(output, min=scaled_zero_threshold, max=5.0)

print("STABILIZED Residual Neural Network architecture defined successfully.")

STABILIZED Residual Neural Network architecture defined successfully.


In [5]:
# =============================================================================
# BLOCK 5: TUNE NEURAL NETWORK MEAN MODEL
# =============================================================================
import optuna
from sklearn.model_selection import train_test_split
import torch.optim as optim
from torch.optim.lr_scheduler import OneCycleLR

# --- 1. Prepare Data for Tuning (using a random split) ---
print("--- Step 1: Preparing data for faster Optuna tuning... ---")
X_train_opt, X_val_opt, y_train_opt, y_val_opt = train_test_split(
    X_scaled, y_true_scaled, test_size=0.2, random_state=RANDOM_STATE
)
print(f"Data prepared. Training set size: {len(X_train_opt)}, Validation set size: {len(X_val_opt)}")

train_dataset_opt = HousePriceDataset(X_train_opt, y_train_opt)
val_dataset_opt = HousePriceDataset(X_val_opt, y_val_opt)
train_loader_opt = DataLoader(train_dataset_opt, batch_size=512, shuffle=True)
val_loader_opt = DataLoader(val_dataset_opt, batch_size=512, shuffle=False)

# --- 2. Define the Optuna Objective Function ---
def objective_nn(trial):
    # Define hyperparameter search space
    arch_choice = trial.suggest_categorical('architecture', ['arch_1', 'arch_2'])
    layer_sizes = {'arch_1': [512, 256, 128], 'arch_2': [1024, 512, 256]}[arch_choice]
    dropout_rates = [trial.suggest_float(f'dr_{i}', 0.1, 0.5) for i in range(len(layer_sizes))]
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)

    model = ResidualNet(X_scaled.shape[1], layer_sizes, dropout_rates).to(device)
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    scheduler = OneCycleLR(optimizer, max_lr=learning_rate, epochs=50, steps_per_epoch=len(train_loader_opt))
    loss_fn = nn.HuberLoss()
    
    best_val_loss = float('inf')
    epochs_no_improve = 0

    for epoch in range(50): # 50 epochs is enough for a tuning run
        model.train()
        for features, labels in train_loader_opt:
            features, labels = features.to(device), labels.to(device)
            optimizer.zero_grad(); loss = loss_fn(model(features), labels); loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0); optimizer.step(); scheduler.step()
        
        model.eval()
        current_val_loss = 0
        with torch.no_grad():
            for features, labels in val_loader_opt:
                current_val_loss += loss_fn(model(features.to(device)), labels.to(device)).item()
        current_val_loss /= len(val_loader_opt)

        if current_val_loss < best_val_loss:
            best_val_loss = current_val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= 7: # Shorter patience for faster tuning
                break
                
    return best_val_loss

# --- 3. Run the Optuna Study ---
study_nn = optuna.create_study(direction='minimize')
print("\n--- Starting Neural Network Hyperparameter Tuning... ---")
study_nn.optimize(objective_nn, n_trials=75)

# --- 4. Store the Best Results ---
print("\n--- Neural Network Tuning Complete ---")
print(f"Best trial validation loss: {study_nn.best_value:.6f}")
best_params_nn = study_nn.best_params
print("Best hyperparameters found for Neural Network:", best_params_nn)

--- Step 1: Preparing data for faster Optuna tuning... ---


[I 2025-07-24 03:02:32,343] A new study created in memory with name: no-name-3e9886d0-26af-4c0a-a251-f29a84d52c92


Data prepared. Training set size: 160000, Validation set size: 40000

--- Starting Neural Network Hyperparameter Tuning... ---


[I 2025-07-24 03:04:26,426] Trial 0 finished with value: 0.03391383343105075 and parameters: {'architecture': 'arch_1', 'dr_0': 0.45033774156523054, 'dr_1': 0.4308754629223541, 'dr_2': 0.3197897302274969, 'learning_rate': 0.00019326391470813071, 'weight_decay': 1.3551798292603313e-06}. Best is trial 0 with value: 0.03391383343105075.
[I 2025-07-24 03:06:16,361] Trial 1 finished with value: 0.032120370577219164 and parameters: {'architecture': 'arch_2', 'dr_0': 0.3290716352055359, 'dr_1': 0.4791581967339372, 'dr_2': 0.21039946687518546, 'learning_rate': 0.0025080635391938055, 'weight_decay': 1.9879667167539206e-05}. Best is trial 1 with value: 0.032120370577219164.
[I 2025-07-24 03:08:05,530] Trial 2 finished with value: 0.03108148566812654 and parameters: {'architecture': 'arch_1', 'dr_0': 0.4948646326319316, 'dr_1': 0.24635200513006292, 'dr_2': 0.3384175725914985, 'learning_rate': 0.002060608168544838, 'weight_decay': 0.00036745069199052436}. Best is trial 2 with value: 0.031081485668


--- Neural Network Tuning Complete ---
Best trial validation loss: 0.030269
Best hyperparameters found for Neural Network: {'architecture': 'arch_1', 'dr_0': 0.3897635076916713, 'dr_1': 0.12176356784582548, 'dr_2': 0.2696972548349174, 'learning_rate': 0.001272196996939945, 'weight_decay': 0.00020557125870761814}


In [8]:
# =============================================================================
# BLOCK 6: K-FOLD TRAINING & SAVING WITH OPTIMAL NN PARAMETERS
# =============================================================================
from sklearn.model_selection import StratifiedKFold

print("\n" + "="*80)
print("--- Step 6: K-Fold Cross-Validation with Optimal Hyperparameters ---")
print("="*80)

# --- 1. Reconstruct Best Hyperparameters from Optuna ---
best_arch_choice = best_params_nn['architecture']
best_layer_sizes = {'arch_1': [512, 256, 128], 'arch_2': [1024, 512, 256]}[best_arch_choice]
best_dropout_rates = [best_params_nn[f'dr_{i}'] for i in range(len(best_layer_sizes))]
best_learning_rate = best_params_nn['learning_rate']
best_weight_decay = best_params_nn['weight_decay']

# --- 2. K-Fold Cross-Validation and Prediction Generation ---
EPOCHS, BATCH_SIZE, PATIENCE = 200, 512, 15
oof_nn_preds = np.zeros(len(X))
test_nn_preds = np.zeros(len(X_test))

skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=RANDOM_STATE)
for fold, (train_idx, val_idx) in enumerate(skf.split(X_scaled, grade_for_stratify)):
    print(f"\n--- Training Fold {fold+1}/{N_SPLITS} ---")
    
    X_train, y_train = X_scaled[train_idx], y_true_scaled[train_idx]
    X_val, y_val = X_scaled[val_idx], y_true_scaled[val_idx]
    
    train_loader = DataLoader(HousePriceDataset(X_train, y_train), batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(HousePriceDataset(X_val, y_val), batch_size=BATCH_SIZE, shuffle=False)
    
    model = ResidualNet(X_scaled.shape[1], best_layer_sizes, best_dropout_rates).to(device)
    optimizer = optim.AdamW(model.parameters(), lr=best_learning_rate, weight_decay=best_weight_decay)
    scheduler = OneCycleLR(optimizer, max_lr=best_learning_rate, epochs=EPOCHS, steps_per_epoch=len(train_loader))
    loss_fn = nn.HuberLoss()
    best_val_loss, best_model_state = float('inf'), None
    epochs_no_improve = 0

    for epoch in range(EPOCHS):
        model.train()
        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)
            optimizer.zero_grad(); loss = loss_fn(model(features), labels); loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0); optimizer.step(); scheduler.step()
        
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for features, labels in val_loader:
                val_loss += loss_fn(model(features.to(device)), labels.to(device)).item()
        val_loss /= len(val_loader)

        if (epoch + 1) % 20 == 0: print(f" Epoch {epoch+1:03d} | Val Loss: {val_loss:.6f}")
        if val_loss < best_val_loss:
            best_val_loss, epochs_no_improve, best_model_state = val_loss, 0, model.state_dict().copy()
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= PATIENCE:
                print(f"\nEarly stopping at epoch {epoch+1}. Best validation loss: {best_val_loss:.6f}")
                break
    
    # Generate Predictions for the fold
    model.load_state_dict(best_model_state)
    model.eval()
    
    # OOF Predictions (inverse transform to get real prices)
    with torch.no_grad():
        raw_oof_preds = np.concatenate([model(f.to(device)).detach().cpu().numpy() for f, _ in val_loader])
    oof_nn_preds[val_idx] = np.clip(target_scaler.inverse_transform(raw_oof_preds).flatten(), 0, None)
    
    # Test Predictions (accumulate for averaging later)
    test_loader_fold = DataLoader(HousePriceDataset(X_test_scaled), batch_size=BATCH_SIZE*2, shuffle=False)
    with torch.no_grad():
        raw_test_preds = np.concatenate([model(f.to(device)).detach().cpu().numpy() for f in test_loader_fold])
    test_nn_preds += np.clip(target_scaler.inverse_transform(raw_test_preds).flatten(), 0, None) / N_SPLITS

# --- 3. Final Evaluation and Saving ---
print("\n" + "="*80)
print("--- Final Evaluation and Saving Predictions ---")
print("="*80)

final_oof_rmse = np.sqrt(mean_squared_error(y_true, oof_nn_preds))
print(f"Final NN Mean Model OOF RMSE: ${final_oof_rmse:,.2f}")




--- Step 6: K-Fold Cross-Validation with Optimal Hyperparameters ---

--- Training Fold 1/5 ---
 Epoch 020 | Val Loss: 0.037692
 Epoch 040 | Val Loss: 0.036052
 Epoch 060 | Val Loss: 0.034914
 Epoch 080 | Val Loss: 0.033411
 Epoch 100 | Val Loss: 0.033753

Early stopping at epoch 104. Best validation loss: 0.032294

--- Training Fold 2/5 ---
 Epoch 020 | Val Loss: 0.038847
 Epoch 040 | Val Loss: 0.036899
 Epoch 060 | Val Loss: 0.035268

Early stopping at epoch 66. Best validation loss: 0.033471

--- Training Fold 3/5 ---
 Epoch 020 | Val Loss: 0.038491
 Epoch 040 | Val Loss: 0.035928
 Epoch 060 | Val Loss: 0.036320
 Epoch 080 | Val Loss: 0.033668

Early stopping at epoch 92. Best validation loss: 0.033142

--- Training Fold 4/5 ---
 Epoch 020 | Val Loss: 0.040885
 Epoch 040 | Val Loss: 0.035814
 Epoch 060 | Val Loss: 0.032859
 Epoch 080 | Val Loss: 0.032837

Early stopping at epoch 82. Best validation loss: 0.032535

--- Training Fold 5/5 ---
 Epoch 020 | Val Loss: 0.039765
 Epoch 040

In [9]:
# Save the Prediction Arrays
SAVE_PATH = './NN_model_predictions/'
os.makedirs(SAVE_PATH, exist_ok=True)
np.save(os.path.join(SAVE_PATH, 'oof_nn_preds.npy'), oof_nn_preds)
np.save(os.path.join(SAVE_PATH, 'test_nn_preds.npy'), test_nn_preds)
print(f"\nPrediction arrays saved successfully to: '{SAVE_PATH}'")


Prediction arrays saved successfully to: './NN_model_predictions/'
