In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torch.optim
from sklearn.model_selection import KFold
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import train_test_split
device = 'mps'
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

In [2]:
train = pd.read_csv("data/train.csv")
print("Train shape", train.shape )
train.head()
train2 = pd.read_csv("data/training_extra.csv")
print("Extra Train shape", train2.shape )
train2.head()
train = pd.concat([train,train2],axis=0,ignore_index=True)
print("Combined Train shape", train.shape)
test = pd.read_csv("data/test.csv")
print("Test shape", test.shape )
test.head()

Train shape (300000, 11)
Extra Train shape (3694318, 11)
Combined Train shape (3994318, 11)
Test shape (200000, 10)


Unnamed: 0,id,Brand,Material,Size,Compartments,Laptop Compartment,Waterproof,Style,Color,Weight Capacity (kg)
0,300000,Puma,Leather,Small,2.0,No,No,Tote,Green,20.671147
1,300001,Nike,Canvas,Medium,7.0,No,Yes,Backpack,Green,13.564105
2,300002,Adidas,Canvas,Large,9.0,No,Yes,Messenger,Blue,11.809799
3,300003,Adidas,Nylon,Large,1.0,Yes,No,Messenger,Green,18.477036
4,300004,,Nylon,Large,2.0,Yes,Yes,Tote,Black,9.907953


In [3]:
train.insert(5,"CWoutLaptop", train['Compartments'] - (train['Laptop Compartment'] == 'Yes').astype(int))
test.insert(5,"CWoutLaptop", test['Compartments'] - (test['Laptop Compartment'] == 'Yes').astype(int))

In [4]:
train

Unnamed: 0,id,Brand,Material,Size,Compartments,CWoutLaptop,Laptop Compartment,Waterproof,Style,Color,Weight Capacity (kg),Price
0,0,Jansport,Leather,Medium,7.0,6.0,Yes,No,Tote,Black,11.611723,112.15875
1,1,Jansport,Canvas,Small,10.0,9.0,Yes,Yes,Messenger,Green,27.078537,68.88056
2,2,Under Armour,Leather,Small,2.0,1.0,Yes,No,Messenger,Red,16.643760,39.17320
3,3,Nike,Nylon,Small,8.0,7.0,Yes,No,Messenger,Green,12.937220,80.60793
4,4,Adidas,Canvas,Medium,1.0,0.0,Yes,Yes,Messenger,Green,17.749338,86.02312
...,...,...,...,...,...,...,...,...,...,...,...,...
3994313,4194313,Nike,Canvas,,3.0,2.0,Yes,Yes,Messenger,Blue,28.098120,104.74460
3994314,4194314,Puma,Leather,Small,10.0,9.0,Yes,Yes,Tote,Blue,17.379531,122.39043
3994315,4194315,Jansport,Canvas,Large,10.0,10.0,No,No,Backpack,Red,17.037708,148.18470
3994316,4194316,Puma,Canvas,,2.0,2.0,No,No,Backpack,Gray,28.783339,22.32269


In [5]:
train["Weight Capacity (kg)"].fillna(train["Weight Capacity (kg)"].median(), inplace=True)
test["Weight Capacity (kg)"].fillna(test["Weight Capacity (kg)"].median(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train["Weight Capacity (kg)"].fillna(train["Weight Capacity (kg)"].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test["Weight Capacity (kg)"].fillna(test["Weight Capacity (kg)"].median(), inplace=True)


In [9]:
CATS = list(train.columns[1:-2])
print(f"There are {len(CATS)} categorical columns:")
print( CATS )
print(f"There are 1 numerical column:")
print( ["Weight Capacity (kg)"] )

There are 9 categorical columns:
['Brand', 'Material', 'Size', 'Compartments', 'CWoutLaptop', 'Laptop Compartment', 'Waterproof', 'Style', 'Color']
There are 1 numerical column:
['Weight Capacity (kg)']


In [18]:
COMBO = []
for i,c in enumerate(CATS):
    combine = pd.concat([train[c],test[c]],axis=0)
    combine,_ = pd.factorize(combine)
    train[c] = combine[:len(train)]
    test[c] = combine[len(train):]
    n = f"{c}_wc"
    train[n] = train[c]*100 + train["Weight Capacity (kg)"]
    test[n] = test[c]*100 + test["Weight Capacity (kg)"]
    COMBO.append(n)
print()
print(f"We engineer {len(COMBO)} new columns!")
print( COMBO )


We engineer 8 new columns!
['Brand_wc', 'Material_wc', 'Size_wc', 'Compartments_wc', 'Laptop Compartment_wc', 'Waterproof_wc', 'Style_wc', 'Color_wc']


In [19]:
FEATURES = CATS + ["Weight Capacity (kg)"] + COMBO
print(f"We now have {len(FEATURES)} columns:")
print( FEATURES )

We now have 17 columns:
['Brand', 'Material', 'Size', 'Compartments', 'Laptop Compartment', 'Waterproof', 'Style', 'Color', 'Weight Capacity (kg)', 'Brand_wc', 'Material_wc', 'Size_wc', 'Compartments_wc', 'Laptop Compartment_wc', 'Waterproof_wc', 'Style_wc', 'Color_wc']


In [None]:
# STATISTICS TO AGGEGATE FOR OUR FEATURE GROUPS
STATS = ["mean", "std", "count", "nunique", "median", "min", "max", "skew", "range"]
# Added Mode
STATS2 = ["mean", "std"]

In [None]:

def impute_dataset(df, cols):
    fill_values = {}

    for col in cols:
        if "mean" in col:  
            fill_values[col] = df[col].mean()  # Mean for these stats
        elif "median" in col:  
            fill_values[col] = df[col].median()  # Median for these stats
        elif "count" in col or "nunique" in col:  
            fill_values[col] = 0  # 0 for count-based stats
        elif "min" in col:  
            fill_values[col] = df[col].min()
        elif "max" in col:  
            fill_values[col] = df[col].max()
        elif "range" in col:  
            fill_values[col] = df[col].max() - df[col].min()
        elif "std" in col or "skew" in col:  
            fill_values[col] = 0  # Std/Skew might be NaN if only 1 value, fill with 0

    df.fillna(fill_values, inplace=True)
    return df

In [None]:
import torch
from torch import nn
import torch.nn.functional as F

torch.manual_seed(1337)

class NeuralNetwEmbedding(nn.Module):
    def __init__(self, embedding_sizes, num_numerical, hidden_units, output_dim):
        super().__init__()

        # Embedding layers for categorical features
        self.embeddings = nn.ModuleList([
            nn.Embedding(num_categories, embedding_dim)
            for num_categories, embedding_dim in embedding_sizes.values()
        ])
        
        # Compute total embedding output size
        total_embedding_size = sum([embedding_dim for _, embedding_dim in embedding_sizes.values()])
        
        # Define MLP layers
        input_dim = total_embedding_size + num_numerical
        self.layer_1 = nn.Sequential(
            nn.Linear(input_dim, hidden_units),
            nn.BatchNorm1d(hidden_units),
            nn.ReLU(),
            nn.Dropout(p=0.2)
        )
        
        self.layer_2 = nn.Sequential(
            nn.Linear(hidden_units, hidden_units * 4),
            nn.BatchNorm1d(hidden_units * 4),
            nn.ReLU(),
            nn.Dropout(p=0.2)
        )

        self.layer_3 = nn.Sequential(
            nn.Linear(hidden_units * 4, hidden_units),
            nn.BatchNorm1d(hidden_units),
            nn.ReLU(),
            nn.Dropout(p=0.2)
        )

        self.output_layer = nn.Linear(hidden_units, output_dim)

        # Store number of categorical features
        self.num_categorical = len(embedding_sizes)

    def forward(self, X):
        # Split input tensor into categorical and numerical parts
        x_categorical = X[:, :self.num_categorical].long()  # First columns (categorical)
        x_numerical = X[:, self.num_categorical:]           # Remaining columns (numerical)

        # Convert categorical features to embeddings
        x_embedded = [emb(x_categorical[:, i]) for i, emb in enumerate(self.embeddings)]
        x_embedded = torch.cat(x_embedded, dim=1)  # Concatenate embeddings
        
        # Concatenate embeddings with numerical features
        x = torch.cat([x_embedded, x_numerical], dim=1)
        
        # Pass through MLP layers
        x = self.layer_1(x)
        x = self.layer_2(x)
        x = self.layer_3(x)
        x = self.output_layer(x)
        
        return x
    
def init_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.kaiming_uniform_(m.weight, a=0.01, mode='fan_in', nonlinearity='leaky_relu')
        if m.bias is not None:
            nn.init.zeros_(m.bias)

In [23]:
class BackpackDataset(Dataset):
    def __init__(self, df, isTest= False):
        self.isTest = isTest
        if not self.isTest:
            self.X = torch.tensor(df.drop(columns=['Price']).values, dtype=torch.float32)  # Features
            self.y = torch.tensor(df['Price'].values, dtype=torch.float32)  # Target
        else:
            self.X = torch.tensor(df.values, dtype= torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        if not self.isTest:
            return self.X[idx], self.y[idx]
        else:
            return self.X[idx]

In [24]:
embedding_sizes = {col: (train[col].nunique(), min(50, max(2, int(np.sqrt(train[col].nunique())))))
                   for col in CATS}

{'Brand': (6, 2),
 'Material': (5, 2),
 'Size': (4, 2),
 'Compartments': (10, 3),
 'Laptop Compartment': (3, 2),
 'Waterproof': (3, 2),
 'Style': (4, 2),
 'Color': (7, 2)}

In [None]:
torch.manual_seed(1337)
# Training the Network
def train_step(dataloader, model, loss_fn, optimizer, metrics_fn):
    model.train()
    train_loss, train_rmse = 0, 0
    
    for batch, (X_b, y_b) in enumerate(dataloader):
        X_b, y_b = X_b.to(device), y_b.to(device)
        # Forward Pass
        y_pred = model(X_b).squeeze()
        # Loss
        loss = loss_fn(y_pred, y_b)
        e = metrics_fn(y_pred.to('cpu').detach(), y_b.to('cpu'))
        train_loss += loss.item()
        train_rmse += e
        # Zero grad
        optimizer.zero_grad()
        # Backprop
        loss.backward()
        # Optimizer Step
        optimizer.step()
        
        #print(f'Train Loss ({batch}): {loss}')

    train_loss /= len(dataloader)
    train_rmse /= len(dataloader)

    return train_loss, train_rmse

def val_step(dataloader, model, loss_fn, metrics_fn):
    model.eval()
    val_loss = 0
    val_rmse = 0
    with torch.inference_mode():
        for batch, (X_b, y_b) in enumerate(dataloader):
            X_b, y_b = X_b.to(device), y_b.to(device)
            y_pred = model(X_b).squeeze()
            val_loss += loss_fn(y_pred, y_b).item()
            val_rmse += metrics_fn(y_pred.to('cpu').detach(), y_b.to('cpu'))
        
        val_loss /= len(dataloader)
        val_rmse /= len(dataloader)

    return val_loss, val_rmse


def train_model(save_i,epochs,model, scheduler, loss_fn, optimizer, train_loader, val_loader):
    for epoch in tqdm(range(1, epochs + 1)):
        print(f'Epoch {epoch}: ----- |  Learning Rate: {scheduler.optimizer.param_groups[0]['lr']}\n')
        train_loss, train_rmse = train_step(dataloader=train_loader, model=model, loss_fn=loss_fn, optimizer=optimizer, metrics_fn=root_mean_squared_error)
        val_loss, val_rmse = val_step(dataloader=val_loader, model=model, loss_fn=loss_fn,  metrics_fn=root_mean_squared_error)
        print(f'Train Loss: {train_loss} | Validation Loss: {val_loss}')
        print(f'Train RMSE: {train_rmse} | Validation RMSE: {val_rmse}')
            # Step the scheduler based on validation loss at the end of each epoch
        
        scheduler.step(val_loss)
        print('Saving Model')
        torch.save(model.state_dict(), f"models/model_checkpoint_{save_i}_{epoch}.pth")



In [65]:
class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
        
    def forward(self,yhat,y):
        return torch.sqrt(self.mse(yhat,y))

In [26]:
torch.manual_seed(1337)
BATCH_SIZE= 64
FOLDS = 10
kf = KFold(n_splits=FOLDS, shuffle=True, random_state=1337)
oof = np.zeros((len(train)))
pred = np.zeros((len(test)))

<bound method Module.state_dict of NeuralNetwEmbedding(
  (embeddings): ModuleList(
    (0): Embedding(6, 2)
    (1): Embedding(5, 2)
    (2): Embedding(4, 2)
    (3): Embedding(10, 3)
    (4-5): 2 x Embedding(3, 2)
    (6): Embedding(4, 2)
    (7): Embedding(7, 2)
  )
  (layer_1): Sequential(
    (0): Linear(in_features=66, out_features=64, bias=True)
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
  )
  (layer_2): Sequential(
    (0): Linear(in_features=64, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
  )
  (layer_3): Sequential(
    (0): Linear(in_features=128, out_features=64, bias=True)
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
  )
  (output_layer): Linear(in_



In [None]:
def make_predictions(model, dataframe):
    model.eval()
    test_dataset = BackpackDataset(dataframe, isTest=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
    
    preds = []

    with torch.inference_mode():
        for batch, (X) in enumerate(test_loader):
            X = X.to(device)
            pred = model(X).squeeze()
            preds.append(pred.to('cpu').numpy())
    
    preds = np.concatenate(preds, axis=0)
    return preds


In [27]:
all_preds = np.zeros(len(test))

# OUTER K FOLD
for i, (train_index, test_index) in enumerate(kf.split(train)):
    print(f"### OUTER Fold {i+1} ###")

    X_train = train.loc[train_index,FEATURES+['Price']].reset_index(drop=True).copy()
    y_train = train.loc[train_index,'Price']

    X_valid = train.loc[test_index,FEATURES].reset_index(drop=True).copy()
    y_valid = train.loc[test_index,'Price']

    X_test = test[FEATURES].reset_index(drop=True).copy()

    # INNER K FOLD (TO PREVENT LEAKAGE WHEN USING PRICE)
    kf2 = KFold(n_splits=FOLDS, shuffle=True, random_state=1337)   
    for j, (train_index2, test_index2) in enumerate(kf2.split(X_train)):
        print(f" ## INNER Fold {j+1} (outer fold {i+1}) ##")

        X_train2 = X_train.loc[train_index2,FEATURES+['Price']].copy()
        X_valid2 = X_train.loc[test_index2,FEATURES].copy()

        ### FEATURE SET 1 (uses price) ###
        col = "Weight Capacity (kg)"
        tmp = X_train2.groupby(col).Price.agg(STATS)
        tmp.columns = [f"TE1_wc_{s}" for s in STATS]
        X_valid2 = X_valid2.merge(tmp, on=col, how="left")
        for c in tmp.columns:
            X_train.loc[test_index2,c] = X_valid2[c].values

        ### FEATURE SET 2 (uses price) ###
        for col in COMBO:
            tmp = X_train2.groupby(col).Price.agg(STATS2)
            tmp.columns = [f"TE2_{col}_{s}" for s in STATS2]
            X_valid2 = X_valid2.merge(tmp, on=col, how="left")
            for c in tmp.columns:
                X_train.loc[test_index2,c] = X_valid2[c].values

    ### FEATURE SET 1 (uses price) ###
    col = "Weight Capacity (kg)"
    tmp = X_train.groupby(col).Price.agg(STATS)
    tmp.columns = [f"TE1_wc_{s}" for s in STATS]
    X_valid = X_valid.merge(tmp, on=col, how="left")
    X_test = X_test.merge(tmp, on=col, how="left")

    ### FEATURE SET 2 (uses price) ###
    for col in COMBO:
        tmp = X_train.groupby(col).Price.agg(STATS2)
        tmp.columns = [f"TE2_{col}_{s}" for s in STATS2]
        X_valid = X_valid.merge(tmp, on=col, how="left")
        X_test = X_test.merge(tmp, on=col, how="left")

    ### FEATURE SET 3 (does not use price) ###
    for col in CATS:
        col2 = "Weight Capacity (kg)"
        tmp = X_train.groupby(col)[col2].agg(STATS2)
        tmp.columns = [f"FE3_{col}_wc_{s}" for s in STATS2]
        X_train = X_train.merge(tmp, on=col, how="left")
        X_valid = X_valid.merge(tmp, on=col, how="left")
        X_test = X_test.merge(tmp, on=col, how="left")

    # CONVERT TO CATS SO XGBOOST RECOGNIZES THEM
    X_train[CATS] = X_train[CATS].astype("category")
    X_valid[CATS] = X_valid[CATS].astype("category")
    X_test[CATS] = X_test[CATS].astype("category")
    
    TE_COLUMNS = [col for col in X_train.columns if col.startswith("TE")]  # Identify target-encoded columns
    
    df_train = X_train
    X_valid['Price'] = y_valid.values
    df_val = X_valid
    
    df_train = impute_dataset(df_train, cols=TE_COLUMNS)
    df_val = impute_dataset(df_val, cols=TE_COLUMNS)
    X_test = impute_dataset(X_test, cols=TE_COLUMNS)

    train_dataset = BackpackDataset(df=df_train, isTest= False)
    val_dataset = BackpackDataset(df=df_val, isTest=False)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
    model_w_emb = NeuralNetwEmbedding(embedding_sizes=embedding_sizes, num_numerical=49, hidden_units=64,output_dim= 1)
    model_w_emb = model_w_emb.to(device)
    model_w_emb.apply(init_weights)
    
    
    loss_fn = RMSELoss()
    optimizer = torch.optim.Adam(model_w_emb.parameters(), lr=1e-3)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.05, patience=4, verbose=True)
    epochs = 10
    train_model(save_i=i, epochs=epochs, model=model_w_emb, scheduler=scheduler, loss_fn=loss_fn, optimizer=optimizer, train_loader=train_loader, val_loader=val_loader)
    preds = make_predictions(model=model_w_emb,dataframe=X_test)
    all_preds += preds

all_preds /= FOLDS

### OUTER Fold 1 ###
 ## INNER Fold 1 (outer fold 1) ##
 ## INNER Fold 2 (outer fold 1) ##
 ## INNER Fold 3 (outer fold 1) ##
 ## INNER Fold 4 (outer fold 1) ##
 ## INNER Fold 5 (outer fold 1) ##
 ## INNER Fold 6 (outer fold 1) ##
 ## INNER Fold 7 (outer fold 1) ##


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 5: ----- |  Learning Rate: 0.001



 20%|██        | 1/5 [06:14<24:57, 374.47s/it]

Train Loss: 1593.5727767294006 | Validation Loss: 1500.851268735794
Train RMSE: 39.661936145046276 | Validation RMSE: 38.67678580333278
Saving Model
Epoch 5: ----- |  Learning Rate: 0.001



 40%|████      | 2/5 [12:27<18:41, 373.85s/it]

Train Loss: 1525.6680226486433 | Validation Loss: 1500.5667083795
Train RMSE: 38.99193993351677 | Validation RMSE: 38.67354315176984
Saving Model
Epoch 5: ----- |  Learning Rate: 0.001



 60%|██████    | 3/5 [18:41<12:27, 373.52s/it]

Train Loss: 1521.5936118026511 | Validation Loss: 1498.5764703968814
Train RMSE: 38.93968092889664 | Validation RMSE: 38.64716214394373
Saving Model
Epoch 5: ----- |  Learning Rate: 0.001



 80%|████████  | 4/5 [24:54<06:13, 373.42s/it]

Train Loss: 1519.130942263517 | Validation Loss: 1497.2672742772604
Train RMSE: 38.90899720150025 | Validation RMSE: 38.630103561738785
Saving Model
Epoch 5: ----- |  Learning Rate: 0.001



100%|██████████| 5/5 [31:11<00:00, 374.32s/it]

Train Loss: 1517.5672818425348 | Validation Loss: 1496.8401496240301
Train RMSE: 38.88918227270167 | Validation RMSE: 38.62445239239136
Saving Model
### OUTER Fold 2 ###





 ## INNER Fold 1 (outer fold 2) ##
 ## INNER Fold 2 (outer fold 2) ##
 ## INNER Fold 3 (outer fold 2) ##
 ## INNER Fold 4 (outer fold 2) ##
 ## INNER Fold 5 (outer fold 2) ##
 ## INNER Fold 6 (outer fold 2) ##
 ## INNER Fold 7 (outer fold 2) ##


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 10: ----- |  Learning Rate: 0.001



 20%|██        | 1/5 [06:13<24:53, 373.41s/it]

Train Loss: 1515.8483657038264 | Validation Loss: 1499.7557652551745
Train RMSE: 38.86675049608111 | Validation RMSE: 38.66263202609092
Saving Model
Epoch 10: ----- |  Learning Rate: 0.001



 40%|████      | 2/5 [12:27<18:41, 373.95s/it]

Train Loss: 1514.0609518843808 | Validation Loss: 1498.8533770773859
Train RMSE: 38.84362744176216 | Validation RMSE: 38.65055749110075
Saving Model
Epoch 10: ----- |  Learning Rate: 0.001



 60%|██████    | 3/5 [18:42<12:28, 374.49s/it]

Train Loss: 1512.8071201906907 | Validation Loss: 1498.473057736191
Train RMSE: 38.828113677984895 | Validation RMSE: 38.64633744294238
Saving Model
Epoch 10: ----- |  Learning Rate: 0.001



 80%|████████  | 4/5 [24:56<06:14, 374.01s/it]

Train Loss: 1511.8233892441651 | Validation Loss: 1498.5560029527435
Train RMSE: 38.81589952416872 | Validation RMSE: 38.64733131930768
Saving Model
Epoch 10: ----- |  Learning Rate: 0.001



100%|██████████| 5/5 [31:09<00:00, 373.96s/it]

Train Loss: 1510.4230663196306 | Validation Loss: 1498.3721206206385
Train RMSE: 38.79853399836957 | Validation RMSE: 38.64484017284319
Saving Model
### OUTER Fold 3 ###





 ## INNER Fold 1 (outer fold 3) ##
 ## INNER Fold 2 (outer fold 3) ##
 ## INNER Fold 3 (outer fold 3) ##
 ## INNER Fold 4 (outer fold 3) ##
 ## INNER Fold 5 (outer fold 3) ##
 ## INNER Fold 6 (outer fold 3) ##
 ## INNER Fold 7 (outer fold 3) ##


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 15: ----- |  Learning Rate: 0.0001



 20%|██        | 1/5 [06:14<24:56, 374.19s/it]

Train Loss: 1509.4802668191971 | Validation Loss: 1497.8574135644706
Train RMSE: 38.785429895442434 | Validation RMSE: 38.63754596582987
Saving Model
Epoch 15: ----- |  Learning Rate: 0.0001



 40%|████      | 2/5 [12:29<18:44, 374.99s/it]

Train Loss: 1509.2372537792326 | Validation Loss: 1497.4135083095437
Train RMSE: 38.783111209155415 | Validation RMSE: 38.63137222715112
Saving Model
Epoch 15: ----- |  Learning Rate: 0.0001



 60%|██████    | 3/5 [18:43<12:28, 374.26s/it]

Train Loss: 1509.3275217427363 | Validation Loss: 1497.5926902500596
Train RMSE: 38.784388412674694 | Validation RMSE: 38.63401756018959
Saving Model
Epoch 15: ----- |  Learning Rate: 0.0001



 80%|████████  | 4/5 [24:57<06:14, 374.49s/it]

Train Loss: 1509.0712462468446 | Validation Loss: 1497.484928567509
Train RMSE: 38.78066302374611 | Validation RMSE: 38.63252367600464
Saving Model
Epoch 15: ----- |  Learning Rate: 0.0001



100%|██████████| 5/5 [31:01<00:00, 372.26s/it]

Train Loss: 1508.8027136180724 | Validation Loss: 1497.5004780950328
Train RMSE: 38.775964025241784 | Validation RMSE: 38.632716073828185
Saving Model
### OUTER Fold 4 ###





 ## INNER Fold 1 (outer fold 4) ##
 ## INNER Fold 2 (outer fold 4) ##
 ## INNER Fold 3 (outer fold 4) ##
 ## INNER Fold 4 (outer fold 4) ##
 ## INNER Fold 5 (outer fold 4) ##
 ## INNER Fold 6 (outer fold 4) ##
 ## INNER Fold 7 (outer fold 4) ##


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 20: ----- |  Learning Rate: 1e-05



 20%|██        | 1/5 [06:02<24:08, 362.17s/it]

Train Loss: 1508.5312658475086 | Validation Loss: 1497.0801971148032
Train RMSE: 38.77342827103147 | Validation RMSE: 38.6278348757356
Saving Model
Epoch 20: ----- |  Learning Rate: 1e-05



 40%|████      | 2/5 [12:01<18:01, 360.45s/it]

Train Loss: 1508.6391107368954 | Validation Loss: 1497.0032304205024
Train RMSE: 38.775174441761095 | Validation RMSE: 38.626716484298704
Saving Model
Epoch 20: ----- |  Learning Rate: 1e-05



 60%|██████    | 3/5 [21:36<15:16, 458.24s/it]

Train Loss: 1508.6032141846215 | Validation Loss: 1496.93081610959
Train RMSE: 38.774390849537035 | Validation RMSE: 38.62573979541472
Saving Model
Epoch 20: ----- |  Learning Rate: 1e-05



 80%|████████  | 4/5 [27:34<06:58, 418.79s/it]

Train Loss: 1508.6593313911703 | Validation Loss: 1496.9920601449028
Train RMSE: 38.77595597638609 | Validation RMSE: 38.62629080403833
Saving Model
Epoch 20: ----- |  Learning Rate: 1e-05



100%|██████████| 5/5 [33:33<00:00, 402.61s/it]

Train Loss: 1508.6584320775735 | Validation Loss: 1497.313758264801
Train RMSE: 38.7755385050867 | Validation RMSE: 38.63086409751148
Saving Model
### OUTER Fold 5 ###





 ## INNER Fold 1 (outer fold 5) ##
 ## INNER Fold 2 (outer fold 5) ##
 ## INNER Fold 3 (outer fold 5) ##
 ## INNER Fold 4 (outer fold 5) ##
 ## INNER Fold 5 (outer fold 5) ##
 ## INNER Fold 6 (outer fold 5) ##
 ## INNER Fold 7 (outer fold 5) ##


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 25: ----- |  Learning Rate: 1.0000000000000002e-06



  0%|          | 0/5 [03:21<?, ?it/s]


KeyboardInterrupt: 

In [61]:
torch.manual_seed(1337)


preds_nn_emb = make_predictions(model_w_emb, X_test)

Unnamed: 0,id,Price
0,300000,79.989791
1,300001,83.249299
2,300002,86.406607
3,300003,76.559465
4,300004,79.384238
...,...,...
199995,499995,81.018658
199996,499996,80.295455
199997,499997,82.905794
199998,499998,82.028964


In [52]:
model_state_dict = torch.load('models/best_nn_emb.pth', weights_only=True)
nn_model_best = NeuralNetwEmbedding(embedding_sizes=embedding_sizes, num_numerical=49, hidden_units=64,output_dim= 1)
nn_model_best = nn_model_best.to(device)
nn_model_best.load_state_dict(model_state_dict)

<All keys matched successfully>

In [None]:

preds =  make_predictions(model_w_emb, X_test) 
xgb = pd.read_csv('submission_v1.csv',index_col=None)
xgb_preds = xgb.Price
new_preds = (preds + xgb_preds) / 2


sub = pd.read_csv("data/sample_submission.csv")
sub.Price = new_preds
sub.to_csv(f"results_ensembled.csv",index=False)
