## Import

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

import torch
import torch.nn as nn

import pandas as pd

import os

import numpy as np

from skorch import NeuralNetRegressor
from skorch.callbacks import EarlyStopping, Checkpoint, LRScheduler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from skorch.helper import predefined_split
from skorch.dataset import Dataset

In [7]:
def create_scaled_data_by_col(df, min_max_cols, normalize_cols, y_cols, col_name, col):
    """Select the rows where ``df[col_name] == col`` and scale them.

    The split column itself is dropped from the result. ``min_max_cols`` are
    scaled with a freshly fitted ``MinMaxScaler``; ``normalize_cols`` and
    ``y_cols`` are each scaled with their own freshly fitted ``StandardScaler``.

    Args:
        df: Source frame containing ``col_name`` and every listed column.
        min_max_cols: Column names to min-max scale.
        normalize_cols: Column names to standardise.
        y_cols: Target column names to standardise.
        col_name: Column used to select the subset.
        col: Value of ``col_name`` identifying the subset.

    Returns:
        Tuple ``(db, min_max_scaler, normalize_scaler, y_scaler)`` where ``db``
        is the scaled subset and the scalers are the fitted transformers.
    """
    # Filter into new lists instead of calling list.remove(): the caller's
    # lists are shared across calls and must not be mutated as a side effect.
    min_max_cols = [name for name in min_max_cols if name != col_name]
    normalize_cols = [name for name in normalize_cols if name != col_name]

    # drop() returns a new frame, so the column assignments below never touch df.
    db = df[df[col_name] == col].drop(columns=[col_name])

    x_min_max = db[min_max_cols].values.astype(np.float32)
    x_normalize = db[normalize_cols].values.astype(np.float32)
    y = db[y_cols].values.reshape(-1, len(y_cols)).astype(np.float32)

    # Scalers are fitted on this subset only.
    min_max_scaler = MinMaxScaler().fit(x_min_max)
    normalize_scaler = StandardScaler().fit(x_normalize)
    y_scaler = StandardScaler().fit(y)

    db[min_max_cols] = min_max_scaler.transform(x_min_max)
    db[normalize_cols] = normalize_scaler.transform(x_normalize)
    db[y_cols] = y_scaler.transform(y)

    return (db, min_max_scaler, normalize_scaler, y_scaler)



## Loading Data and Scaling

In [8]:
# Read the training table, parse the date column and order rows by date, then store.
data_dir = 'data/'
df = (
    pd.read_csv(os.path.join(data_dir, 'train_data.csv'))
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values(by=['date', 'store_nbr'])
)
display(df.head())

Unnamed: 0,date,store_nbr,family,sales,onpromotion,city,state,store_type,cluster,oil,...,dow_avg_sales,dow_rolling_3_sales,dow_rolling_7_sales,dow_avg_transactions,dow_rolling_3_transactions,dow_rolling_7_transactions,rolling_7_sales,rolling_14_sales,rolling_7_transactions,rolling_14_transactions
0,2013-02-01,1,0,3.0,0,0,0,0,13,97.46,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2013-02-01,1,1,0.0,0,0,0,0,13,97.46,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2013-02-01,1,2,0.0,0,0,0,0,13,97.46,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2013-02-01,1,3,941.0,0,0,0,0,13,97.46,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2013-02-01,1,4,0.0,0,0,0,0,13,97.46,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# Features passed through MinMaxScaler.
min_max_cols = [
    'store_nbr', 'city', 'state', 'store_type', 'cluster',
    'h_type_nat', 'h_description_nat', 'h_transferred_nat',
    'h_type_loc', 'h_description_loc', 'h_transferred_loc',
    'month', 'day', 'day_of_week',
]
# Features passed through StandardScaler.
normalize_cols = [
    'onpromotion', 'oil',
    'dow_avg_sales', 'dow_rolling_3_sales', 'rolling_7_sales', 'rolling_14_sales',
    'dow_avg_transactions', 'dow_rolling_3_transactions',
    'rolling_7_transactions', 'rolling_14_transactions',
]
x_cols = min_max_cols + normalize_cols
y_cols = ['sales']
# One model (and one set of scalers) is built per distinct value of this column.
split_col = 'family'

print(min_max_cols)
print(df.columns)

# True: train on everything. False: hold out rows dated 2017-07-27 or later for validation.
final_run = False

if final_run:
    train_df = df
else:
    rows_before = (df['date'] < '2017-07-27')
    rows_after = ~rows_before

    for label, count in (
        ('rows_before', rows_before.sum()),
        ('rows_after', rows_after.sum()),
        ('rows_total', len(df)),
    ):
        print(label, count)

    train_df = df[rows_before]
    val_df = df[rows_after]

    print(train_df['date'].max())
    print(val_df['date'].min())

# NOTE: the loop variable is named `cluster` but iterates over `split_col` values.
train_df_by_cluster, scaler_x_by_cluster, scaler_y_by_cluster = {}, {}, {}

for cluster in df[split_col].unique():
    scaled_df, mm_scaler, std_scaler, target_scaler = create_scaled_data_by_col(
        train_df, min_max_cols, normalize_cols, y_cols, split_col, cluster
    )
    train_df_by_cluster[cluster] = scaled_df
    scaler_x_by_cluster[cluster] = (mm_scaler, std_scaler)
    scaler_y_by_cluster[cluster] = target_scaler

if not final_run:
    val_df_by_cluster = {}

    for cluster in df[split_col].unique():
        # Validation rows are transformed with the scalers fitted on the training rows.
        mm_scaler, std_scaler = scaler_x_by_cluster[cluster]
        target_scaler = scaler_y_by_cluster[cluster]

        val_cluster_df = val_df[val_df[split_col] == cluster].drop(columns=split_col)

        val_cluster_df[min_max_cols] = mm_scaler.transform(
            val_cluster_df[min_max_cols].values.astype(np.float32)
        )
        val_cluster_df[normalize_cols] = std_scaler.transform(
            val_cluster_df[normalize_cols].values.astype(np.float32)
        )
        val_cluster_df[y_cols] = target_scaler.transform(
            val_cluster_df[y_cols].values.reshape(-1, len(y_cols)).astype(np.float32)
        )

        val_df_by_cluster[cluster] = val_cluster_df

['store_nbr', 'city', 'state', 'store_type', 'cluster', 'h_type_nat', 'h_description_nat', 'h_transferred_nat', 'h_type_loc', 'h_description_loc', 'h_transferred_loc', 'month', 'day', 'day_of_week']
Index(['date', 'store_nbr', 'family', 'sales', 'onpromotion', 'city', 'state',
       'store_type', 'cluster', 'oil', 'h_type_nat', 'h_description_nat',
       'h_transferred_nat', 'h_type_loc', 'h_description_loc',
       'h_transferred_loc', 'year', 'month', 'day', 'day_of_week',
       'dow_avg_sales', 'dow_rolling_3_sales', 'dow_rolling_7_sales',
       'dow_avg_transactions', 'dow_rolling_3_transactions',
       'dow_rolling_7_transactions', 'rolling_7_sales', 'rolling_14_sales',
       'rolling_7_transactions', 'rolling_14_transactions'],
      dtype='object')
rows_before 2910006
rows_after 35640
rows_total 2945646
2017-07-26 00:00:00
2017-07-27 00:00:00


## Neural Network

In [20]:
import torch.nn as nn

class FFNeuralNetwork(nn.Module):
    """Feed-forward network with residual hidden layers.

    The input is projected to ``hidden_dim`` by ``head``, passed through
    ``num_hidden_layers`` residual steps ``x + ReLU(Linear(x))`` held in
    ``stem``, then mapped to ``out_dim`` by ``output``.

    Args:
        input_dim: Number of input features.
        out_dim: Number of outputs.
        hidden_dim: Width of every hidden layer.
        num_hidden_layers: Number of residual layers in ``stem``.
    """

    def __init__(self, input_dim, out_dim, hidden_dim=64, num_hidden_layers=2):
        super().__init__()

        self.act = nn.ReLU()
        # Sub-module names and creation order (head, output, stem) are kept as-is
        # so state_dict keys and seeded initialisation do not change.
        self.head = nn.Linear(input_dim, hidden_dim)
        self.output = nn.Linear(hidden_dim, out_dim)
        self.stem = nn.Sequential(
            *[nn.Linear(hidden_dim, hidden_dim) for _ in range(num_hidden_layers)]
        )

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        hidden = self.act(self.head(x))

        # Sequential is used only as a container; each layer is applied with a skip connection.
        for residual_layer in self.stem:
            hidden = hidden + self.act(residual_layer(hidden))

        return self.output(hidden)

class LSTMNeuralNetwork(nn.Module):
    """LSTM forecaster fed by a projected history plus per-step exogenous features.

    The flattened endogenous input is projected by ``linear_proj`` and reshaped
    into ``out_seq_len`` steps of width ``input_dim - exogenous_dim``. The
    exogenous features are concatenated onto each step to reach ``input_dim``,
    the sequence runs through the LSTM, and ``output_linear`` maps every step
    to ``out_dim``.

    Args:
        input_dim: Feature width of each LSTM input step.
        endogenous_dim: Multiplied by ``endogenous_len`` to size the projection input.
        endogenous_len: See ``endogenous_dim``.
        exogenous_dim: Number of exogenous features concatenated per step.
        hidden_dim: LSTM hidden size.
        out_dim: Output width per step.
        out_seq_len: Number of output steps.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, endogenous_dim, endogenous_len, exogenous_dim, hidden_dim, out_dim, out_seq_len, num_layers):
        super().__init__()

        self.hidden_dim = hidden_dim
        self.input_dim = input_dim
        self.out_seq_len = out_seq_len
        self.endogenous_dim = endogenous_dim
        self.exogenous_dim = exogenous_dim

        # Width left for the projected history once the exogenous features are appended.
        projected_width = input_dim - exogenous_dim

        self.act = nn.ReLU()
        self.linear_proj = nn.Linear(endogenous_dim * endogenous_len, projected_width * out_seq_len)
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.output_linear = nn.Linear(hidden_dim, out_dim)

    def forward(self, x):
        """Run the model on ``x``, where ``x[0]`` is the endogenous input and ``x[1]`` the exogenous one."""
        endogenous = x[0]
        exogenous = x[1]

        projected = self.act(self.linear_proj(endogenous))
        steps = projected.view(-1, self.out_seq_len, self.input_dim - self.exogenous_dim)
        lstm_input = torch.cat((steps, exogenous), dim=-1)

        lstm_output, _ = self.lstm(lstm_input)

        return self.output_linear(lstm_output)
    
class LSTMDataset(Dataset):
    """Sliding-window dataset over the distinct values of ``index_col``.

    Sample ``idx`` uses the ``endogenous_len`` consecutive index values starting
    at position ``idx`` as history and the following ``out_seq_len`` index
    values as the forecast horizon.

    Args:
        data: Frame holding ``index_col`` and all listed columns.
        endogenous_len: Number of index values in the history window.
        out_seq_len: Number of index values in the forecast window.
        index_col: Column whose distinct sorted values define the time axis.
        endogenous_cols: Columns flattened into the ``endog`` vector.
        exogenous_cols: Columns returned per forecast step in ``exog``.
        out_cols: Target columns returned in ``label``.
        add_order: Optional secondary sort column. When given, ``label`` is
            reshaped to ``(out_seq_len, n_unique(add_order))`` and ``exog`` is
            taken from the first row of each forecast step.
            NOTE(review): this assumes every index value has exactly one row per
            ``add_order`` value and that ``out_cols`` has one column - confirm.
    """

    def __init__(self, data, endogenous_len, out_seq_len, index_col, endogenous_cols, exogenous_cols, out_cols, add_order=None):
        sort_keys = [index_col, add_order] if add_order is not None else [index_col]
        self.data = data.sort_values(by=sort_keys)
        self.add_len = data[add_order].nunique() if add_order is not None else 0
        self.add_cols = add_order
        self.endogenous_len = endogenous_len
        self.out_seq_len = out_seq_len
        self.index_col = index_col
        self.endogenous_cols = endogenous_cols
        self.exogenous_cols = exogenous_cols
        self.out_cols = out_cols
        # Position -> index value lookup (data is already sorted by index_col).
        self.indices = dict(enumerate(self.data[self.index_col].unique()))

    def __len__(self):
        # The last valid start position is len(indices) - endogenous_len - out_seq_len,
        # hence the "+ 1". Clamp at zero: __len__ must never return a negative number.
        n_windows = len(self.indices) - self.endogenous_len - self.out_seq_len + 1
        return max(0, n_windows)

    def __getitem__(self, idx):
        """Return ``{'endog', 'exog', 'label'}`` arrays for window ``idx``.

        Raises:
            IndexError: If ``idx`` is outside ``[0, len(self))``.
        """
        if not 0 <= idx < len(self):
            # IndexError (not the dict's KeyError) is what the sequence protocol expects.
            raise IndexError(f'window index {idx} out of range for {len(self)} windows')

        date_in = self.indices[idx]
        date_mid = self.indices[idx + self.endogenous_len - 1]
        date_end = self.indices[idx + self.endogenous_len + self.out_seq_len - 1]

        index_values = self.data[self.index_col]
        in_rows = (index_values >= date_in) & (index_values <= date_mid)
        out_rows = (index_values > date_mid) & (index_values <= date_end)

        endog = self.data[in_rows][self.endogenous_cols].values.reshape(-1)
        horizon = self.data[out_rows]
        exog = horizon[self.exogenous_cols]
        y = horizon[self.out_cols].values

        if self.add_cols is not None:
            # Keep the first row of every forecast step (rows are sorted by index_col, add_order).
            exog = exog.iloc[[self.add_len * i for i in range(self.out_seq_len)]]
            exog = exog.values.reshape(self.out_seq_len, len(self.exogenous_cols))
            y = y.reshape(self.out_seq_len, self.add_len)
        else:
            # Without a secondary key, return plain arrays with one row per step
            # (previously this path returned a DataFrame and reshaped to width 0).
            exog = exog.values
            y = y.reshape(self.out_seq_len, -1)

        return {
            'endog': endog,
            'exog': exog,
            'label': y,
        }


## NN Training

In [45]:
# Trained regressors, keyed by the split-column value they were fitted on.
net_by_cluster = {}

# Keyword arguments forwarded to NeuralNetRegressor for every model.
# 'train_split' and 'callbacks' are filled in by the training cell.
train_params = dict(
    criterion=nn.L1Loss,
    optimizer=torch.optim.AdamW,
    optimizer__weight_decay=1e-8,
    lr=0.001,
    batch_size=128,
    max_epochs=1000,
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    iterator_train__shuffle=False,
    iterator_train__num_workers=2,
    iterator_train__pin_memory=True,
    iterator_valid__shuffle=False,
    iterator_valid__num_workers=2,
    iterator_valid__pin_memory=True,
    verbose=2,
)

# Constructor arguments for FFNeuralNetwork.
net_params = dict(
    input_dim=len(x_cols),
    out_dim=1,
    hidden_dim=200,
    num_hidden_layers=6,
)

In [None]:
# Fit one NeuralNetRegressor per split-column value (the loop variable is named `cluster`).
#
# With final_run=True there is no validation split (train_split=None), so no
# 'valid_loss' is recorded; the callbacks must then track the training loss,
# otherwise early stopping, checkpointing and the LR schedule have nothing to monitor.
loss_to_monitor = 'train_loss' if final_run else 'valid_loss'

for cluster in df[split_col].unique():
    train_df = train_df_by_cluster[cluster]
    train_x = train_df[x_cols].values.astype(np.float32)
    train_y = train_df[y_cols].values.reshape(-1, len(y_cols)).astype(np.float32)

    # train_params is updated in place, so later cells that reuse it see the
    # split and callbacks of the last model trained here.
    if not final_run:
        val_df = val_df_by_cluster[cluster]
        train_params['train_split'] = predefined_split(
            Dataset(
                val_df[x_cols].values.astype(np.float32),
                val_df[y_cols].values.reshape(-1, len(y_cols)).astype(np.float32),
            )
        )
    else:
        train_params['train_split'] = None

    callbacks = [
        EarlyStopping(patience=15, threshold=0.001, threshold_mode='abs', monitor=loss_to_monitor, lower_is_better=True),
        # Saves the parameters to models/sales_forecaster_<value>.pt whenever the monitored loss improves.
        Checkpoint(monitor=f'{loss_to_monitor}_best', f_params=f'sales_forecaster_{cluster}.pt', dirname='models/'),
        LRScheduler(policy=ReduceLROnPlateau, monitor=loss_to_monitor, factor=0.5, patience=5, threshold=0.001, threshold_mode='abs', mode='min', verbose=True),
    ]

    train_params['callbacks'] = callbacks

    net = NeuralNetRegressor(FFNeuralNetwork(**net_params), **train_params)

    net.fit(train_x, train_y)
    net_by_cluster[cluster] = net



  epoch    train_loss    valid_loss    cp     dur
-------  ------------  ------------  ----  ------
      1        [36m0.4535[0m        [32m0.4655[0m     +  1.4670
      2        [36m0.4384[0m        [32m0.4592[0m     +  1.3887


Exception in thread Thread-37 (_pin_memory_loop):
Traceback (most recent call last):
  File "/usr/lib64/python3.12/threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "/home/codygrogan/.local/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 761, in run_closure
    _threading_Thread_run(self)
  File "/usr/lib64/python3.12/threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "/home/codygrogan/.local/lib/python3.12/site-packages/torch/utils/data/_utils/pin_memory.py", line 53, in _pin_memory_loop
    do_one_step()
  File "/home/codygrogan/.local/lib/python3.12/site-packages/torch/utils/data/_utils/pin_memory.py", line 30, in do_one_step
    r = in_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib64/python3.12/multiprocessing/queues.py", line 122, in get
    return _ForkingPickler.loads(res)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/codygrogan/.local/lib/pyt

  epoch    train_loss    valid_loss    cp     dur
-------  ------------  ------------  ----  ------
      1        [36m0.1979[0m        [32m0.2744[0m     +  1.2951
      2        [36m0.1747[0m        [32m0.2719[0m     +  1.2688
      3        [36m0.1688[0m        [32m0.2716[0m     +  1.2690
      4        [36m0.1669[0m        [32m0.2712[0m     +  1.2912
      5        [36m0.1640[0m        [32m0.2703[0m     +  1.2950
      6        [36m0.1634[0m        [32m0.2695[0m     +  1.2497
      7        [36m0.1630[0m        [32m0.2694[0m     +  1.2850
      8        [36m0.1629[0m        [32m0.2679[0m     +  1.3391
      9        [36m0.1620[0m        [32m0.2665[0m     +  1.3034


## Load Nets from Checkpoints

In [None]:
# Rebuild one regressor per split-column value and load its saved parameters
# from models/sales_forecaster_<value>.pt (initialize() runs before load_params()).
for cluster in df[split_col].unique():
    restored_net = NeuralNetRegressor(FFNeuralNetwork(**net_params), **train_params)
    restored_net.initialize()
    restored_net.load_params(f_params=f'models/sales_forecaster_{cluster}.pt')
    net_by_cluster[cluster] = restored_net

In [7]:
# Show the columns of one scaled per-split training frame.
# NOTE(review): `cluster` is not assigned in this cell; it is whatever value an
# earlier loop left behind, so this only works after one of those cells has run.
print(train_df_by_cluster[cluster].columns)

Index(['date', 'store_nbr', 'sales', 'onpromotion', 'city', 'state',
       'store_type', 'cluster', 'oil', 'h_type_nat', 'h_description_nat',
       'h_transferred_nat', 'h_type_loc', 'h_description_loc',
       'h_transferred_loc', 'year', 'month', 'day', 'day_of_week',
       'dow_avg_sales', 'dow_rolling_3_sales', 'dow_rolling_7_sales',
       'dow_avg_transactions', 'dow_rolling_3_transactions',
       'dow_rolling_7_transactions', 'rolling_7_sales', 'rolling_14_sales',
       'rolling_7_transactions', 'rolling_14_transactions'],
      dtype='object')


## LSTM Training

In [11]:
# Columns flattened into the LSTM's history ("endogenous") input.
endogenous_cols = [
    'sales',
    'onpromotion',
    'oil',
    'dow_avg_sales',
    'dow_rolling_3_sales',
    'dow_rolling_7_sales',
    'dow_avg_transactions',
    'dow_rolling_3_transactions',
    'dow_rolling_7_transactions',
    'rolling_7_sales',
    'rolling_14_sales',
    'rolling_7_transactions',
    'rolling_14_transactions',
]

# Columns supplied for each forecast step ("exogenous" input).
exogenous_cols = [
    'h_type_nat',
    'h_description_nat',
    'h_transferred_nat',
    'h_type_loc',
    'h_description_loc',
    'h_transferred_loc',
    'month',
    'day',
    'day_of_week',
    'store_nbr',
]

out_cols = ['sales']

# Trained LSTM models, keyed by split-column value.
lstm_net_by_cluster = {}

# Constructor arguments for LSTMNeuralNetwork.
# NOTE(review): 54 is presumably the number of distinct store_nbr values - confirm against the data.
lstm_net_params = dict(
    input_dim=256,
    endogenous_dim=len(endogenous_cols) * 54,
    endogenous_len=5,
    exogenous_dim=len(exogenous_cols),
    hidden_dim=512,
    out_dim=54,
    out_seq_len=15,
    num_layers=2,
)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_epochs = 1000

# Read the window lengths from the model config instead of repeating the
# literals, so the dataset windows and the network dimensions cannot drift apart.
endogenous_len = lstm_net_params['endogenous_len']
out_seq_len = lstm_net_params['out_seq_len']

# Train one LSTM per split-column value (the loop variable is named `cluster`).
for cluster in df[split_col].unique():
    train_df = train_df_by_cluster[cluster]
    train_dataset = LSTMDataset(train_df, endogenous_len, out_seq_len, 'date', endogenous_cols, exogenous_cols, out_cols, 'store_nbr')
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=20, shuffle=False)

    val_df = val_df_by_cluster[cluster]
    val_dataset = LSTMDataset(val_df, endogenous_len, out_seq_len, 'date', endogenous_cols, exogenous_cols, out_cols, 'store_nbr')

    model = LSTMNeuralNetwork(**lstm_net_params)
    # Move the model to its device before the optimizer captures its parameters.
    model.to(device)
    optim = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.L1Loss()
    model.train()

    # Validation uses only the first window of the validation frame; it is
    # converted once up front with a leading batch dimension of 1.
    val = val_dataset[0]
    val_endog = torch.tensor(val['endog']).unsqueeze(0).to(device).to(torch.float32)
    val_exog = torch.tensor(val['exog']).unsqueeze(0).to(device).to(torch.float32)
    val_y = torch.tensor(val['label']).unsqueeze(0).to(device).to(torch.float32)

    for epoch in range(num_epochs):
        for sample in train_loader:
            endog = sample['endog'].to(device).to(torch.float32)
            exog = sample['exog'].to(device).to(torch.float32)
            y = sample['label'].to(device).to(torch.float32)

            optim.zero_grad()
            output = model((endog, exog))
            loss = criterion(output, y)
            loss.backward()
            optim.step()

        # Report the validation loss after every epoch, then return to training mode.
        model.eval()
        with torch.no_grad():
            val_pred = model((val_endog, val_exog))
            val_loss = criterion(val_pred, val_y)
        print(f'Epoch {epoch+1}/{num_epochs}, Val Loss: {val_loss.item()}')
        model.train()

    lstm_net_by_cluster[cluster] = model

Epoch 1/1000, Val Loss: 0.5246623754501343
Epoch 2/1000, Val Loss: 0.5180840492248535
Epoch 3/1000, Val Loss: 0.5258320569992065
Epoch 4/1000, Val Loss: 0.5234028697013855
Epoch 5/1000, Val Loss: 0.5300126075744629
Epoch 6/1000, Val Loss: 0.530662477016449
Epoch 7/1000, Val Loss: 0.5329450964927673
Epoch 8/1000, Val Loss: 0.531719446182251
Epoch 9/1000, Val Loss: 0.5411142706871033
Epoch 10/1000, Val Loss: 0.5426766872406006
Epoch 11/1000, Val Loss: 0.5436486005783081
Epoch 12/1000, Val Loss: 0.5435389876365662
Epoch 13/1000, Val Loss: 0.5330765843391418
Epoch 14/1000, Val Loss: 0.5339668989181519


KeyboardInterrupt: 

## Random Forest

In [37]:
from sklearn.ensemble import RandomForestRegressor
# Fitted random forests, keyed by split-column value.
cluster_rfs = {}

for cluster in df[split_col].unique():
    train_df = train_df_by_cluster[cluster]

    train_x = train_df[x_cols].values.astype(np.float32)
    train_y = train_df[y_cols].values.reshape(-1, len(y_cols)).astype(np.float32)

    # The target is squeezed before fitting; random_state is fixed for repeatable fits.
    cluster_rfs[cluster] = RandomForestRegressor(
        n_estimators=100,
        max_depth=20,
        random_state=42,
        n_jobs=4,
    ).fit(train_x, train_y.squeeze())

## XGBoost

In [18]:
import xgboost as xgb
# Fitted XGBoost regressors, keyed by split-column value.
cluster_xgb = {}
for cluster in df[split_col].unique():
    # Build the inputs from train_df_by_cluster, exactly like the random forest
    # cell; the train_x_by_cluster / train_y_by_cluster dicts this cell used to
    # read are not defined anywhere in the notebook.
    train_df = train_df_by_cluster[cluster]
    train_x = train_df[x_cols].values.astype(np.float32)
    train_y = train_df[y_cols].values.reshape(-1, len(y_cols)).astype(np.float32)

    xgb_model = xgb.XGBRegressor(n_estimators=1000, max_depth=12, learning_rate=0.001, random_state=42, n_jobs=2)
    xgb_model.fit(train_x, train_y.squeeze())

    cluster_xgb[cluster] = xgb_model

## Predict on Training Data

In [27]:
net_train_preds = []
rf_train_preds = []

# Add each model's predictions as new columns on the per-split training frames.
# The frames stored in train_df_by_cluster are modified in place, and the
# predictions are written as returned by predict() (no inverse scaling here).
for cluster in df[split_col].unique():
    train_df = train_df_by_cluster[cluster]
    train_x = train_df[x_cols].values.astype(np.float32)

    train_df['sales_nn'] = net_by_cluster[cluster].predict(train_x)
    train_df['sales_rf'] = cluster_rfs[cluster].predict(train_x)

## Validation Loss Evaluation

In [26]:
def rmsle(y_true, y_pred):
    """Root mean squared logarithmic error between two sets of values.

    Both inputs are flattened before comparison, so a column vector ``(n, 1)``
    and a flat vector ``(n,)`` are matched element by element instead of being
    broadcast into an ``(n, n)`` matrix of differences.

    Args:
        y_true: Array-like of true values.
        y_pred: Array-like of predicted values.

    Returns:
        ``sqrt(mean((log1p(y_true) - log1p(y_pred)) ** 2))``.
    """
    true_flat = np.asarray(y_true).ravel()
    pred_flat = np.asarray(y_pred).ravel()
    return np.sqrt(np.mean(np.square(np.log1p(true_flat) - np.log1p(pred_flat))))

rf_preds, net_preds, xgb_preds, val_y_true = [], [], [], []

# Collect validation predictions per split-column value, mapped back to the
# original sales scale with that split's target scaler.
# XGBoost is not evaluated here, so xgb_preds stays empty.
for cluster in df[split_col].unique():
    val_cluster_df = val_df_by_cluster[cluster]
    val_x = val_cluster_df[x_cols].values.astype(np.float32)
    val_y = val_cluster_df[y_cols].values.reshape(-1, len(y_cols)).astype(np.float32)

    rf = cluster_rfs[cluster]
    net = net_by_cluster[cluster]
    to_sales = scaler_y_by_cluster[cluster].inverse_transform

    rf_preds.append(to_sales(rf.predict(val_x).reshape(-1, 1)))
    # Only the network output is clipped at zero.
    net_preds.append(to_sales(net.predict(val_x).reshape(-1, 1)).clip(0))
    val_y_true.append(to_sales(val_y))

rf_preds = np.concatenate(rf_preds)
net_preds = np.concatenate(net_preds)
val_y_true = np.concatenate(val_y_true)

print(f'RF RMSLE: {rmsle(val_y_true, rf_preds)}')
print(f'NN RMSLE: {rmsle(val_y_true, net_preds)}')

NameError: name 'cluster_rfs' is not defined

## Loading Test Data

In [12]:
# Load the test table; its first CSV column becomes the index.
test_df = pd.read_csv(os.path.join(data_dir, 'test_data.csv'), index_col=0)
display(test_df.head())

test_x_by_cluster = {}
test_id_by_cluster = {}

# Scale the test features per split-column value with the scalers fitted on the training data.
for cluster in df[split_col].unique():
    test_min_max_scaler, test_normalize_scaler = scaler_x_by_cluster[cluster]

    test_cluster_x_df = test_df[test_df[split_col] == cluster].drop(columns=split_col)

    test_cluster_x_min_max = test_min_max_scaler.transform(
        test_cluster_x_df[min_max_cols].values.astype(np.float32)
    )
    test_cluster_x_normalize = test_normalize_scaler.transform(
        test_cluster_x_df[normalize_cols].values.astype(np.float32)
    )

    # Column order (min-max block, then standardised block) matches x_cols.
    test_x_by_cluster[cluster] = np.concatenate([test_cluster_x_min_max, test_cluster_x_normalize], axis=1)
    test_id_by_cluster[cluster] = test_cluster_x_df.index

test_preds_dfs = []

# Predict with each split's network, undo the target scaling and clip at zero.
for cluster in df[split_col].unique():
    test_x = test_x_by_cluster[cluster]
    # Named row_ids rather than `id` so the builtin id() is not shadowed for the rest of the session.
    row_ids = test_id_by_cluster[cluster]

    pred_nn = scaler_y_by_cluster[cluster].inverse_transform(
        net_by_cluster[cluster].predict(test_x).reshape(-1, 1)
    ).clip(0)

    test_preds_dfs.append(pd.DataFrame(pred_nn, index=row_ids, columns=['sales_nn']))

test_preds_df = pd.concat(test_preds_dfs)

# 'id' is matched on both sides of the merge; test rows without a prediction keep NaN (left join).
test_df = test_df.merge(test_preds_df, on='id', how='left')

sub_df_nn = test_df[['sales_nn']].rename(columns={'sales_nn': 'sales'})

display(sub_df_nn.head())

sub_df_nn.to_csv(os.path.join(data_dir, 'submission_nn.csv'))

Unnamed: 0_level_0,store_nbr,family,onpromotion,city,state,store_type,cluster,oil,h_type,h_locale,...,day,day_of_week,dow_avg_sales,rolling_7_sales,rolling_14_sales,rolling_30_sales,dow_avg_transactions,rolling_7_transactions,rolling_14_transactions,rolling_30_transactions
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3000888,1,0,0,0,0,0,13,46.8,1,1,...,16,2,3.531381,3.428571,3.928571,4.133333,1863.393305,1888.857143,1863.857143,1859.3
3000889,1,1,0,0,0,0,13,46.8,1,1,...,16,2,0.0,0.0,0.0,0.0,1863.393305,1888.857143,1863.857143,1859.3
3000890,1,2,2,0,0,0,13,46.8,1,1,...,16,2,2.615063,3.857143,4.428571,3.7,1863.393305,1888.857143,1863.857143,1859.3
3000891,1,3,20,0,0,0,13,46.8,1,1,...,16,2,1845.485356,2456.142857,2471.571429,2504.466667,1863.393305,1888.857143,1863.857143,1859.3
3000892,1,4,0,0,0,0,13,46.8,1,1,...,16,2,0.167364,0.0,0.571429,0.733333,1863.393305,1888.857143,1863.857143,1859.3


Unnamed: 0_level_0,sales
id,Unnamed: 1_level_1
3000888,3.804415
3000889,0.000565
3000890,5.299076
3000891,2225.056641
3000892,6e-05
