# Neural Network Rossmann Sales Prediction Model

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import plotly.express as px
import torch
from torch import nn

%matplotlib inline
sns.set_style('darkgrid')
matplotlib.rcParams['font.size'] = 14
matplotlib.rcParams['figure.figsize'] = (10, 6)
matplotlib.rcParams['figure.facecolor'] = '#00000000'
pd.set_option('display.max_columns', None)

In [2]:
ross_df = pd.read_csv('../Data/train.csv', low_memory = False)
store_df = pd.read_csv('../Data/store.csv')
test_df = pd.read_csv('../Data/test.csv')


In [3]:
merged_train_df = ross_df.merge(store_df, how = 'left', on = 'Store')
merged_test_df = test_df.merge(store_df, how = 'left', on = 'Store')

In [4]:
merged_train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1017209 entries, 0 to 1017208
Data columns (total 18 columns):
 #   Column                     Non-Null Count    Dtype  
---  ------                     --------------    -----  
 0   Store                      1017209 non-null  int64  
 1   DayOfWeek                  1017209 non-null  int64  
 2   Date                       1017209 non-null  object 
 3   Sales                      1017209 non-null  int64  
 4   Customers                  1017209 non-null  int64  
 5   Open                       1017209 non-null  int64  
 6   Promo                      1017209 non-null  int64  
 7   StateHoliday               1017209 non-null  object 
 8   SchoolHoliday              1017209 non-null  int64  
 9   StoreType                  1017209 non-null  object 
 10  Assortment                 1017209 non-null  object 
 11  CompetitionDistance        1014567 non-null  float64
 12  CompetitionOpenSinceMonth  693861 non-null   float64
 13  CompetitionO

## Feature Engineering

### Date Features

In [5]:
def date_features(df):
    df['Date'] = pd.to_datetime(df['Date'])
    df['DayOfMonth'] = df['Date'].dt.day
    df['Month'] = df['Date'].dt.month
    df['Year'] = df['Date'].dt.year
    df['Quarter'] = df['Date'].dt.quarter
    df['IsWeekend'] = df['DayOfWeek'].isin([6, 7]).astype(int)
    df['WeekOfYear'] = df['Date'].dt.isocalendar().week

In [6]:
date_features(merged_train_df)
date_features(merged_test_df)

In [7]:
#Remove all rows where open = 0
merged_train_df = merged_train_df.loc[merged_train_df['Open'] == 1].copy()

### How long has the competition been open in Months? (Feature)

In [8]:
def competition_open_months(df):
    df['CompetitionOpenMonths'] = 12 * (df['Year'] - df['CompetitionOpenSinceYear']) + (df['Month'] - df['CompetitionOpenSinceMonth'])
    df['CompetitionOpenMonths'] = df['CompetitionOpenMonths'].map(lambda x: 0 if x < 0 else x).fillna(0)

In [9]:
competition_open_months(merged_train_df)
competition_open_months(merged_test_df)

### Feature: How long has Promo2 been running and is it currently a Promo2 Month?

In [10]:
merged_train_df.sample(2)

Unnamed: 0,Store,DayOfWeek,Date,Sales,Customers,Open,Promo,StateHoliday,SchoolHoliday,StoreType,Assortment,CompetitionDistance,CompetitionOpenSinceMonth,CompetitionOpenSinceYear,Promo2,Promo2SinceWeek,Promo2SinceYear,PromoInterval,DayOfMonth,Month,Year,Quarter,IsWeekend,WeekOfYear,CompetitionOpenMonths
949558,364,6,2013-03-02,1750,266,1,0,0,0,a,c,13620.0,,,1,10.0,2014.0,"Mar,Jun,Sept,Dec",2,3,2013,1,1,9,0.0
993044,365,2,2013-01-22,6219,629,1,1,0,0,c,a,2410.0,,,1,45.0,2009.0,"Feb,May,Aug,Nov",22,1,2013,1,0,4,0.0


In [11]:
def check_promo_month(row):
    month_to_string = {
        1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun',
        7: 'Jul', 8: 'Aug', 9: 'Sept', 10: 'Oct', 11: 'Nov', 12: 'Dec'
    }
    try: 
        months = (row['PromoInterval'] or '').split(',')
        if row['Promo2OpenMonths'] and month_to_string[row['Month']] in months:
            return 1
        else:
            return 0
    except Exception:
        return 0
    
def promo_columns(df):
    df['Promo2OpenMonths'] = 12 * (df['Year'] - df['Promo2SinceYear']) + (df['WeekOfYear'] - df['Promo2SinceWeek']) * 7/30.5
    df['Promo2OpenMonths'] = df['Promo2OpenMonths'].map(lambda x: 0 if x < 0 else x).fillna(0) * df['Promo2']
    df['IsPromo2Month'] = df.apply(check_promo_month, axis = 1) * df['Promo2']

In [12]:
promo_columns(merged_train_df)
promo_columns(merged_test_df)

In [13]:
merged_train_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 844392 entries, 0 to 1017190
Data columns (total 27 columns):
 #   Column                     Non-Null Count   Dtype         
---  ------                     --------------   -----         
 0   Store                      844392 non-null  int64         
 1   DayOfWeek                  844392 non-null  int64         
 2   Date                       844392 non-null  datetime64[ns]
 3   Sales                      844392 non-null  int64         
 4   Customers                  844392 non-null  int64         
 5   Open                       844392 non-null  int64         
 6   Promo                      844392 non-null  int64         
 7   StateHoliday               844392 non-null  object        
 8   SchoolHoliday              844392 non-null  int64         
 9   StoreType                  844392 non-null  object        
 10  Assortment                 844392 non-null  object        
 11  CompetitionDistance        842206 non-null  float64     

### Store Level Aggregations (Features)

In [14]:
store_agg = merged_train_df.groupby('Store').agg(
    Store_Avg_Sales = ('Sales', 'mean'),
    Store_Median_Sales = ('Sales', 'median'),
    Store_Std_Sales = ('Sales', 'std'),
    Store_Max_Sales = ('Sales', 'max'),
    Store_Min_Sales = ('Sales', 'min')
).reset_index()
#In case only 1 data point for a store
store_agg['Store_Std_Sales'] = store_agg['Store_Std_Sales'].fillna(0)

In [15]:
merged_train_df = merged_train_df.merge(store_agg, how = 'left', on = 'Store')
merged_test_df = merged_test_df.merge(store_agg, how = 'left', on = 'Store')

## Imputation: Competition Distance

In [16]:
#Impute nan's in competition distance with 100,000km
merged_train_df['CompetitionDistance'].max()

np.float64(75860.0)

In [17]:
merged_train_df['CompetitionDistance'] = merged_train_df['CompetitionDistance'].fillna(100000)

In [18]:
merged_train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 844392 entries, 0 to 844391
Data columns (total 32 columns):
 #   Column                     Non-Null Count   Dtype         
---  ------                     --------------   -----         
 0   Store                      844392 non-null  int64         
 1   DayOfWeek                  844392 non-null  int64         
 2   Date                       844392 non-null  datetime64[ns]
 3   Sales                      844392 non-null  int64         
 4   Customers                  844392 non-null  int64         
 5   Open                       844392 non-null  int64         
 6   Promo                      844392 non-null  int64         
 7   StateHoliday               844392 non-null  object        
 8   SchoolHoliday              844392 non-null  int64         
 9   StoreType                  844392 non-null  object        
 10  Assortment                 844392 non-null  object        
 11  CompetitionDistance        844392 non-null  float64 

## Input and Output Columns

In [19]:
merged_train_df.columns

Index(['Store', 'DayOfWeek', 'Date', 'Sales', 'Customers', 'Open', 'Promo',
       'StateHoliday', 'SchoolHoliday', 'StoreType', 'Assortment',
       'CompetitionDistance', 'CompetitionOpenSinceMonth',
       'CompetitionOpenSinceYear', 'Promo2', 'Promo2SinceWeek',
       'Promo2SinceYear', 'PromoInterval', 'DayOfMonth', 'Month', 'Year',
       'Quarter', 'IsWeekend', 'WeekOfYear', 'CompetitionOpenMonths',
       'Promo2OpenMonths', 'IsPromo2Month', 'Store_Avg_Sales',
       'Store_Median_Sales', 'Store_Std_Sales', 'Store_Max_Sales',
       'Store_Min_Sales'],
      dtype='object')

In [20]:
input_columns = [
    'DayOfWeek', 'Promo', 'StateHoliday', 'SchoolHoliday', 
    'StoreType', 'Assortment','CompetitionDistance',
    'Promo2', 'DayOfMonth', 'Month', 'Year',
    'Quarter', 'IsWeekend', 'WeekOfYear', 'CompetitionOpenMonths',
    'Promo2OpenMonths', 'IsPromo2Month', 'Store_Avg_Sales',
    'Store_Median_Sales', 'Store_Std_Sales', 'Store_Max_Sales',
    'Store_Min_Sales'
]

output_column = ['Sales']

In [21]:
X_train_non_adjusted = merged_train_df[input_columns].copy()
y_train_non_adjusted = merged_train_df[output_column].copy()

X_test_non_adjusted = merged_test_df[input_columns].copy()

In [22]:
numeric_columns = [
    'Promo', 'SchoolHoliday', 'CompetitionDistance', 'Promo2',
    'Year', 'IsWeekend', 'CompetitionOpenMonths', 'Promo2OpenMonths',
    'IsPromo2Month', 'Store_Avg_Sales',
    'Store_Median_Sales', 'Store_Std_Sales', 'Store_Max_Sales',
    'Store_Min_Sales'
]
categorical_columns = [
    'DayOfWeek', 'DayOfMonth', 'StateHoliday', 'StoreType', 
    'Assortment', 'Month', 'Quarter', 'WeekOfYear'
]

## Scalling Numeric Features

In [23]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(X_train_non_adjusted[numeric_columns])

In [24]:
X_train_non_adjusted[numeric_columns] = scaler.transform(X_train_non_adjusted[numeric_columns])
X_test_non_adjusted[numeric_columns] = scaler.transform(X_test_non_adjusted[numeric_columns])

## OneHot Encoding Categorical Features

In [25]:
from sklearn.preprocessing import OneHotEncoder
encoder = OneHotEncoder(sparse_output = False, handle_unknown = 'ignore').fit(X_train_non_adjusted[categorical_columns])
encoded_columns = list(encoder.get_feature_names_out(categorical_columns))

In [26]:
X_train_non_adjusted[encoded_columns] = encoder.transform(X_train_non_adjusted[categorical_columns])
X_test_non_adjusted[encoded_columns] = encoder.transform(X_test_non_adjusted[categorical_columns])

  X_train_non_adjusted[encoded_columns] = encoder.transform(X_train_non_adjusted[categorical_columns])
  X_train_non_adjusted[encoded_columns] = encoder.transform(X_train_non_adjusted[categorical_columns])
  X_train_non_adjusted[encoded_columns] = encoder.transform(X_train_non_adjusted[categorical_columns])
  X_train_non_adjusted[encoded_columns] = encoder.transform(X_train_non_adjusted[categorical_columns])
  X_train_non_adjusted[encoded_columns] = encoder.transform(X_train_non_adjusted[categorical_columns])
  X_train_non_adjusted[encoded_columns] = encoder.transform(X_train_non_adjusted[categorical_columns])
  X_train_non_adjusted[encoded_columns] = encoder.transform(X_train_non_adjusted[categorical_columns])
  X_train_non_adjusted[encoded_columns] = encoder.transform(X_train_non_adjusted[categorical_columns])
  X_train_non_adjusted[encoded_columns] = encoder.transform(X_train_non_adjusted[categorical_columns])
  X_train_non_adjusted[encoded_columns] = encoder.transform(X_train_non_a

## Extract Final Training Data

In [27]:
X_train = X_train_non_adjusted[numeric_columns + encoded_columns].copy()
y_train = y_train_non_adjusted.copy()

X_test = X_test_non_adjusted[numeric_columns + encoded_columns].copy()

In [28]:
X_train.sample(100).to_csv('../Data/input.csv')

In [29]:
X_train.head()

Unnamed: 0,Promo,SchoolHoliday,CompetitionDistance,Promo2,Year,IsWeekend,CompetitionOpenMonths,Promo2OpenMonths,IsPromo2Month,Store_Avg_Sales,Store_Median_Sales,Store_Std_Sales,Store_Max_Sales,Store_Min_Sales,DayOfWeek_1,DayOfWeek_2,DayOfWeek_3,DayOfWeek_4,DayOfWeek_5,DayOfWeek_6,DayOfWeek_7,DayOfMonth_1,DayOfMonth_2,DayOfMonth_3,DayOfMonth_4,DayOfMonth_5,DayOfMonth_6,DayOfMonth_7,DayOfMonth_8,DayOfMonth_9,DayOfMonth_10,DayOfMonth_11,DayOfMonth_12,DayOfMonth_13,DayOfMonth_14,DayOfMonth_15,DayOfMonth_16,DayOfMonth_17,DayOfMonth_18,DayOfMonth_19,DayOfMonth_20,DayOfMonth_21,DayOfMonth_22,DayOfMonth_23,DayOfMonth_24,DayOfMonth_25,DayOfMonth_26,DayOfMonth_27,DayOfMonth_28,DayOfMonth_29,DayOfMonth_30,DayOfMonth_31,StateHoliday_0,StateHoliday_a,StateHoliday_b,StateHoliday_c,StoreType_a,StoreType_b,StoreType_c,StoreType_d,Assortment_a,Assortment_b,Assortment_c,Month_1,Month_2,Month_3,Month_4,Month_5,Month_6,Month_7,Month_8,Month_9,Month_10,Month_11,Month_12,Quarter_1,Quarter_2,Quarter_3,Quarter_4,WeekOfYear_1.0,WeekOfYear_2.0,WeekOfYear_3.0,WeekOfYear_4.0,WeekOfYear_5.0,WeekOfYear_6.0,WeekOfYear_7.0,WeekOfYear_8.0,WeekOfYear_9.0,WeekOfYear_10.0,WeekOfYear_11.0,WeekOfYear_12.0,WeekOfYear_13.0,WeekOfYear_14.0,WeekOfYear_15.0,WeekOfYear_16.0,WeekOfYear_17.0,WeekOfYear_18.0,WeekOfYear_19.0,WeekOfYear_20.0,WeekOfYear_21.0,WeekOfYear_22.0,WeekOfYear_23.0,WeekOfYear_24.0,WeekOfYear_25.0,WeekOfYear_26.0,WeekOfYear_27.0,WeekOfYear_28.0,WeekOfYear_29.0,WeekOfYear_30.0,WeekOfYear_31.0,WeekOfYear_32.0,WeekOfYear_33.0,WeekOfYear_34.0,WeekOfYear_35.0,WeekOfYear_36.0,WeekOfYear_37.0,WeekOfYear_38.0,WeekOfYear_39.0,WeekOfYear_40.0,WeekOfYear_41.0,WeekOfYear_42.0,WeekOfYear_43.0,WeekOfYear_44.0,WeekOfYear_45.0,WeekOfYear_46.0,WeekOfYear_47.0,WeekOfYear_48.0,WeekOfYear_49.0,WeekOfYear_50.0,WeekOfYear_51.0,WeekOfYear_52.0
0,1.113726,2.041038,-0.483913,-0.997372,1.502796,-0.460344,0.614306,-0.686969,-0.418274,-0.912031,-0.875946,-1.347031,-1.099009,-0.281003,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.113726,2.041038,-0.560331,1.002635,1.502796,-0.460344,0.767705,2.690605,2.390776,-0.831141,-0.817777,-0.393441,-0.88369,-0.571867,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.113726,2.041038,0.919988,1.002635,1.502796,-0.460344,0.936443,2.046518,2.390776,-0.005375,-0.032493,0.536538,0.05054,0.095873,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.113726,2.041038,-0.554872,-0.997372,1.502796,-0.460344,0.430228,-0.686969,-0.418274,1.11403,1.170027,0.126185,0.372025,2.021615,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.113726,2.041038,2.64266,-0.997372,1.502796,-0.460344,-0.597541,-0.686969,-0.418274,-0.946421,-0.889205,-0.145339,-0.695239,-0.897529,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## PyTorch Tensor Conversion

In [30]:
#Split data into a validation and training set
from sklearn.model_selection import train_test_split
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train,  
    y_train,  
    test_size = 0.2,
    random_state = 42
)

In [31]:
X_train_tensor = torch.tensor(X_train.values, dtype = torch.float32)
X_valid_tensor = torch.tensor(X_valid.values, dtype = torch.float32)

y_train_tensor = torch.tensor(y_train.values, dtype = torch.float32)
y_valid_tensor = torch.tensor(y_valid.values, dtype = torch.float32)

X_test_tensor  = torch.tensor(X_test.values, dtype = torch.float32)

In [34]:
X_train_tensor.shape, y_train_tensor.shape

(torch.Size([675513, 131]), torch.Size([675513, 1]))

In [36]:
X_valid_tensor.shape, y_valid_tensor.shape

(torch.Size([168879, 131]), torch.Size([168879, 1]))

In [37]:
device = 'mps' if torch.backends.mps.is_available() else 'cpu'
device

'mps'

In [38]:
X_train_tensor = X_train_tensor.to(device)
y_train_tensor = y_train_tensor.to(device)
X_valid_tensor = X_valid_tensor.to(device)
y_valid_tensor = y_valid_tensor.to(device)
X_test_tensor  = X_test_tensor.to(device)

## Creating Neural Network Model, Loss Function, and Optimizer

In [40]:
n_features = X_train_tensor.shape[1]
n_features

131

In [53]:
class SalesRegressor(nn.Module):
    def __init__(self, n_features: int):
        super().__init__()
        self.linear_layers = nn.Sequential(
            nn.Linear(in_features = n_features, out_features = 128),
            nn.ReLU(),
            nn.Linear(in_features = 128, out_features = 64),
            nn.ReLU(),
            nn.Linear(in_features = 64, out_features = 1)
        )
    def forward(self, x):
        return self.linear_layers(x)
    
model = SalesRegressor(n_features = 131).to(device)
model

SalesRegressor(
  (linear_layers): Sequential(
    (0): Linear(in_features=131, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [54]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr = 1e-2,
    weight_decay = 1e-5
)

## Training Loop

In [55]:
epochs = 500
torch.manual_seed(42)

for epoch in range(epochs):
    model.train()
    optimizer.zero_grad() #Zero grad optimizer
    y_pred = model(X_train_tensor) #Forward pass
    loss = loss_fn(y_pred, y_train_tensor) #Calculate loss
    loss.backward() #Backpropagation
    optimizer.step() #Gradient Descent

    model.eval()
    with torch.inference_mode():
        valid_pred = model(X_valid_tensor)
        valid_loss = loss_fn(valid_pred, y_valid_tensor)

    if epoch % 10 == 0:
        train_rsme = loss.item() ** 0.5
        valid_rsme = valid_loss.item() ** 0.5
        print(f"Epoch {epoch} | Train MSE: {train_rsme:.4f} | Validation MSE: {valid_rsme:.4f}")

Epoch 0 | Train MSE: 7616.8545 | Validation MSE: 7616.0643
Epoch 10 | Train MSE: 7587.2280 | Validation MSE: 7577.0744
Epoch 20 | Train MSE: 7345.7683 | Validation MSE: 7296.0485
Epoch 30 | Train MSE: 6461.5534 | Validation MSE: 6314.6135
Epoch 40 | Train MSE: 4263.9512 | Validation MSE: 3963.1566
Epoch 50 | Train MSE: 1779.6081 | Validation MSE: 1856.1354
Epoch 60 | Train MSE: 2089.4652 | Validation MSE: 1981.9816
Epoch 70 | Train MSE: 1624.2133 | Validation MSE: 1648.4933
Epoch 80 | Train MSE: 1566.8770 | Validation MSE: 1553.4906
Epoch 90 | Train MSE: 1493.4551 | Validation MSE: 1498.0569
Epoch 100 | Train MSE: 1453.0570 | Validation MSE: 1454.1063
Epoch 110 | Train MSE: 1433.2953 | Validation MSE: 1435.6332
Epoch 120 | Train MSE: 1412.4785 | Validation MSE: 1414.9241
Epoch 130 | Train MSE: 1398.6353 | Validation MSE: 1400.6743
Epoch 140 | Train MSE: 1387.0356 | Validation MSE: 1389.1836
Epoch 150 | Train MSE: 1377.4548 | Validation MSE: 1379.4853
Epoch 160 | Train MSE: 1369.7249 | 

In [56]:
model.eval()
with torch.inference_mode():
    test_preds = model(X_test_tensor.to(device))

In [57]:
test_preds = test_preds.cpu().numpy().flatten()

In [63]:
X_test = pd.read_csv('../Data/test.csv')
submission = pd.DataFrame({
    "Id": X_test['Id'],
    "Open": X_test['Open'],
    "Sales": test_preds  
})

In [66]:
submission.loc[submission['Open'] == 0, 'Sales'] = 0

In [69]:
submission.drop(columns = ['Open'], inplace = True)

In [70]:
submission.to_csv('../Data/nn.csv', index = False)

In [72]:
torch.save(model.state_dict(), 'model.pth')