This notebook is made for the M5 Forecasting - Accuracy competition on Kaggle. The purpose of this competition is to estimate the unit sales of Walmart retail goods for the next 28 days. Further details can be found here: https://www.kaggle.com/c/m5-forecasting-accuracy/leaderboard

Strategy: preprocess the data so that every row for the day being predicted also carries key data from the preceding days (e.g. units sold 1 day ago, 2 days ago, rolling window statistics, etc.). The neural network then looks at these rows and tries to predict the number of units sold on that day.

Then we will make a dataframe of the next 28 days in the same format, and have it predict the number of units sold. These will be our submission results.

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
import gc

In [2]:
# Price table; the preview below shows the columns store_id, item_id, wm_yr_wk and sell_price.
sell_prices = pd.read_csv("sell_prices.csv")

In [3]:
sell_prices.head()

Unnamed: 0,store_id,item_id,wm_yr_wk,sell_price
0,CA_1,HOBBIES_1_001,11325,9.58
1,CA_1,HOBBIES_1_001,11326,9.58
2,CA_1,HOBBIES_1_001,11327,8.26
3,CA_1,HOBBIES_1_001,11328,8.26
4,CA_1,HOBBIES_1_001,11329,8.26


In [4]:
# Wide sales table: one row per item/store series and one d_<n> column per day (see preview below).
sales_train = pd.read_csv("sales_train_evaluation.csv")

In [5]:
sales_train.head()

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1932,d_1933,d_1934,d_1935,d_1936,d_1937,d_1938,d_1939,d_1940,d_1941
0,HOBBIES_1_001_CA_1_evaluation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,2,4,0,0,0,0,3,3,0,1
1,HOBBIES_1_002_CA_1_evaluation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,1,2,1,1,0,0,0,0,0
2,HOBBIES_1_003_CA_1_evaluation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,0,2,0,0,0,2,3,0,1
3,HOBBIES_1_004_CA_1_evaluation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,1,0,4,0,1,3,0,2,6
4,HOBBIES_1_005_CA_1_evaluation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,0,0,2,1,0,0,2,1,0


In [6]:
# Calendar table: links each day label `d` to its date and week id `wm_yr_wk` (see preview below).
calendar = pd.read_csv("calendar.csv")

In [7]:
calendar.head()

Unnamed: 0,date,wm_yr_wk,weekday,wday,month,year,d,event_name_1,event_type_1,event_name_2,event_type_2,snap_CA,snap_TX,snap_WI
0,2011-01-29,11101,Saturday,1,1,2011,d_1,,,,,0,0,0
1,2011-01-30,11101,Sunday,2,1,2011,d_2,,,,,0,0,0
2,2011-01-31,11101,Monday,3,1,2011,d_3,,,,,0,0,0
3,2011-02-01,11101,Tuesday,4,2,2011,d_4,,,,,1,1,0
4,2011-02-02,11101,Wednesday,5,2,2011,d_5,,,,,1,0,1


In [8]:
# Append zero-filled placeholder columns for the 28 forecast days (d_1942 .. d_1969) so that
# they go through the same preprocessing as the observed days and can be split off later.
placeholder_days = {"d_" + str(day): 0 for day in range(1942, 1970)}
sales_train = sales_train.assign(**placeholder_days)

In [9]:
sales_train.head()

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1960,d_1961,d_1962,d_1963,d_1964,d_1965,d_1966,d_1967,d_1968,d_1969
0,HOBBIES_1_001_CA_1_evaluation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,HOBBIES_1_002_CA_1_evaluation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,HOBBIES_1_003_CA_1_evaluation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,HOBBIES_1_004_CA_1_evaluation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,HOBBIES_1_005_CA_1_evaluation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
#downcasting the dataframes to save space and computation power
#code adapted from https://www.kaggle.com/anshuls235/m5-forecasting-eda-fe-modelling
def _narrowest_dtype(column, candidates, limits, fallback):
    """Return the first dtype in `candidates` whose range strictly contains the column's min and max."""
    lowest, highest = column.min(), column.max()
    for candidate in candidates:
        bounds = limits(candidate)
        if lowest > bounds.min and highest < bounds.max:
            return candidate
    return fallback


def downcast(df):
    """Shrink the column dtypes of `df` in place and return the same frame.

    * text columns (object / string dtype): a column named 'date' is parsed
      as '%Y-%m-%d' datetimes, every other one is converted to `category`;
    * integer columns: cast to the first of int8 / int16 / int32 whose range
      strictly contains the column's min and max, otherwise int64;
    * float columns: same rule with float16 / float32, otherwise float64.
      float16 keeps only about three significant digits, so values such as
      prices are rounded.

    Columns of any other dtype (category, datetime, bool, ...) are left
    untouched, which makes it safe to call the function again on a frame that
    was already downcast.
    """
    for col, dtype in df.dtypes.items():
        # `np.object` no longer exists in current NumPy releases, so ask pandas
        # whether the column holds text instead of comparing against it.
        if pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype):
            if col == 'date':
                df[col] = pd.to_datetime(df[col], format='%Y-%m-%d')
            else:
                df[col] = df[col].astype('category')
        elif 'int' in str(dtype):
            target = _narrowest_dtype(df[col], (np.int8, np.int16, np.int32), np.iinfo, np.int64)
            df[col] = df[col].astype(target)
        elif 'float' in str(dtype):
            target = _narrowest_dtype(df[col], (np.float16, np.float32), np.finfo, np.float64)
            df[col] = df[col].astype(target)
    return df

In [11]:
# for i in range(1, 1941 - 750):
#     col_name = "d_" + str(i)
#     sales_train.drop(columns=col_name, inplace = True)

In [12]:
# Shrink the dtypes of all three raw tables; the "check" prints are progress markers
# emitted after the first and the second table.
sell_prices = downcast(sell_prices)
print("check")
sales_train = downcast(sales_train)
print("check")
calendar = downcast(calendar)

check
check


In [13]:
# Remove the weekday/month/year, event and SNAP columns; going by the preview above this
# leaves only `date`, `wm_yr_wk` and `d` in the calendar.
unused_calendar_columns = [
    'weekday', 'wday', 'month', 'year',
    'event_name_1', 'event_type_1', 'event_name_2', 'event_type_2',
    'snap_CA', 'snap_TX', 'snap_WI',
]
calendar = calendar.drop(columns=unused_calendar_columns)

In [14]:
# Reshape wide -> long: one row per (series, day), with the day label in `d` and the units in `sold`.
series_columns = ['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']
df = sales_train.melt(id_vars=series_columns, var_name='d', value_name='sold')
df = df.dropna()

In [15]:
# Attach the remaining calendar columns to every row through the day label `d`.
df = df.merge(calendar, on="d", how="left")

In [16]:
# The calendar date is not used as a feature (the day label stays in `d`, the week id in
# `wm_yr_wk`); `state_id` is dropped as well.
df = df.drop(columns=['date', 'state_id'])
df.head()

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,d,sold,wm_yr_wk
0,HOBBIES_1_001_CA_1_evaluation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,d_1,0,11101
1,HOBBIES_1_002_CA_1_evaluation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,d_1,0,11101
2,HOBBIES_1_003_CA_1_evaluation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,d_1,0,11101
3,HOBBIES_1_004_CA_1_evaluation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,d_1,0,11101
4,HOBBIES_1_005_CA_1_evaluation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,d_1,0,11101


In [17]:
# Look up the price of each item in each store for the week the row belongs to.
price_keys = ["store_id", "item_id", "wm_yr_wk"]
df = df.merge(sell_prices, on=price_keys, how="left")

In [18]:
# Remove rows without a sell_price (product not in store yet). dropna() is applied to the
# whole frame, so a NaN in any column removes the row.
df = df.dropna()
df.shape

(47735397, 9)

In [19]:
# Lag features: sold_lag_<k> is the `sold` value found k rows earlier within the same series.
lags = [1,2,3,7,28,56]
series_keys = ['id', 'item_id', 'dept_id', 'cat_id', 'store_id']
for lag in lags:
    shifted = df.groupby(series_keys, as_index=False)['sold'].shift(lag)
    df['sold_lag_'+str(lag)] = shifted.astype(np.float16)

In [20]:
# Columns used as grouping keys by the feature cells that follow.
categorical_variables = ["item_id", "dept_id", "cat_id", "store_id"]
# Keep only the numeric tail of each label: the last three characters of item_id, the last
# character of dept_id, and everything after the two-character "d_" prefix of the day label.
df["item_id"] = df["item_id"].str.slice(start=-3)
df["dept_id"] = df["dept_id"].str.slice(start=-1)
df["d"] = df["d"].str.slice(start=2)
df.head()

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,d,sold,wm_yr_wk,sell_price,sold_lag_1,sold_lag_2,sold_lag_3,sold_lag_7,sold_lag_28,sold_lag_56
7,HOBBIES_1_008_CA_1_evaluation,8,1,HOBBIES,CA_1,1,12,11101,0.459961,,,,,,
8,HOBBIES_1_009_CA_1_evaluation,9,1,HOBBIES,CA_1,1,2,11101,1.55957,,,,,,
9,HOBBIES_1_010_CA_1_evaluation,10,1,HOBBIES,CA_1,1,0,11101,3.169922,,,,,,
11,HOBBIES_1_012_CA_1_evaluation,12,1,HOBBIES,CA_1,1,0,11101,5.980469,,,,,,
14,HOBBIES_1_015_CA_1_evaluation,15,1,HOBBIES,CA_1,1,4,11101,0.700195,,,,,,


In [21]:
#Window statistics, computed separately for each group of categorical_variables
# NOTE(review): every window below ends at the current row, so each feature includes the
# same day's `sold` value, i.e. the value the model is asked to predict. This looks like
# target leakage; consider shifting `sold` by one row before rolling -- TODO confirm.
#rolling solid mean: mean of `sold` over the last 14 rows (NaN until 14 rows exist)
df["rolling_solid_mean"] = df.groupby(categorical_variables)["sold"].transform(lambda x : x.rolling(window = 14).mean()).astype(np.float16)
#expanding window statistics: running mean of `sold`, NaN until 5 observations exist
df["expanding_solid_mean"] = df.groupby(categorical_variables)["sold"].transform(lambda x : x.expanding(5).mean()).astype(np.float16)
#revenue: despite the name this is the number of units sold over the last 31 rows, not money
df["revenue"] = df.groupby(categorical_variables)["sold"].transform(lambda x : x.rolling(window = 31).sum()).astype(np.float16)

In [22]:
#word_tokenizer = keras.preprocessing.text.Tokenizer()
# cat_id_tokens = dict(zip(df["cat_id"], word_tokenizer.fit_on_texts(df["cat_id"])))
# state_id_tokens = dict(zip(df["state_id"], word_tokenizer.fit_on_texts(df["state_id"])))

In [23]:
# Lookup tables from each category label to the integer code pandas assigned to it.
# NOTE(review): neither dict is read again in the visible part of the notebook.
cat_id_codes = dict(zip(df.cat_id, df.cat_id.cat.codes))
store_id_codes = dict(zip(df.store_id, df.store_id.cat.codes))

In [24]:
# The truncated labels are digit strings; turn them into small integers.
for digit_column in ("d", "item_id", "dept_id"):
    df[digit_column] = df[digit_column].astype(np.int16)
# Replace the remaining categorical columns by their integer category codes.
df["store_id"] = df["store_id"].cat.codes
df["cat_id"] = df["cat_id"].cat.codes
# Keep days 58 and later -- presumably because the longest lag (56) needs that much history.
df = df.loc[df["d"] > 57]

In [25]:
#mean encoding for categorical variables: mc_<column> is the mean of `sold` over all rows
#that share the row's value in that column
# NOTE(review): nothing above removes the zero-filled placeholder days (d >= 1942) or the
# days later held out for validation, so they appear to be included in these means -- TODO confirm.
for category in categorical_variables:
    df["mc_" + category] = df.groupby(category)["sold"].transform("mean").astype(np.float16)

In [26]:
df.head()

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,d,sold,wm_yr_wk,sell_price,sold_lag_1,...,sold_lag_7,sold_lag_28,sold_lag_56,rolling_solid_mean,expanding_solid_mean,revenue,mc_item_id,mc_dept_id,mc_cat_id,mc_store_id
1737933,HOBBIES_1_004_CA_1_evaluation,4,1,1,0,58,0,11109,4.339844,1.0,...,0.0,,,0.428467,0.478271,,2.035156,1.21875,0.688965,1.595703
1737937,HOBBIES_1_008_CA_1_evaluation,8,1,1,0,58,0,11109,0.419922,0.0,...,0.0,0.0,15.0,0.0,2.5,18.0,1.232422,1.21875,0.688965,1.595703
1737938,HOBBIES_1_009_CA_1_evaluation,9,1,1,0,58,0,11109,1.769531,1.0,...,4.0,0.0,0.0,1.786133,2.138672,63.0,0.612793,1.21875,0.688965,1.595703
1737939,HOBBIES_1_010_CA_1_evaluation,10,1,1,0,58,0,11109,3.169922,0.0,...,0.0,0.0,0.0,0.0,0.103455,0.0,0.471924,1.21875,0.688965,1.595703
1737941,HOBBIES_1_012_CA_1_evaluation,12,1,1,0,58,0,11109,6.269531,0.0,...,2.0,0.0,2.0,0.357178,0.551758,14.0,1.15625,1.21875,0.688965,1.595703


In [27]:
# Same mean encoding for the (store_id, item_id) pair: mean of `sold` over all rows of that pair.
df["mc_store_item_id"] = df.groupby(["store_id", "item_id"])["sold"].transform("mean").astype(np.float16)

In [28]:
#adding a trend - whether the last rolling solid window mean has been staying average, above average or below
# rolling_solid_mean_month is the mean of `sold` over the last 60 rows of the group (not a
# calendar month, despite the name). selling_curve_trend is rolling_solid_mean minus that
# 60-row mean, so positive values mean recent sales run above the longer average.
df['rolling_solid_mean_month'] = df.groupby(categorical_variables)["sold"].transform(lambda x : x.rolling(window = 60).mean()).astype(np.float16)
df['selling_curve_trend'] = (df['rolling_solid_mean'] - df['rolling_solid_mean_month']).astype(np.float16)

In [29]:
# Drop every row that still has a NaN in any column, then renumber the rows from 0.
df.dropna(axis=0, inplace=True)
df.reset_index(drop = True, inplace = True)
# Set the series id aside so it is not fed to the models; it is assigned back to df further
# down, when the submission is assembled.
# NOTE(review): the variable name `id` shadows the Python builtin of the same name.
id = df.pop("id")

For the final predictions:
- Index out the necessary data plus the data from the last 56 days that the calculations need (let this be df_pred).
- Feed each day of that dataframe into the model and fill the results into df_pred.
- Recalculate the necessary statistics (lags, rolling_solid_mean, etc.).

In [30]:
# df_pred = pd.DataFrame.copy(df[df["d"] > (1942 - 57)])

In [31]:
# df = df[df["d"] < 1942]
# # df = df[1250 < df["d"]]

In [32]:
# #split the data into X and y
# targets = df.pop("sold")
# X_train, X_test, y_train, y_test = train_test_split(df, targets, test_size = 0.1, random_state = 2)
#     #set test_size = 0 because we will use tensorflow's built in validation_split
# # del df, sales_train, sell_prices, calendar
# # gc.collect()

In [33]:
def build_tf_model():
    """Build and compile the 1-D convolutional regressor.

    The number of input features is read from the global ``X_train``. Each
    sample is reshaped to ``(n_features, 1)``, so the convolutions slide along
    the feature axis. The model is compiled with MSE loss, Adam (learning rate
    0.05) and an RMSE metric.

    Returns:
        The compiled, not yet fitted, ``keras.Sequential`` model.
    """
    n_features = X_train.shape[1]
    layers = keras.layers

    # An earlier fully connected variant (Dense 64-32-16-8) was tried in place of the
    # convolutional stack below.
    model = keras.Sequential([
        layers.Reshape((n_features, 1)),
        # first convolution stage
        layers.Conv1D(64, 3, activation = "relu", input_shape = (n_features, 1)),
        layers.MaxPool1D(pool_size = 2, strides = 1, padding="valid"),
        layers.BatchNormalization(),
        # second convolution stage, averaged over the feature axis
        layers.Conv1D(64, 3, activation = "relu"),
        layers.MaxPool1D(pool_size = 2, strides = 1, padding="valid"),
        layers.BatchNormalization(),
        layers.GlobalAveragePooling1D(),
        # regression head
        layers.Flatten(),
        layers.Dense(8),
        layers.Dense(1),
    ])

    rmse = keras.metrics.RootMeanSquaredError()
    adam = keras.optimizers.Adam(learning_rate = 0.05)
    model.compile(loss = "mse", optimizer = adam, metrics = [rmse])
    return model

In [76]:
def build_lstm_model():
    """Build and compile the stacked-LSTM regressor.

    The number of input features is read from the global ``X_train``. Each
    sample is reshaped to a sequence of length 1 with ``n_features`` channels
    and passed through four LSTM layers (64, 32, 16, 8 units), followed by a
    small dense head with a single output. The model is compiled with MSE
    loss, RMSprop with gradient values clipped to 1.0, and an RMSE metric.

    Returns:
        The compiled, not yet fitted, ``keras.Sequential`` model.
    """
    n_features = X_train.shape[1]
    model = keras.Sequential()

    # NOTE(review): with a sequence length of 1 the LSTM layers only ever see a single
    # time step per sample.
    model.add(keras.layers.Reshape((1, n_features)))

    # The `input_shape` arguments that used to be passed to these layers were dropped: they
    # sat on layers that are not the first one (so Sequential never used them) and they
    # contained the number of training rows rather than a per-sample shape.
    model.add(keras.layers.LSTM(64, return_sequences=True))
    model.add(keras.layers.LSTM(32, return_sequences=True, activation="relu"))
    model.add(keras.layers.LSTM(16, return_sequences=True, activation="relu"))
    model.add(keras.layers.LSTM(8, return_sequences=True, activation="relu"))

    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(8))
    model.add(keras.layers.Dense(1))

    model.compile(loss = "mse", optimizer=tf.keras.optimizers.RMSprop(clipvalue=1.0),
                  metrics = [keras.metrics.RootMeanSquaredError()])
    return model

In [35]:
# epochs = 1
# batch_size = 100000
# # i,j = df.shape
# def train_model(model, batch_size, epochs, X_train, X_test, y_train, y_test):
#     n = 0
#     i,j = X_train.shape
#     while n + batch_size <= i:

#         train_data = X_train.iloc[n : n+batch_size, :]
#         test_data = y_train.iloc[n : n+batch_size]
#         model.fit(train_data, test_data, batch_size = 200, epochs = epochs, validation_set = ([X_test, y_test]))
#         print(str((n + batch_size)/i * 100) + "% done" )
#         n += batch_size
#         del train_data, test_data
#         gc.collect()
        
# #     train the rest of the data: index from n:
#     train_data = df.iloc[n:, :]
#     test_data = targets.iloc[n:]
#     model.fit(train_data, test_data, batch_size = 200, epochs = epochs, validation_set = ([X_test, y_test]))
#     del train_data, test_data
#     gc.collect()
#     print("finished")
    
    

In [36]:
# #Get the store ids. Will be making a model for each store
# for store in df["store_id"].unique().tolist():
#     data = df[df["store_id"] == store]
# #         df[(df['d']>=1914) & (df['d']<1942)].drop('sold',axis=1)

#     X_train = data[data["d"] < 1914].drop("sold", axis = 1)
#     y_train = data[data["d"] < 1914]["sold"]
#     X_test = data[(data['d']>=1914) & (data['d']<1942)].drop('sold',axis=1)
#     y_test = data[(data['d']>=1914) & (data['d']<1942)]['sold']
    
#     X_pred = data[data["d"] >= 1942].drop("sold", axis = 1)
    
# #     model = XGBRegressor(n_estimators = 1000, learning_rate = 0.05)
# #     model.fit(X_train, y_train,
# #              early_stopping_rounds = 10,
# #              eval_set = [(X_test, y_test)],
# #              eval_metric = "rmse",
# #              verbose = 10)

#     model = build_tf_model()
#     model.fit(X_train, y_train,
#               batch_size = 200,
#               epochs = 2, 
#               validation_set = [(X_train, y_train),(X_test, y_test)])
#     df.loc[X_pred.index.tolist(), "sold"] = model.predict(X_pred)

In [None]:
#Train one model per (store, department) pair, then forecast days 1942-1969 one day at a
#time, copying each day's predictions into the lag features of the days that follow.
#
# NOTE(review): the window features (rolling_solid_mean, revenue, expanding_solid_mean, ...)
# are defined on windows that end at the current row. The model therefore reads, for each
# forecast day, the values precomputed from the zero placeholders; they are refreshed only
# after that day has been predicted (same order as before). Feeding them forward properly
# needs the feature cells to use past days only -- TODO confirm and fix there.
lags = [1,2,3,7,28,56]
#feature name -> (window length in rows, aggregation); same definitions as the feature cells
window_stats = {
    "rolling_solid_mean": (14, "mean"),
    "revenue": (31, "sum"),
    "rolling_solid_mean_month": (60, "mean"),
}
refreshed_columns = (["sold", "expanding_solid_mean", "selling_curve_trend"]
                     + list(window_stats)
                     + ["sold_lag_" + str(lag) for lag in lags])

#predictions are fractional, so the target column has to be able to hold floats
df["sold"] = df["sold"].astype(np.float32)

for store in df["store_id"].unique().tolist():
    for dept in df["dept_id"].unique().tolist():

        #private copy of this store/department; it is updated day by day and the forecast
        #rows are written back to df once all 28 days are done
        data = df[(df["store_id"] == store) & (df["dept_id"] == dept)].copy()
        if data.empty:
            continue

        in_train = data["d"] < 1914
        in_valid = (data["d"] >= 1914) & (data["d"] < 1942)
        X_train = data[in_train].drop("sold", axis = 1)
        y_train = data.loc[in_train, "sold"]
        X_test = data[in_valid].drop("sold", axis = 1)
        y_test = data.loc[in_valid, "sold"]

        #build_lstm_model reads the feature count from the global X_train set just above
        model = build_lstm_model()
        #`validation_data` is the keyword Keras understands; it takes one (features, targets) pair
        model.fit(X_train, y_train,
                  batch_size = 200,
                  epochs = 2,
                  validation_data = (X_test, y_test))

        print("predicting for store: " + str(store) + " dept id: " + str(dept))
        for d in range (1942,1970):
            today = data.index[data["d"] == d]
            if len(today) == 0:
                continue

            #features are read from `data` at every step so that lags written by earlier
            #days are used; predict() returns shape (n, 1), hence the ravel()
            X_day = data.loc[today].drop("sold", axis = 1)
            predictions = np.asarray(model.predict(X_day), dtype = np.float32).ravel()
            data.loc[today, "sold"] = predictions

            #refresh this day's window statistics from the series history up to and
            #including the value just predicted (rows are in day order, so day d is the
            #last row of each series in `history`)
            history = data.loc[data["d"] <= d, categorical_variables + ["sold"]]
            sold_by_series = history.groupby(categorical_variables)["sold"]
            #running mean over the rows still present in `data`; NaN below 5 observations
            running_mean = sold_by_series.transform("mean").where(sold_by_series.transform("count") >= 5)
            data.loc[today, "expanding_solid_mean"] = running_mean.loc[today].astype(np.float16)
            for feature, (window, how) in window_stats.items():
                tail = history.groupby(categorical_variables).tail(window)
                tail_sold = tail.groupby(categorical_variables)["sold"]
                #a rolling window only yields a value once it is completely filled
                stat = tail_sold.transform(how).where(tail_sold.transform("count") >= window)
                data.loc[today, feature] = stat.loc[today].astype(np.float16)
            data.loc[today, "selling_curve_trend"] = (data.loc[today, "rolling_solid_mean"] - data.loc[today, "rolling_solid_mean_month"]).astype(np.float16)

            #copy today's predictions into the lag features of the later days, matched by
            #series key rather than by row position
            predicted = pd.Series(predictions, index = pd.MultiIndex.from_frame(data.loc[today, categorical_variables]))
            for lag in lags:
                later = data.index[data["d"] == d + lag]
                if len(later) == 0:
                    #past the forecast horizon, or no rows for that day in this group
                    continue
                later_keys = pd.MultiIndex.from_frame(data.loc[later, categorical_variables])
                values = predicted.reindex(later_keys).to_numpy()
                known = ~np.isnan(values)
                data.loc[later[known], "sold_lag_" + str(lag)] = values[known].astype(np.float16)

        #write the forecast rows of this store/department back into the main frame
        future = data.index[data["d"] >= 1942]
        for column in refreshed_columns:
            df.loc[future, column] = data.loc[future, column]

Epoch 1/2
Epoch 2/2
predicting for store: 0 dept id: 1
Epoch 1/2
Epoch 2/2
predicting for store: 0 dept id: 2
Epoch 1/2
Epoch 2/2
predicting for store: 0 dept id: 3
Epoch 1/2
Epoch 2/2
predicting for store: 1 dept id: 1
Epoch 1/2
Epoch 2/2
predicting for store: 1 dept id: 2
Epoch 1/2
Epoch 2/2
predicting for store: 1 dept id: 3
Epoch 1/2
Epoch 2/2
predicting for store: 2 dept id: 1
Epoch 1/2
Epoch 2/2
predicting for store: 2 dept id: 2
Epoch 1/2
Epoch 2/2
predicting for store: 2 dept id: 3
Epoch 1/2
Epoch 2/2
predicting for store: 3 dept id: 1


In [None]:
#validation half of the submission: the recorded sales of days 1914-1941, labelled with the
#ids read from sales_train_validation.csv (rows are matched by their index)
validation_days = ['d_' + str(i) for i in range(1914,1942)]
#.copy() so that the assignments below modify a frame of their own, not a slice of sales_train
validation = sales_train[['id'] + validation_days].copy()
#only the id column of that file is needed
validation['id'] = pd.read_csv('sales_train_validation.csv', usecols=['id']).id
validation.columns=['id'] + ['F' + str(i + 1) for i in range(28)]

In [None]:
#another method: index out all items one by one by their index and build an LSTM model for each
# do the current method first, I think
# ====> look into it more, have the model take in the last 3 months worth of days and predict the next 28 days?
# ====> 

In [None]:
# Re-attach the series ids, then keep only the forecast days (1942 and later) and the three
# columns the submission is built from.
df["id"] = id
evaluation_results = df.loc[df["d"] >= 1942, ["id", "d", "sold"]]

In [None]:
#get rid of all negative values that the neural network predicted (cant sell negative values)
#vectorised clip instead of a per-element apply; assign() returns a new frame, so nothing is
#written into a slice of df. NaN values are left as they are.
evaluation_results = evaluation_results.assign(sold = evaluation_results["sold"].clip(lower = 0))

In [None]:
# Back to the wide layout: one row per id, one column per forecast day, renamed F1 .. F28.
evaluation_results = evaluation_results.pivot(index = "id", columns = "d", values = "sold")
evaluation_results = evaluation_results.reset_index()
evaluation_results.columns = ['id'] + ['F' + str(day) for day in range(1, 29)]
evaluation_results.head()

In [None]:
#fill in any null values with the average of the row
# The id column is taken out first so that the row mean is computed over the F columns only.
eval_id = evaluation_results.pop("id")
m = evaluation_results.mean(axis = 1)
evaluation_results = evaluation_results.apply(lambda column: column.fillna(m))

In [None]:
# Put the ids (taken out before the row means were computed) back as the first column.
evaluation_results.insert(loc=0, column="id", value=eval_id)
evaluation_results.head()

In [None]:
evaluation_results.isnull().sum()

In [None]:
# Stack the validation rows on top of the evaluation rows, renumber the index and write the
# submission file.
submission_parts = [validation, evaluation_results]
final = pd.concat(submission_parts, ignore_index = True)
final.to_csv('submission3.csv',index=False)