# Initialization

Load the settings, the test data, the store information, and the trained model.

In [1]:
import numpy as np
import json
import datetime
import pickle
from sklearn import linear_model, kernel_ridge, tree

import utils


# Prefix prepended to SETTINGS.json and to every path read from it
path_base = "../"

# Settings: a JSON file whose entries give the paths used below
with open(path_base+"SETTINGS.json", 'r') as settings_file:
    settings = json.load(settings_file)
print("Settings loaded.")

# Test entries (one per line to predict) and the per-store information
data = utils.loadTestData(path_base+settings["TEST_DATA_PATH"])
N_test = len(data)
print("Test data: {} lines loaded".format(N_test))

store_info = utils.loadStoreInfo(path_base+settings["STORE_INFO_PATH"])
print("Store data: {} lines loaded.".format(len(store_info)))

# Trained model, restored from its pickle file.
# NOTE(review): unpickling can execute arbitrary code, so MODEL_PATH must
# only ever point to a file produced by a trusted source.
with open(path_base+settings["MODEL_PATH"], "rb") as model_file:
    model = pickle.load(model_file)
print("Model loaded")


Settings loaded.
Test data: 41088 lines loaded
Store data: 1115 lines loaded.
Model loaded


# Generate features
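We reuse the same feature-generation function as in `train.ipynb`, so that the test vectors have the same layout as the vectors the model was trained on.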

In [2]:
# Dimension of the feature space (length of every feature vector)
D = 38


def generateFeatureVector(entry):
    """Encode one data entry as a feature vector of length ``D``.

    Fields read from ``entry``:
        entry[0]: store number; ``entry[0]-1`` indexes ``store_info``
        entry[1]: day of the week, used as a 1-based index
        entry[2]: date object (``.day`` and ``.month`` are read, and a
                  ``datetime.date`` is subtracted from it)
        entry[3]: "open" flag, compared to the integer 1
        entry[4]: "promo" flag, compared to the integer 1
        entry[5]: holiday code: 'a' public holiday, 'b' Easter, 'c' Christmas
        entry[6]: school holiday flag, compared to the string '1'

    Fields read from the matching ``store_info`` row:
        [1] store type, [2] assortment, [3] competition distance,
        [4] competition start month (-1 means no data),
        [5] competition start year, [6] promo2 value

    Returns:
        A 1-D numpy array of ``D`` floats.
    """
    features = np.zeros((D))
    date = entry[2]
    store = store_info[entry[0]-1]

    ## Day features

    # Features 0 to 6: one-hot day of the week
    features[entry[1] - 1] = 1.0

    # Features 7 to 9: third of the month
    # (7: days 1-10, 8: days 11-20, 9: days after the 20th)
    day_of_month = date.day
    if day_of_month <= 10:
        features[7] = 1.0
    elif day_of_month <= 20:
        features[8] = 1.0
    else:
        features[9] = 1.0

    # Features 10 to 21: one-hot month of the year
    features[9 + date.month] = 1.0

    # Feature 22: store open on that day / Feature 23: promo running
    for flag, slot in ((entry[3], 22), (entry[4], 23)):
        if flag == 1:
            features[slot] = 1.0

    # Features 24 to 26: public holiday / Easter / Christmas
    for code, slot in (('a', 24), ('b', 25), ('c', 26)):
        if entry[5] == code:
            features[slot] = 1.0

    # Feature 27: school holiday
    # NOTE(review): this compares against the string '1' while the open and
    # promo flags above compare against the integer 1 -- confirm the type
    # produced by the loader (and keep it in sync with train.ipynb).
    if entry[6] == '1':
        features[27] = 1.0

    ## Store features

    # Features 28 to 31: store type; anything other than 'a'/'b'/'c' -> 31
    type_slots = (('a', 28), ('b', 29), ('c', 30))
    type_slot = next((slot for code, slot in type_slots if store[1] == code), 31)
    features[type_slot] = 1.0

    # Features 32 to 34: assortment; anything other than 'a'/'b' -> 34
    assortment_slots = (('a', 32), ('b', 33))
    assortment_slot = next(
        (slot for code, slot in assortment_slots if store[2] == code), 34)
    features[assortment_slot] = 1.0

    # Feature 35: competition distance
    features[35] = store[3]

    # Feature 36: days since the competition started, kept only when
    # positive. Only month and year are available, so the start is taken
    # as the first day of that month; a month of -1 means no data.
    if store[4] != -1:
        competition_start = datetime.date(year=store[5], month=store[4], day=1)
        days_elapsed = (date - competition_start).days
        if days_elapsed > 0:
            features[36] = days_elapsed

    # Feature 37: Promo2
    features[37] = store[6]

    return features

In [3]:
# Build the test matrix: one row per test entry, one column per feature
test_set = np.zeros((N_test, D))

for row in range(N_test):
    test_set[row] = generateFeatureVector(data[row])

# Show one encoded entry (row 3) as a quick visual check
print(test_set[3])

[  0.00000000e+00   0.00000000e+00   0.00000000e+00   1.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   1.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   1.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   1.00000000e+00   1.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   1.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   1.00000000e+00   0.00000000e+00   0.00000000e+00   7.52000000e+03
   3.51000000e+02   0.00000000e+00]


# Estimate outputs


In [4]:
# Run the loaded model on the whole test matrix
y_hat = model.predict(test_set)

# Preview the first predictions as a quick visual check
preview_count = 10
print(y_hat[:preview_count])

[ 4767.          7631.83050847  9541.39051095  6942.55445545  8113.
  5789.59868421  8335.76501767  8761.49885584  5608.          6366.32780411]


# Save outputs

In [5]:
# Write the submission file: a header line followed by one "Id,Sales" line
# per prediction. Ids are the 1-based positions of the predictions in y_hat.
with open(path_base+settings["SUBMISSION_PATH"], "w") as submission_file:
    # Header line
    submission_file.write('"Id","Sales"\n')

    # One line per test entry
    submission_file.writelines(
        "{},{}\n".format(idx + 1, y_hat[idx]) for idx in range(N_test)
    )