# Initialization

Load the settings, the test data, the store information and the trained model.

In [12]:
import numpy as np
import json
import datetime
import pickle
from sklearn import linear_model, kernel_ridge

import utils


path_base = "../"

# Read the JSON settings file; it provides the relative paths used below.
with open(path_base + "SETTINGS.json", "r") as settings_file:
    settings = json.load(settings_file)
    print("Settings loaded.")

# Load the test entries and the per-store information.
data = utils.loadTestData(path_base + settings["TEST_DATA_PATH"])
N_test = len(data)
print("Test data: {} lines loaded".format(N_test))
store_info = utils.loadStoreInfo(path_base + settings["STORE_INFO_PATH"])
print("Store data: {} lines loaded.".format(len(store_info)))


# Restore the pickled model.
# NOTE(review): unpickling can execute arbitrary code; only load model files
# that come from a trusted source.
with open(path_base + settings["MODEL_PATH"], "rb") as model_file:
    model = pickle.load(model_file)
    print("Model loaded")


Settings loaded.
Test data: 41088 lines loaded
Store data: 1115 lines loaded.
Model loaded


# Generate features
We use the same feature-generation function as in `train.ipynb`, so that the test features are built the same way as the training features.

In [13]:
# Dimension of the feature space (number of values produced per entry)
D = 15


def generateFeatureVector(entry):
    """Generate the feature vector (D values) for one data entry.

    Fields read from ``entry``:
        entry[0]  store id, used as a 1-based index into the global ``store_info``
        entry[1]  day of the week
        entry[2]  date of the entry (``.day``/``.month``/``.year`` are read and a
                  ``datetime.date`` is subtracted from it)
        entry[4]  promo
        entry[5]  state-holiday code ("0", "a", "b", anything else)
        entry[6]  school-holiday flag (converted with ``int``)

    Fields read from the store row ``store_info[entry[0]-1]``:
        [2] assortment code, [3] distance to competitor,
        [4] competition start month (-1 means no entry available),
        [5] competition start year.

    Returns:
        A numpy array holding, in order: distance to competitor and its square,
        assortment and its square, 1/(1+days), days and days squared since the
        competition started, day of week, week-end flag, day of month, month,
        year, promo, state holiday and school holiday.
    """
    store = store_info[entry[0] - 1]
    current_date = entry[2]

    # Categorical codes: any value not listed in a table maps to 3.
    assortment_code = {"a": 1, "b": 2}.get(store[2], 3)
    state_holiday_code = {"0": 0, "a": 1, "b": 2}.get(entry[5], 3)

    # Days elapsed since the competitor opened. Only month and year are
    # available, so the first day of the month is used as the start date.
    if store[4] == -1:
        days_since_competition = 0   # No entry available
    else:
        competition_start = datetime.date(store[5], store[4], 1)
        days_since_competition = (current_date - competition_start).days

    # Competition features are zeroed when the competitor has not opened yet
    # (or when no opening date is known).
    competition_active = days_since_competition > 0
    days_feature = days_since_competition if competition_active else 0
    dist_comp = store[3] if competition_active else 0

    is_weekend = 1 if entry[1] in (6, 7) else 0

    return np.array([
        # Store information
        dist_comp,
        dist_comp ** 2,
        assortment_code,
        assortment_code ** 2,
        1.0 / (1 + days_feature),
        days_feature,
        days_feature ** 2,
        # Day information
        entry[1],
        is_weekend,
        current_date.day,
        current_date.month,
        current_date.year,
        entry[4],
        state_holiday_code,
        int(entry[6]),
    ])

In [14]:
# Build the test matrix: one row per test entry, one column per feature
test_set = np.zeros((N_test, D))

for row_idx, entry in enumerate(data):
    test_set[row_idx] = generateFeatureVector(entry)

# Display one row as a quick sanity check
print(test_set[3])

[  7.52000000e+03   5.65504000e+07   1.00000000e+00   1.00000000e+00
   2.84090909e-03   3.51000000e+02   1.23201000e+05   4.00000000e+00
   0.00000000e+00   1.70000000e+01   9.00000000e+00   2.01500000e+03
   1.00000000e+00   0.00000000e+00   0.00000000e+00]


# Estimate outputs


In [15]:
# Run the loaded model on the test feature matrix.
# NOTE(review): presumably a scikit-learn estimator (sklearn is imported in the
# first cell) — confirm against train.ipynb.
y_hat = model.predict(test_set)

# Show the first ten predictions as a quick sanity check.
print(y_hat[:10])

[ 7795.29427545  7438.7169107   7988.07017049  7671.90600939  8308.9477677
  7742.27288023  8442.50978895  8332.8367198   7729.72463478  7855.02924571]


# Save outputs

In [16]:
# Write the submission file: a quoted header line followed by one
# "<id>,<prediction>" line per test entry, with ids starting at 1.
with open(path_base + settings["SUBMISSION_PATH"], "w") as submission_file:
    submission_file.write('"Id","Sales"\n')
    submission_file.writelines(
        "{},{}\n".format(i + 1, y_hat[i]) for i in range(N_test)
    )