In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from datetime import datetime, date
from dateutil.relativedelta import relativedelta

from sklearn.preprocessing import StandardScaler

from math import ceil

from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.normalization import BatchNormalization
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, CSVLogger, EarlyStopping
from keras.optimizers import RMSprop, Adam, SGD, Nadam
from keras.layers.advanced_activations import *
from keras.layers import Convolution1D, MaxPooling1D, AtrousConvolution1D
from keras.layers.recurrent import LSTM, GRU
from keras import regularizers

%matplotlib inline


Using TensorFlow backend.


In [2]:
# Read every competition CSV from the local `all/` directory in one pass;
# the names on the left line up one-to-one with the paths listed below.
train, test, submission, items, item_cats, shops = map(pd.read_csv, [
    'all/sales_train.csv',
    'all/test.csv',
    'all/sample_submission.csv',
    'all/items.csv',
    'all/item_categories.csv',
    'all/shops.csv',
])
print("Ok")

Ok


In [3]:
# Keep only the training rows whose shop AND item both occur in the test set.
test_shops = test.shop_id.unique()
test_items = test.item_id.unique()
train = train[train.shop_id.isin(test_shops) & train.item_id.isin(test_items)]

# Size constants derived from the filtered data.
MAX_BLOCK_NUM = train.date_block_num.max()
MAX_SHOP, MAX_ITEM, MAX_CAT = len(test_shops), len(test_items), len(item_cats)
MAX_YEAR = 3
MAX_MONTH = 4 # 7 8 9 10

In [4]:
# Attach the columns of the items table by item_id (the free-text item_name is
# dropped again straight away).
train = (
    train.set_index('item_id')
    .join(items.set_index('item_id'))
    .drop('item_name', axis=1)
    .reset_index()
)

# Derive zero-padded month ('%m') and four-digit year ('%Y') strings from the
# 'dd.mm.YYYY' date column.
for column, fmt in (('month', '%m'), ('year', '%Y')):
    train[column] = train.date.apply(
        lambda x, fmt=fmt: datetime.strptime(x, '%d.%m.%Y').strftime(fmt)
    )

In [5]:
# Collapse to one row per (shop, item, month block, month, year): drop the
# columns that must not be summed, total what is left, and sort by the new index.
# Note that item_price is summed along with item_cnt_day.
train = (
    train.drop('date', axis=1)
    .drop('item_category_id', axis=1)
    .groupby(['shop_id', 'item_id', 'date_block_num', 'month', 'year'])
    .sum()
    .sort_index()
)


In [6]:
# Fit one standardiser per numeric column. Only the price column is actually
# transformed; cnt_scaler is fitted but item_cnt_day itself stays unscaled.
scaler = StandardScaler().fit(train.item_price.values.reshape(-1, 1))
cnt_scaler = StandardScaler().fit(train.item_cnt_day.values.reshape(-1, 1))

train['item_price'] = scaler.transform(train.item_price.values.reshape(-1, 1))

In [7]:
# Copy of the aggregated frame re-keyed by (item, shop, month block) and sorted
# on that key.
price = (
    train.reset_index()
    .set_index(['item_id', 'shop_id', 'date_block_num'])
    .sort_index()
)

In [8]:
from sklearn import preprocessing

# Dense 0-based codes for the raw item ids found in the test set, plus a
# raw-id -> code lookup dict.
item_le = preprocessing.LabelEncoder().fit(test_items)
item_dm = {raw_id: code for raw_id, code in zip(test_items, item_le.transform(test_items))}

# Same for shop ids.
shop_le = preprocessing.LabelEncoder().fit(test_shops)
shop_dm = {raw_id: code for raw_id, code in zip(test_shops, shop_le.transform(test_shops))}

# Months 7..10 mapped onto codes 0..3.
month_le = preprocessing.LabelEncoder().fit(range(7,11))
month_dm = {raw_month: code for raw_month, code in zip(range(7,11), month_le.transform(range(7,11)))}

In [9]:
values = train.values.tolist()
keys = train.index.values.tolist()

# table[item_code][shop_code][month_block] -> [price, count].
# 34 month blocks are allocated per pair and every slot starts out as the
# placeholder [0, 10].
table = [
    [[[0, 10] for _ in range(34)] for _ in range(len(shop_dm))]
    for _ in range(len(item_dm))
]
print("prepared")

prepared


In [10]:
values = train.values.tolist()
keys = train.index.values.tolist()

# Each index tuple starts with (shop_id, item_id, date_block_num); the first two
# value columns of the matching row are copied into that slot of `table`.
for key, value in zip(keys, values):
    shop_key, item_key, block = key[0], key[1], key[2]
    table[item_dm[item_key]][shop_dm[shop_key]][block] = [value[0], value[1]]


In [11]:
# Fill the month slots that carry no sales record, for every (item, shop) pair:
#   * their count becomes 0;
#   * their price becomes the most recent recorded price, or - for gaps before
#     the first record - the first recorded non-zero price (0 if there is none).
#
# Fix: the previous version also executed `table.append([])` once per item,
# which grew `table` with empty rows on every run of this cell.
#
# NOTE(review): a slot counts as "recorded" when its count is < 9, because the
# placeholder written at allocation time is [0, 10]. Real monthly counts of 9
# or more are therefore treated as missing and reset to 0 as well - confirm
# whether that is intended.
for i in range(len(item_dm)):
    for j in range(len(shop_dm)):
        history = table[i][j]

        # Seed value used for leading gaps.
        price = next(
            (slot[0] for slot in history if slot[1] < 9 and slot[0] != 0),
            0,
        )

        for slot in history:
            if slot[1] < 9:
                price = slot[0]      # remember the latest recorded price
            else:
                slot[1] = 0          # no sales that month
                slot[0] = price      # carry the price forward
print("done")

done


In [12]:
# res[item_code][shop_code] = count summed over all 34 month blocks.
res = [
    [sum(table[i][j][k][1] for k in range(34)) for j in range(len(shop_dm))]
    for i in range(len(item_dm))
]

# Largest per-pair total, never below 0.
maxx = max([0] + [total for row in res for total in row])


In [13]:
# Sizing constants for the model tensors.
MAX_ITEM = len(test_items)
# Replaces the earlier MAX_MONTH = 4. The tensor-building cells use it both as
# the input window length (range(MAX_MONTH)) and as the index of the target month.
MAX_MONTH = 32 
MAX_SHOP = len(test_shops)

In [16]:

# Build the single-sample training tensors from `train_list`, a list of
# [item_code, shop_code] pairs that is created in a separate cell and must
# exist before this cell runs.
#
# x_train: shape (1, MAX_MONTH, 2 * n_pairs). For pair i, column 2*i holds the
#          price and column 2*i + 1 the count of month blocks 0..MAX_MONTH-1.
# y_train: shape (1, 5 * n_pairs). One-hot over 5 classes per pair, encoding
#          the count in month block MAX_MONTH (class 4 means "4 or more").
#
# Fixes:
#   * the count used to be written to column 2*i as well, overwriting the price
#     and leaving column 2*i + 1 at zero;
#   * a negative count produced a negative class offset and set a bit in the
#     neighbouring pair's slot; the class is now clamped to 0..4.
x_train = np.zeros((1, MAX_MONTH, 2*len(train_list)), dtype=np.float32)
for i, item in enumerate(train_list):
    history = table[item[0]][item[1]]
    for k in range(MAX_MONTH):
        x_train[0][k][i*2] = history[k][0]
        x_train[0][k][i*2+1] = history[k][1]

y_train = np.zeros((1, 5*len(train_list)), dtype=np.float32)
for i, item in enumerate(train_list):
    val = table[item[0]][item[1]][MAX_MONTH][1]
    target_class = min(max(int(val), 0), 4)
    y_train[0][i*5+target_class] = 1.0

In [17]:
# x_train[0][0]

In [18]:
# Validation tensors: same layout as the training tensors but shifted forward
# by one month block (inputs are blocks 1..MAX_MONTH, target is block
# MAX_MONTH + 1). Requires `train_list` ([item_code, shop_code] pairs).
#
# Fixes:
#   * the count used to be written to column 2*i as well, overwriting the price
#     and leaving column 2*i + 1 at zero;
#   * a negative count produced a negative class offset and set a bit in the
#     neighbouring pair's slot; the class is now clamped to 0..4.
x_val= np.zeros((1, MAX_MONTH, 2*len(train_list)), dtype=np.float32)
for i, item in enumerate(train_list):
    history = table[item[0]][item[1]]
    for k in range(MAX_MONTH):
        x_val[0][k][i*2] = history[k+1][0]
        x_val[0][k][i*2+1] = history[k+1][1]

y_val = np.zeros((1, 5*len(train_list)), dtype=np.float32)
for i, item in enumerate(train_list):
    val = table[item[0]][item[1]][MAX_MONTH+1][1]
    target_class = min(max(int(val), 0), 4)
    y_val[0][i*5+target_class] = 1.0

In [89]:
# Print the sizes of axes 1 and 2 of x_train (window length, feature width).
print(len(x_train[0]), len(x_train[0][0]))

32 42010


[[1195, 3],
 [1239, 3],
 [1197, 3],
 [1253, 3],
 [1121, 3],
 [1134, 3],
 [1135, 3],
 [1137, 3],
 [1144, 3],
 [1106, 3],
 [1358, 3],
 [1352, 3],
 [1382, 3],
 [1315, 3],
 [1336, 3],
 [1328, 3],
 [1331, 3],
 [1334, 3],
 [1333, 3],
 [1355, 3],
 [1356, 3],
 [1357, 3],
 [1362, 3],
 [1364, 3],
 [1370, 3],
 [1371, 3],
 [847, 3],
 [729, 3],
 [731, 3],
 [736, 3],
 [740, 3],
 [831, 3],
 [725, 3],
 [723, 3],
 [762, 3],
 [765, 3],
 [766, 3],
 [767, 3],
 [768, 3],
 [992, 3],
 [994, 3],
 [995, 3],
 [1013, 3],
 [1017, 3],
 [1023, 3],
 [1027, 3],
 [989, 3],
 [988, 3],
 [848, 3],
 [849, 3],
 [2145, 3],
 [2147, 3],
 [2149, 3],
 [1961, 3],
 [1972, 3],
 [2354, 3],
 [2423, 3],
 [2375, 3],
 [2389, 3],
 [2322, 3],
 [2300, 3],
 [2302, 3],
 [2303, 3],
 [2306, 3],
 [1538, 3],
 [1535, 3],
 [1579, 3],
 [1434, 3],
 [1529, 3],
 [1530, 3],
 [1533, 3],
 [1534, 3],
 [1427, 3],
 [1778, 3],
 [1780, 3],
 [1781, 3],
 [1827, 3],
 [1925, 3],
 [1624, 3],
 [1626, 3],
 [1644, 3],
 [1667, 3],
 [1683, 3],
 [1690, 3],
 [3948, 3],


In [20]:
# Build and train the sequence model: two stacked 4-unit LSTM layers, each
# followed by BatchNormalization and LeakyReLU, then a linear Dense layer with
# 5 outputs per entry of `train_list`.
#
# Requires `train_list`, `x_train` and `y_train` to exist already.
#
# NOTE(review): the last recorded run of this cell ended with
# "AttributeError: 'Tensor' object has no attribute 'assign'"; that looks like
# a Keras/TensorFlow version mismatch - confirm the installed versions.
# NOTE(review): the targets are one-hot vectors but the output has no softmax
# and the loss is mean squared error - confirm this is intended.
print('Build model...')
model = Sequential()
# First LSTM returns the full sequence so that the second LSTM can consume it.
model.add(LSTM(4, return_sequences=True, input_shape=(32, 2*len(train_list)),recurrent_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(LeakyReLU())
# Second LSTM emits only its final state.
model.add(LSTM(4, input_shape=(32, 2*len(train_list)),recurrent_initializer='he_uniform'))
model.add(BatchNormalization())
model.add(LeakyReLU())

# Alternatives tried earlier and left disabled: wider LSTM stacks (32 units),
# Dropout(0.3) / Dropout(0.5), and fully-connected stacks such as
# Dense(64) -> Dense(32) -> Dense(64) or Dense(128) -> Dense(64) -> Dense(64),
# each with BatchNormalization + LeakyReLU.

model.add(Dense(5*len(train_list)))
# model.add(Activation('softmax'))


# Other optimizers tried: RMSprop(lr=0.005), SGD(lr=0.01), Nadam(lr=0.01).
optimizer = Adam(lr=0.005, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)

model.compile(loss='mean_squared_error', optimizer=optimizer)

# Earlier runs used batch_size=2048, epochs=50 with validation_data=(x_val, y_val).
model.fit(x_train, y_train, batch_size=32, epochs=10)

Build model...


AttributeError: 'Tensor' object has no attribute 'assign'

In [133]:
# Print the layer-by-layer summary of the current `model`.
# NOTE(review): the saved output of this cell lists 32-unit LSTM layers, while
# the build cell uses 4 units - the output looks stale; re-run to confirm.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_52 (LSTM)               (None, 32, 32)            14976     
_________________________________________________________________
batch_normalization_111 (Bat (None, 32, 32)            128       
_________________________________________________________________
leaky_re_lu_110 (LeakyReLU)  (None, 32, 32)            0         
_________________________________________________________________
lstm_53 (LSTM)               (None, 32, 32)            8320      
_________________________________________________________________
batch_normalization_112 (Bat (None, 32, 32)            128       
_________________________________________________________________
leaky_re_lu_111 (LeakyReLU)  (None, 32, 32)            0         
_________________________________________________________________
lstm_54 (LSTM)               (None, 32)                8320      
__________

In [134]:
# Test tensor with one sample per item: shape (MAX_ITEM, MAX_MONTH, 2 * MAX_SHOP).
# For shop j, column 2*j holds the price and column 2*j + 1 the count; the
# window covers month blocks 2 .. MAX_MONTH + 1.
x_test = np.zeros((MAX_ITEM, MAX_MONTH, MAX_SHOP*2), dtype=np.float32)
for item_idx, shops_of_item in enumerate(table):
    for shop_idx, history in enumerate(shops_of_item):
        for step in range(MAX_MONTH):
            slot = history[step+2]
            x_test[item_idx][step][shop_idx*2] = slot[0]
            x_test[item_idx][step][shop_idx*2+1] = slot[1]

In [106]:
# Inspect the first row of y_train.
y_train[0]

array([ 0.        ,  0.        ,  0.        ,  0.        , -0.15018865,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        , -0.15018865,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ], dtype=float32)

In [135]:
# Run the current model over x_test to obtain the raw predictions.
predict_test = model.predict(x_test)

In [137]:
# Inspect the raw prediction vector for the first test sample.
predict_test[0]

array([3.40062827e-02, 1.79483891e-06, 1.32155208e-06, 2.44939429e-06,
       2.20105699e-06, 4.23374288e-02, 1.83745851e-06, 1.08320864e-06,
       1.72728187e-06, 1.84071371e-06, 3.61154303e-02, 3.79306675e-06,
       2.01508033e-06, 1.98901853e-06, 1.76862602e-06, 2.47798003e-02,
       2.36423989e-06, 2.27325768e-06, 2.30488490e-06, 2.06634149e-06,
       7.56919151e-03, 5.65094115e-06, 1.86198167e-06, 1.19420065e-06,
       2.41746898e-06, 1.39330439e-02, 2.53451503e-06, 2.13967837e-06,
       1.65792335e-06, 1.03226910e-06, 5.16091585e-02, 3.67291796e-06,
       2.16585227e-06, 1.95733219e-06, 1.60680406e-06, 2.84844898e-02,
       1.57260604e-06, 1.40646011e-06, 2.07964854e-06, 2.26017505e-06,
       3.56795639e-02, 2.47934258e-06, 1.46514378e-06, 1.15042576e-06,
       2.03657510e-06, 1.65591370e-02, 1.82980614e-06, 1.63527955e-06,
       1.13816100e-06, 2.50957805e-06, 2.48355865e-02, 1.82418182e-06,
       1.52385871e-06, 2.27727855e-06, 1.60379420e-06, 1.91739853e-02,
      

In [100]:
# Map the predictions back through the count scaler.
# NOTE(review): in the scaling cell the transform of item_cnt_day is commented
# out, so the targets appear never to have been scaled - confirm that undoing
# the scaling here is still appropriate.
predict_test = cnt_scaler.inverse_transform(predict_test)

In [101]:
# Inspect the first value of the first prediction row.
predict_test[0][0]

2.6955707

In [104]:
# Inspect the whole first prediction row after the inverse transform.
predict_test[0]

array([2.6955707, 2.6955707, 2.6955707, 2.6955707, 2.6955707, 2.6955707,
       2.6955707, 2.6955707, 2.6955707, 2.6955707, 2.6955707, 2.6955707,
       2.6955707, 2.6955707, 2.6955707, 2.6955707, 2.6955707, 2.6955707,
       2.6955707, 2.6955707, 2.6955707, 2.6955707, 2.6955707, 2.6955707,
       2.6955707, 2.6955707, 2.6955707, 2.6955707, 2.6955707, 2.6955707,
       2.6955707, 2.6955707, 2.6955707, 2.6955707, 2.6955707, 2.6955707,
       2.6955707, 2.6955707, 2.6955707, 2.6955707, 2.6955707, 2.6955707],
      dtype=float32)

In [167]:
# Sanity totals over all (shop, item) pairs in the test index.
# Requires `test2` (the test frame indexed by shop_id, item_id) to exist.

# 1) Units summed over month blocks 28..33.
summ = 0
for offset in range(6):
    for shop_id, item_id in test2.index.values.tolist():
        summ += table[item_dm[item_id]][shop_dm[shop_id]][28+offset][1]

print(summ)

# 2) Units in month block 22 (= 34 - 12) only.
summ = sum(
    table[item_dm[item_id]][shop_dm[shop_id]][34-12][1]
    for shop_id, item_id in test2.index.values.tolist()
)
print(summ)


252950.0
38947.0


In [177]:
# Heuristic submission: the monthly count for every test pair is a weighted
# mix of
#   res[iid][sid] - the total over all month blocks,
#   pr1           - the total over blocks 28..33,
#   pr2           - the total over blocks 21..23.
#
# Fix: the result used to be stored with chained indexing
# (test.loc[key]['item_cnt_month'] = val), which can write to a temporary copy
# and leave the frame unchanged. The values are now collected in index order
# and assigned to the column in one step.
test = pd.read_csv('all/test.csv')
test = test.set_index(['shop_id', 'item_id'])
test['item_cnt_month'] = 0

# test2 stays bound to the indexed frame; other cells iterate over test2.index.
test2 = test

summ = 0
month_values = []
for shop_id, item_id in test2.index.values.tolist():
    iid = item_dm[item_id]
    sid = shop_dm[shop_id]
    history = table[iid][sid]

    pr1 = sum(history[28+k][1] for k in range(6))
    pr2 = history[34-11][1] + history[34-12][1] + history[34-13][1]

    val = int(res[iid][sid]/40.0+pr1/18.0+pr2/6.0)
    month_values.append(val)
    summ = summ+val

test['item_cnt_month'] = month_values
print(summ)

# Back to a flat frame with only ID and item_cnt_month, then write it out.
test = test.reset_index().drop(['shop_id', 'item_id'], axis=1)
test.to_csv('submission2.csv', index=False)


28072


In [None]:
# predict[item_code][shop_code]: one placeholder (an empty list) per pair,
# overwritten later with the predicted count.
predict = [[[] for _ in range(len(shop_dm))] for _ in range(len(item_dm))]

In [199]:

# Score a heuristic for month block `month` against the known counts in `table`.
# Requires `test2` and a pre-allocated `predict` table.
summ = 0
month = 33

for shop_id, item_id in test2.index.values.tolist():
    iid, sid = item_dm[item_id], shop_dm[shop_id]
    history = table[iid][sid]

    # Candidate features: everything before `month`, the last 6 blocks before
    # it, and the 3 blocks roughly one year earlier.
    pr0 = sum(history[k][1] for k in range(month))
    pr1 = sum(history[month-6+k][1] for k in range(6))
    pr2 = sum(history[month-back][1] for back in (11, 12, 13))

    # Only pr1 is used by the current formula.
    val = int(pr1/7.0)

    predict[iid][sid] = val
    summ = summ+val

print(summ)

# Mean squared error of the heuristic over all test pairs.
error = 0
for shop_id, item_id in test2.index.values.tolist():
    iid, sid = item_dm[item_id], shop_dm[shop_id]
    error = (predict[iid][sid] - table[iid][sid][month][1])**2 + error
print (error/len(test2.index.values.tolist()))

# Results log (formula / total predicted / MSE):
#
# val = int(res[iid][sid]/35+pr1/24.0+pr2/12.0)
# 17847
# 0.4733940242763772
#
# val = int(res[iid][sid]/35+pr1/30.0+pr2/15.0)
# 15403
# 0.4690429505135387
#
# val = int(res[iid][sid]/35+pr1/36.0+pr2/18.0)
# 13677
# 0.46819327731092436
#
# val = int(pr1/7.0)
# 12897
# 0.42683006535947715



12897
0.42683006535947715


In [19]:

# Reload the test set keyed by (shop_id, item_id) with a zeroed prediction
# column; test2 is a second name for the same frame.
test = pd.read_csv('all/test.csv').set_index(['shop_id', 'item_id'])
test['item_cnt_month'] = 0

test2 = test

In [14]:

# Per-pair feature tables for predicting month block `month`.
month = 33

# predict[item][shop]: empty-list placeholders, filled in by later cells.
predict = [[[] for _ in range(len(shop_dm))] for _ in range(len(item_dm))]

# pr0: count summed over every block before `month`.
pr0 = [
    [sum(table[i][j][k][1] for k in range(month)) for j in range(len(shop_dm))]
    for i in range(len(item_dm))
]

# pr1: count summed over the six blocks immediately before `month`.
pr1 = [
    [sum(table[i][j][month-6+k][1] for k in range(6)) for j in range(len(shop_dm))]
    for i in range(len(item_dm))
]

# pr2: count summed over the blocks 11, 12 and 13 steps before `month`.
pr2 = [
    [sum(table[i][j][month-back][1] for back in (11, 12, 13)) for j in range(len(shop_dm))]
    for i in range(len(item_dm))
]
                

In [60]:

# Grid over the three divisors of the heuristic
#     val = int(pr0/arg1 + pr1/arg2 + pr2/arg3)
# Requires `test2`, `predict`, `pr0`, `pr1`, `pr2` and `table`.
summ = 0
month = 33

# Grids tried previously:
#   arg1 in [25,30,35,40], arg2 in [6,8,10,12,14,16,18], arg3 in [3,4,5,6,7,8,9,10,11,12]
#   arg1 in [25,30,35,40], arg2 in [6,10,14,18],         arg3 in [3,5,7,9,11]
#   arg1 in [35,40,50,60], arg2 in [10,12,14],           arg3 in [7,10,13,16,19]
#   arg1 in [30,60,90,9999], arg2 in [10,20,30,9999],    arg3 in [7,14,21,9999]
for arg1 in [9999]:
    for arg2 in [10]:
        for arg3 in [14]:
            test_keys = test2.index.values.tolist()

            summ = 0
            for shop_id, item_id in test_keys:
                iid, sid = item_dm[item_id], shop_dm[shop_id]
                val = int(pr0[iid][sid]/float(arg1) + pr1[iid][sid]/float(arg2) + pr2[iid][sid]/float(arg3))
                predict[iid][sid] = val
                summ = summ+val

            # Squared error accumulated only over pairs where int(pr1/4) == 0,
            # scored against a prediction of 0; `predict` itself is not used here.
            error = 0
            for shop_id, item_id in test_keys:
                iid, sid = item_dm[item_id], shop_dm[shop_id]
                if int(pr1[iid][sid]/4) == 0:
                    error = (0 - table[iid][sid][month][1])**2 + error

            print (arg1,arg2,arg3,summ, error/float(len(test_keys)))


            

9999 10 14 9037 0.2480298786181139


In [15]:
# Select the (item, shop) pairs the heuristic predicts to sell at all; those
# pairs become `train_list`. Requires `test2`, `predict`, `pr0`, `pr1`, `pr2`.
arg1 = 18*100
arg2 = 4
arg3 = 3*4*10

train_list = []
for shop_id, item_id in test2.index.values.tolist():
    iid, sid = item_dm[item_id], shop_dm[shop_id]
    val = int(pr0[iid][sid]/(arg1) + pr1[iid][sid]/(arg2) + pr2[iid][sid]/(arg3))

    predict[iid][sid] = val
    if val > 0:
        train_list.append([iid, sid])

# Number of selected pairs.
count = len(train_list)
print(count)


NameError: name 'test2' is not defined

In [27]:

# Fill `predict` with the heuristic
#     val = int(pr0/arg1 + pr1/arg2 + pr2/arg3)
# for the chosen divisors and keep the total predicted units in `summ`.
# Requires `test2`, `predict`, `pr0`, `pr1`, `pr2`.
summ = 0
month = 33

# Grids tried previously:
#   arg1 in [25,30,35,40], arg2 in [6,8,10,12,14,16,18], arg3 in [3,4,5,6,7,8,9,10,11,12]
#   arg1 in [25,30,35,40], arg2 in [6,10,14,18],         arg3 in [3,5,7,9,11]
#   arg1 in [35,40,50,60], arg2 in [10,12,14],           arg3 in [7,10,13,16,19]
#   arg1 in [30,60,90,9999], arg2 in [10,20,30,9999],    arg3 in [7,14,21,9999]
for arg1 in [9999]:
    for arg2 in [10]:
        for arg3 in [14]:
            summ = 0
            for shop_id, item_id in test2.index.values.tolist():
                iid, sid = item_dm[item_id], shop_dm[shop_id]
                val = int(pr0[iid][sid]/float(arg1) + pr1[iid][sid]/float(arg2) + pr2[iid][sid]/float(arg3))
                predict[iid][sid] = val
                summ = summ+val




In [26]:
# Display the total currently held in `summ` (set by whichever cell ran last).
summ

9037

In [28]:
# Write the heuristic predictions held in `predict[item_code][shop_code]` to
# submission2.csv.
#
# Fix: the value used to be stored with chained indexing
# (test.loc[key]['item_cnt_month'] = ...), which can write to a temporary copy
# and leave the frame unchanged. The values are now gathered in index order and
# assigned to the column in one step.
test = pd.read_csv('all/test.csv')
test = test.set_index(['shop_id', 'item_id'])
test['item_cnt_month'] = 0

# test2 stays bound to the indexed frame; other cells iterate over test2.index.
test2 = test

test['item_cnt_month'] = [
    predict[item_dm[item_id]][shop_dm[shop_id]]
    for shop_id, item_id in test2.index.values.tolist()
]

# Back to a flat frame with only ID and item_cnt_month, then write it out.
test = test.reset_index().drop(['shop_id', 'item_id'], axis=1)
test.to_csv('submission2.csv', index=False)


In [33]:
# Column sums of the submission frame; the item_cnt_month entry is the total
# number of predicted units.
test.sum()

ID                22940712900
item_cnt_month           9037
dtype: int64

In [68]:
# Score the current model on the validation arrays: predict, map predictions
# and targets back through cnt_scaler, and report the RMSE.
# NOTE(review): in the scaling cell the transform of item_cnt_day is commented
# out, so the targets may never have been scaled - confirm that
# inverse_transform is still appropriate here.
import math
from sklearn.metrics import mean_squared_error

# make predictions
# predict_train = model.predict(x_train)
predict_val = model.predict(x_val)
# invert predictions
# predict_train = cnt_scaler.inverse_transform(predict_train)
# yy_train = cnt_scaler.inverse_transform(y_train)
predict_val = cnt_scaler.inverse_transform(predict_val)
yy_val = cnt_scaler.inverse_transform(y_val)
# calculate root mean squared error
# trainScore = math.sqrt(mean_squared_error(predict_train, yy_train))
# print('Train Score: %.2f RMSE' % (trainScore))
valScore = math.sqrt(mean_squared_error(predict_val, yy_val))
print('Test Score: %.2f RMSE' % (valScore))

# Results log from earlier runs:
#For 1 epoch
# Train Score: 2.31 RMSE
# Test Score: 1.42 RMSE
# 6 epoch
# Train Score: 1.89 RMSE
# Test Score: 1.98 RMSE
# 10 epoch
# Train Score: 1.53 RMSE
# Test Score: 2.23 RMSE
# 10 epoch
# 16 LSTM
# Train Score: 1.93 RMSE
# Test Score: 1.90 RMSE
# 10 epoch
# 8 LSTM
# Train Score: 2.31 RMSE
# Test Score: 1.77 RMSE

# 4 LSTM
# Test Score: 1.52 RMSE

# 1 LSTM
# Test Score: 0.98 RMSE

# 4 Dense, size 16, 10 epoch
# 0.82


# 5 Dense, size 16, 12 epoch
# Test Score: 1.36 RMSE

# Dense 16, 8, 8, 8, 8, 
# epoch 8
# Test Score: 0.58 RMSE


Test Score: 1.43 RMSE


In [38]:
# Check that the final feature and target collections have the same length.
# NOTE(review): x_fin / y_fin are not created anywhere in the visible
# notebook - confirm where they come from.
print(len(x_fin))
print(len(y_fin))
# print(len(y_train))
# print(len(y_val))

134126
134126


In [39]:
# Build and train a small fully-connected regressor on (x_fin, y_fin).
print('Build model...')
model = Sequential()

# Five identical stages, Dense -> BatchNormalization -> LeakyReLU -> Dropout(0.5),
# with widths 16, 8, 8, 8, 8.
for units in (16, 8, 8, 8, 8):
    model.add(Dense(units, input_dim=(length)))
    model.add(BatchNormalization())
    model.add(LeakyReLU())
    model.add(Dropout(0.5))

# Single linear output unit.
model.add(Dense(1))

optimizer = RMSprop(lr=0.005)
model.compile(loss='mean_squared_error', optimizer=optimizer)

model.fit(x_fin, y_fin, batch_size=2048, epochs=5)

Build model...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f897a26be80>

In [38]:
# Persist the current model to my_model.h5 in the working directory.
model.save("my_model.h5") 

In [None]:
# Restore the model saved as my_model.h5.
# Fix: the notebook only uses `from keras... import ...`, so the bare name
# `keras` is never bound and `keras.models.load_model` raised NameError.
from keras.models import load_model

model = load_model("my_model.h5")

In [95]:
# Drop the large prediction arrays from the notebook namespace if they exist.
# Fix: a plain `del` raised NameError when a name was absent (predict_train is
# never assigned in the visible cells), which broke Restart & Run All.
for _stale_name in ('predict_train', 'predict_val'):
    globals().pop(_stale_name, None)

In [58]:
# Continue training the current model on the validation arrays.
# NOTE(review): after this step x_val / y_val are no longer held-out data.
model.fit(x_val, y_val, batch_size=2048, epochs=13)

Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13


<keras.callbacks.History at 0x7f9d29e44f98>

In [40]:
# Predict on x_test and map the result back through the count scaler in one step.
predict_test = cnt_scaler.inverse_transform(model.predict(x_test))

In [41]:

# Write the model predictions to submission2.csv.
#
# Fixes:
#   * the value used to be stored with chained indexing
#     (test.loc[key]['item_cnt_month'] = ...), which can write to a temporary
#     copy and leave the frame unchanged; a single .loc[row, column] assignment
#     is used instead;
#   * the column is created as float so that float predictions do not force a
#     dtype change part-way through the loop.
test = pd.read_csv('all/test.csv')
test = test.set_index(['shop_id', 'item_id'])
test['item_cnt_month'] = 0.0

# NOTE(review): x_test_o is not created in the visible cells; each entry is
# presumably a sequence whose first element carries 'shop_id' and 'item_id' -
# confirm against the cell that builds it.
for index, sentence in enumerate(x_test_o):
    (shop_id, item_id) = (sentence[0]['shop_id'], sentence[0]['item_id'])
    # assumes the model emits one value per sample; the first one is used - TODO confirm
    test.loc[(shop_id, item_id), 'item_cnt_month'] = float(np.ravel(predict_test[index])[0])

# Back to a flat frame with only ID and item_cnt_month, then write it out.
test = test.reset_index().drop(['shop_id', 'item_id'], axis=1)
test.to_csv('submission2.csv', index=False)
