In [1]:
import os
import numpy as np
import scipy as sp
from scipy import stats
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from gc import collect

In [2]:
def write_prediction_to_file(prediction, path="submission.csv"):
    """Write predictions to a CSV submission file.

    The file starts with the header line ``id,nextvisit`` and is followed by
    one ``"<id>, <prediction>"`` line per element of ``prediction``. Ids are
    consecutive integers starting at 1, assigned in iteration order.

    Parameters
    ----------
    prediction : iterable
        Predicted values, one per id; each one is rendered through an
        f-string.
    path : str or path-like, optional
        Destination file. Defaults to ``"submission.csv"`` in the current
        working directory. The file is opened in ``"w"`` mode, so existing
        content is replaced.
    """
    header = "id,nextvisit"
    with open(path, "w") as f:
        print(header, file=f)
        # Ids are 1-based, hence enumerate(..., 1).
        for idx, p in enumerate(prediction, 1):
            print(f"{idx}, {p}", file=f)

In [3]:
def visits2weekdays(visits):
    """Map 1-based day numbers onto weekday numbers in ``1..7``.

    Day 1 maps to weekday 1, day 7 to weekday 7, day 8 wraps back to
    weekday 1, and so on.

    Parameters
    ----------
    visits : array_like of int
        Day numbers; a scalar, a sequence, or an array of any shape.

    Returns
    -------
    numpy.ndarray or numpy scalar
        Weekday numbers with the same shape as ``visits``.
    """
    # The mapping is element-wise, so plain array arithmetic is enough.
    return (np.asarray(visits) - 1) % 7 + 1

def visits2history(visits, history_size):
    """Build a 0/1 indicator vector marking the days on which a visit occurred.

    Parameters
    ----------
    visits : array_like of int
        1-based day numbers; day ``d`` sets position ``d - 1``. Duplicates
        are allowed, and an empty sequence yields an all-zero history.
    history_size : int
        Length of the returned vector.

    Returns
    -------
    numpy.ndarray
        Integer array of length ``history_size`` with ones at visited days.

    Raises
    ------
    IndexError
        If any day number lies outside ``1..history_size``.
    """
    history = np.zeros(history_size, dtype=int)
    days = np.asarray(visits)
    if days.size == 0:
        return history
    # Without this check day 0 would become index -1 and silently mark the
    # last day of the history instead of failing.
    if days.min() < 1 or days.max() > len(history):
        raise IndexError(
            f"visit days must lie in 1..{len(history)}, "
            f"got values in {days.min()}..{days.max()}"
        )
    history[days - 1] = 1
    return history

In [4]:
# Read the training table, then replace each space-separated "visits" string
# with an integer array of weekday indices in 0..6, i.e. (day - 1) mod 7.
with open('./train.csv', 'r') as f:
    data = pd.read_csv(f)
    data["visits"] = data["visits"].apply(
        lambda raw: np.mod(np.fromstring(raw, dtype=int, sep=" ") - 1, 7)
    )

In [5]:
# Preview the first rows of the loaded frame.
data.head()

Unnamed: 0,id,visits
0,1,"[1, 6, 6, 1, 3, 3, 3, 5, 5, 0, 4, 6, 6, 5, 4, ..."
1,2,"[2, 3, 4, 0, 3, 2, 4, 0, 2, 6, 5, 3, 4, 4, 2, ..."
2,3,"[1, 2, 4, 4, 4, 0, 6, 5, 5, 6, 4, 6, 5, 1, 6, ..."
3,4,"[0, 5, 0, 0, 4, 1, 0, 5, 1, 4, 6, 4, 5, 4, 1, ..."
4,5,"[5, 1, 6, 0, 4, 1, 1, 1, 6, 1, 1, 1, 1, 4, 1, ..."


TypeError: operands could not be broadcast together with shapes (29,) (205,) 

In [None]:
DAYS_IN_WEEK = 7

def week_weights_computation(data, delta, alpha):
    """Compute a normalised, position-weighted weekday distribution per client.

    For a client with ``n`` visits, the visit at 0-based position ``i``
    contributes ``alpha * (i / n) ** delta + (1 - alpha) * log(i + 1) / log(n)``
    to the weight of its weekday. For positive ``delta`` and ``alpha`` in
    ``[0, 1]`` this grows with ``i``. Each client's weights are then scaled
    to sum to 1.

    Parameters
    ----------
    data : sequence of array_like of int
        One visit sequence per client, holding weekday indices in
        ``0..DAYS_IN_WEEK - 1``.
    delta : float
        Exponent applied to the linear position term.
    alpha : float
        Mixing coefficient between the power term and the logarithmic term.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(data), DAYS_IN_WEEK)``; every row sums to 1.
        A client whose total weight is zero or not finite (for example an
        empty sequence) gets the uniform distribution instead of NaNs.
    """
    uniform = np.full(DAYS_IN_WEEK, 1.0 / DAYS_IN_WEEK)
    result = np.zeros((len(data), DAYS_IN_WEEK))
    for idx, row in tqdm(enumerate(data), total=len(data)):
        weekdays = np.asarray(row)
        visits_num = weekdays.size
        week_weights = np.zeros(DAYS_IN_WEEK)
        if visits_num > 0:
            positions = np.arange(visits_num)
            w_1 = positions / float(visits_num)
            if visits_num > 1:
                w_2 = np.log(positions + 1) / np.log(visits_num)
            else:
                # log(1) == 0 would give 0/0 here. The last visit of any
                # longer sequence has w_2 == 1, so use that for the only one.
                w_2 = np.ones(1)
            visit_weights = alpha * np.power(w_1, delta) + (1 - alpha) * w_2
            # Unbuffered accumulation: repeated weekdays must add up.
            np.add.at(week_weights, weekdays, visit_weights)
        total = week_weights.sum()
        if np.isfinite(total) and total > 0:
            result[idx, :] = week_weights / total
        else:
            # Nothing to normalise by; fall back to "no preference".
            result[idx, :] = uniform
    return result


def probs_computation(week_weights):
    """Turn per-weekday visit weights into first-visit probabilities.

    Entry ``i`` of the result is
    ``week_weights[i] * prod(1 - week_weights[j] for j < i)``: the weight of
    weekday ``i`` times the chance that no earlier weekday was picked, with
    each weight treated as an independent probability.

    Parameters
    ----------
    week_weights : array_like of float
        One-dimensional weights, one per weekday, ordered from the first
        weekday to the last.

    Returns
    -------
    numpy.ndarray
        New array of the same length. The input is not modified.
    """
    # Work on a fresh array: the caller's weights must stay intact.
    weights = np.array(week_weights, dtype=float)
    # Chance of no visit before each weekday; nothing precedes the first,
    # so the sequence starts at 1.
    no_earlier_visit = np.concatenate(([1.0], np.cumprod(1.0 - weights[:-1])))
    return weights * no_earlier_visit

def likelyhood_computation(data, delta, alpha):
    """Predict the most likely weekday index for every client.

    Parameters
    ----------
    data : sequence of array_like of int
        One visit sequence per client, forwarded to
        ``week_weights_computation``.
    delta, alpha : float
        Weighting parameters forwarded to ``week_weights_computation``.

    Returns
    -------
    result : numpy.ndarray
        Float array of length ``len(data)`` holding, per client, the argmax
        index of ``probs_computation`` applied to that client's weights.
    client_weekday_weights : numpy.ndarray
        The weight matrix as returned by ``week_weights_computation``.
    """
    client_weekday_weights = week_weights_computation(data, delta, alpha)
    result = np.zeros(len(data))
    for idx, client_weights in enumerate(client_weekday_weights):
        # Iterating a 2-D array yields views of its rows; hand over a copy so
        # the returned weight matrix cannot be altered by in-place work
        # inside probs_computation.
        probs = probs_computation(client_weights.copy())
        result[idx] = np.argmax(probs)
    return result, client_weekday_weights

In [None]:
# Scratch check: row-wise argmax shifted to a 1-based index; evaluates to array([4, 5]).
np.argmax(np.array([[0,1,3, 7, 5, 4, 2,], [0,1,3, 7, 9, 4, 2,]]), axis=1) + 1

In [None]:
# Inspect the visits entry stored under index label 0.
data.visits[0]

In [None]:
# Hold out the final entry of every visit sequence: everything before it is
# the training input, the last element is the target.
train_data = list(map(lambda seq: seq[:-1], data.visits))
test_data = list(map(lambda seq: seq[-1], data.visits))

In [None]:
# Show the first entries of train_data and test_data side by side.
train_data[0], test_data[0]

In [None]:
# Inspect the visits entry stored under index label 0 (same expression as an earlier cell).
data.visits[0]

### LSTM in Keras

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import LSTM, Embedding
from keras.utils import to_categorical

# one_hot_labels = keras.utils.to_categorical(labels, num_classes=10)

In [None]:
# NOTE(review): train_df is not defined in any visible cell, so this only runs
# with state from elsewhere — confirm where train_df.weekdays comes from.
# Input window: the 24 entries preceding the last one; label: the last entry.
# Both are shifted down by one (presumably from 1..7 to 0..6 — TODO confirm).
train_x = np.stack([row[-25:-1] for row in train_df.weekdays], axis=0) - 1
train_y = np.stack([row[-1] for row in train_df.weekdays], axis=0) - 1

In [None]:
# Peek at the first training window and its label.
train_x[0], train_y[0]

In [None]:
# train_x stays integer-coded because it is fed to an Embedding layer; only
# the labels are one-hot encoded (7 classes).
# The guard keeps the cell idempotent: re-running it must not encode labels
# that are already one-hot.
if np.ndim(train_y) == 1:
    train_y = to_categorical(train_y, num_classes=7)

In [None]:
# Sequence classifier: 7 input symbols embedded in 16 dimensions over windows
# of length 24, a 100-unit LSTM, and a softmax over 7 output classes.
model = Sequential()
model.add(Embedding(7, 16, input_length=24))
model.add(LSTM(100))
model.add(Dense(7, activation='softmax'))
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit an extra "None" line.
model.summary()

In [None]:
# Categorical cross-entropy to match the one-hot labels; report accuracy.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train on the full training set; no validation data is passed here.
model.fit(
    train_x,
    train_y,
    epochs=500,
    batch_size=64,
    verbose=2,
)

In [None]:
# Inference window: the last 24 entries of each sequence, shifted down by one
# like the training inputs.
# NOTE(review): train_df is not defined in any visible cell — confirm its origin.
test_x = [row[-24:] for row in train_df.weekdays]
test_x = np.stack(test_x, axis=0)-1
# NOTE(review): test_y is the last element of each sequence, which is also the
# last element of the test_x window, so it is not a held-out target.
test_y = np.array([row[-1] for row in train_df.weekdays])-1

# Sequential.predict_classes has been removed from Keras; for a softmax
# classifier the equivalent is the argmax over the predicted probabilities.
pred = np.argmax(model.predict(test_x, verbose=2), axis=-1)

In [None]:
# Shift predictions up by one, in place. Not idempotent: re-running this cell shifts them again.
pred += 1

In [None]:
# Write the predictions to the submission CSV.
write_prediction_to_file(pred)

In [None]:
from sklearn.metrics import accuracy_score

# pred has already been shifted up by one in an earlier cell, while test_y was
# built with "- 1"; shift test_y as well so both use the same label range.
# NOTE(review): test_y is the final element of the window the model was given
# as input, so this score does not measure next-visit accuracy.
accuracy_score(test_y + 1, pred)

In [None]:
# Re-stack test_x along axis 0 and show its shape. test_x was already built
# with np.stack in an earlier cell, so this is expected to leave it unchanged.
test_x = np.stack(test_x, axis=0)
test_x.shape

In [None]:
# Most frequent value in the first entry of train_df.weekdays.
# NOTE(review): train_df is not defined in any visible cell — confirm its origin.
stats.mode(train_df.weekdays[0])

In [None]:
# Calendar constants. A "month" here is exactly four weeks (28 days).
DAYS_IN_WEEK = 7
WEEK_IN_MONTH = 4
# Was misspelled as WEEK_IN_MOTH, which raised NameError.
DAYS_IN_MONTH = DAYS_IN_WEEK * WEEK_IN_MONTH
MONTH_IN_YEAR = 12