# Data loading and preprocessing

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load only the columns used downstream and parse InvoiceDate as datetimes.
# The builtin `str` is used for the text columns: the `np.str` alias was
# deprecated in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
OnlRt = pd.read_csv(
    './OnlineRetail.csv',
    usecols=['CustomerID', 'InvoiceDate', 'UnitPrice', 'Quantity', 'Country'],
    parse_dates=['InvoiceDate'],
    dtype={
        'CustomerID': str,
        'UnitPrice': np.float32,
        'Quantity': np.int32,
        'Country': str,
    },
)
print(OnlRt.head())

   Quantity         InvoiceDate  UnitPrice CustomerID         Country
0         6 2010-12-01 08:26:00       2.55      17850  United Kingdom
1         6 2010-12-01 08:26:00       3.39      17850  United Kingdom
2         8 2010-12-01 08:26:00       2.75      17850  United Kingdom
3         6 2010-12-01 08:26:00       3.39      17850  United Kingdom
4         6 2010-12-01 08:26:00       3.39      17850  United Kingdom


## Data cleaning

In [3]:
# Customer ids that appear on at least one row with a non-positive quantity
# or a non-positive unit price.
bad_row = (OnlRt['Quantity'] <= 0) | (OnlRt['UnitPrice'] <= 0)
neg_id = OnlRt.loc[bad_row, 'CustomerID']

# Keep United Kingdom rows that carry a customer id and whose customer was
# not flagged above; the Country column is dropped once it has been used.
keep_row = (
    OnlRt['CustomerID'].notnull()
    & ~OnlRt['CustomerID'].isin(neg_id)
    & (OnlRt['Country'] == 'United Kingdom')
)
data0 = OnlRt[keep_row].drop('Country', axis=1)

In [4]:
# Line total for each row: unit price times quantity.
data1 = data0.assign(amount=data0['UnitPrice'] * data0['Quantity'])

In [5]:
# Calendar date (time of day discarded) of each customer's earliest invoice.
# Aggregating with min() keeps CustomerID as the group key of the result, so
# reset_index() yields a CustomerID column to merge on. GroupBy.nth() behaves
# as a row filter since pandas 2.0 and keeps the original row labels instead,
# which would leave no CustomerID column for the merge below.
first_time = (
    data1.groupby('CustomerID')['InvoiceDate']
    .min()
    .dt.date
    .rename('first_time')
    .reset_index()
)
# Attach the first-purchase date to every row of the same customer.
data2 = pd.merge(data1, first_time, how='left', on=['CustomerID'])

## Extracting new features

In [6]:
# Whole days between each invoice's calendar date and the customer's
# first-purchase date stored in 'first_time'.
invoice_day = data2['InvoiceDate'].dt.date
dayth = (invoice_day - data2['first_time']).map(lambda gap: gap.days)

In [7]:
# Calendar components of every invoice timestamp, taken through the
# vectorised .dt accessor rather than a Python-level lambda per row.
invoice_ts = data2['InvoiceDate']
month = invoice_ts.dt.month
weekday = invoice_ts.dt.weekday  # Monday=0 ... Sunday=6
hour = invoice_ts.dt.hour
minute = invoice_ts.dt.minute
second = invoice_ts.dt.second

In [8]:
# Time of day as fractional hours: seconds are folded into minutes first,
# then minutes into hours.
hour_preci = second.div(60).add(minute).div(60).add(hour)

In [9]:
# Add the derived columns, drop the raw date columns they were built from,
# and order rows by customer, then day offset, then time of day.
data3 = (
    data2
    .assign(dayth=dayth, hour=hour_preci, weekday=weekday)
    .drop(['first_time', 'InvoiceDate'], axis=1)
    .sort_values(by=['CustomerID', 'dayth', 'hour'])
)

In [10]:
# Features: rows whose day offset is below 28, indexed by CustomerID, with
# the amount column removed.
X = (
    data3.loc[data3['dayth'] < 28]
    .drop('amount', axis=1)
    .set_index('CustomerID')
    .sort_index()
)

In [11]:
# Target: total amount per customer over rows whose day offset is below 180,
# restricted to customers that appear in the feature table X.
in_window = data3['dayth'] < 180
in_features = data3['CustomerID'].isin(X.index)
data180 = data3[in_window & in_features]
y = data180.groupby('CustomerID')['amount'].sum().sort_index()

In [12]:
# Persist features and targets. CustomerID is the index of both objects and
# is written out together with a header row.
X.to_csv('bookdata_X.csv', header=True, index=True)
y.to_csv('bookdata_y.csv', header=True, index=True)

# Preparing input data

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [None]:
from keras.layers import Input, Conv1D, Dropout, LSTM, TimeDistributed, Bidirectional, Dense
from keras.models import Model
from keras.callbacks import EarlyStopping

In [1]:
import matplotlib.pyplot as plt