In [None]:
import pandas as pd
pd.Series

import math
import csv
from datetime import datetime

import numpy as np
import scipy as sc

import statsmodels
import sklearn
import matplotlib.pylab as plt
%matplotlib inline

from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 25, 16

import theano
import tensorflow
import keras

from sklearn import preprocessing

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.utils import np_utils
from matplotlib import pyplot

from sklearn.metrics import mean_squared_error
from sklearn.feature_selection import RFE

In [None]:
# Load the tab-separated data set with its first column as a parsed date index,
# then order the rows by that index.
# NOTE(review): '../' is a directory path — the data file name appears to be missing.
df = pd.read_csv('../', sep='\t', index_col=0, parse_dates=True).sort_index()

def create_small_df(data, columns):
    """Return an independent copy of ``data`` restricted to ``columns``.

    Args:
        data: Source DataFrame; it is not modified.
        columns: Column labels to keep, in the order they should appear.

    Returns:
        A copy of the selected columns.
    """
    return data[columns].copy()

### The main module with the model 

In [None]:
#LSTM Data Preparation
# convert series to supervised learning

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table of shifted columns.

    Args:
        data: Observations in any form ``pd.DataFrame`` accepts (flat list,
            list of rows, 1-D or 2-D array, DataFrame).
        n_in: Number of lag steps (t-n_in ... t-1) added as input columns.
        n_out: Number of forecast steps (t ... t+n_out-1) added as output columns.
        dropnan: If True, drop every row that contains a NaN value.

    Returns:
        DataFrame with columns named ``var<j>(t-<i>)``, ``var<j>(t)`` and
        ``var<j>(t+<i>)``, lag columns first.
    """
    frame = pd.DataFrame(data)
    # Count the variables on the constructed frame rather than on the raw
    # input: a list of rows has more than one variable, and a 1-D array has
    # no shape[1].
    n_vars = frame.shape[1]
    shifted, names = [], []
    # input sequence (t-n, ... t-1)
    for lag in range(n_in, 0, -1):
        shifted.append(frame.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for step in range(n_out):
        shifted.append(frame.shift(-step))
        if step == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, step) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(shifted, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg = agg.dropna()
    return agg

def preprocessing_data(data, n, s_columns):
    """Scale ``data`` to [0, 1] and frame it as a lagged supervised table.

    The scaler is fitted on every row of ``data``. After framing with ``n``
    lag steps and one output step, the last ``s_columns - 1`` columns are
    removed so that only the first variable at time t remains as the target.

    Args:
        data: 2-D array of observations.
        n: Number of lag steps passed to ``series_to_supervised``.
        s_columns: Number of columns in ``data``.

    Returns:
        Tuple ``(scaler, values)``: the fitted MinMaxScaler and the framed
        table as an array.
    """
    scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
    normalized = scaler.fit_transform(data)
    supervised = series_to_supervised(normalized, n, 1)
    # Negative positions -s_columns+1 ... -1 address the trailing columns.
    trailing_positions = list(range(-s_columns + 1, 0))
    unwanted_labels = supervised.columns[trailing_positions]
    supervised.drop(unwanted_labels, axis=1, inplace=True)
    return scaler, supervised.values

def split_data(values, n_steps,s_columns, n_train_days, n_valid_days, n_test_days):
    """Split a framed table into train/validation/test inputs and targets.

    Rows are cut in order: the first ``n_train_days`` rows for training, the
    next ``n_valid_days`` for validation, and all remaining rows for testing.
    ``n_test_days`` is accepted but not used. In each part the last column is
    the target and the other columns are reshaped to
    ``(samples, n_steps, s_columns)``.

    Returns:
        Tuple ``(train_X, train_y, valid_X, valid_y, test_X, test_y)``.
    """
    valid_end = int(n_train_days + n_valid_days)
    partitions = (
        values[:n_train_days, :],
        values[n_train_days:valid_end, :],
        values[valid_end:, :],
    )
    pieces = []
    for part in partitions:
        inputs, target = part[:, :-1], part[:, -1]
        # reshape input to be 3D [samples, timesteps, features]
        pieces.append(inputs.reshape((inputs.shape[0], n_steps, s_columns)))
        pieces.append(target)
    return tuple(pieces)

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error of ``y_pred`` against ``y_true``.

    Both inputs are converted to arrays; the result is the mean of
    ``|(y_true - y_pred) / y_true|`` multiplied by 100.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = (actual - predicted) / actual
    return np.mean(np.abs(relative_error)) * 100

def summarize_results(scores):
    """Print the mean and standard deviation of ``scores`` and return them.

    Args:
        scores: Sequence of numeric results, one per run.

    Returns:
        Tuple ``(mean, std)`` as Python floats, so that a caller returning
        this function's result passes on the numbers instead of None.
    """
    m, s = float(np.mean(scores)), float(np.std(scores))
    print('Mean %.4f (+/- %.4f)' % (m, s))
    return m, s

In [None]:
def _inverse_scale_target(scaler, target, flat_inputs, s_columns):
    """Map a scaled target column back to original units using ``scaler``.

    ``scaler.inverse_transform`` is called on ``s_columns`` columns, so the
    target is padded with the last ``s_columns - 1`` columns of
    ``flat_inputs``; only column 0 of the result is returned.
    """
    target = np.asarray(target).reshape((len(target), 1))
    # Use an explicit start index: a ``1 - s_columns`` slice start becomes 0
    # when s_columns == 1 and would select every column instead of none.
    start = flat_inputs.shape[1] - (s_columns - 1)
    padded = np.concatenate((target, flat_inputs[:, start:]), axis=1)
    return scaler.inverse_transform(padded)[:, 0]


def define_fit_lstm(train_X, train_y, valid_X, valid_y, test_X, test_y, n_steps, scaler, s_columns):
    """Build, train and evaluate a single-layer LSTM; plot and print diagnostics.

    Args:
        train_X, valid_X, test_X: 3-D inputs; axes 1 and 2 of ``train_X``
            define the LSTM input shape.
        train_y, valid_y, test_y: Scaled targets matching the inputs.
        n_steps: Number of lag steps per sample.
        scaler: Fitted scaler used to map predictions back to original units.
        s_columns: Number of columns per time step.

    Returns:
        Tuple ``(loss, rmse, mape)``: ``loss`` is the first value returned by
        ``model.evaluate`` on the test data (scaled units); ``rmse`` and
        ``mape`` are computed on inverse-scaled test targets and predictions.
    """
    batch_size = n_steps * s_columns
    model = Sequential()
    model.add(LSTM(100, input_shape=(train_X.shape[1], train_X.shape[2])))
    model.add(Dense(1))
    model.compile(loss='mae', optimizer='Adagrad', metrics=['mse', 'mae', 'mape'])
    # fit network
    history = model.fit(train_X, train_y, epochs=100, verbose=0, batch_size=batch_size,
                        shuffle=False, validation_data=(valid_X, valid_y))
    # plot history
    plt.subplot(2, 1, 1)
    plt.plot(history.history['loss'], label='loss_MAE', lw=2)
    plt.plot(history.history['val_loss'], label='val_loss_MAE', lw=2)
    plt.title('Training and Validation Loss')
    plt.legend(prop={'size': 15})
    plt.grid(True)
    plt.show()

    # evaluate model
    results = model.evaluate(test_X, test_y, verbose=0)
    loss = results[0]
    print('Evaluation results: loss, mse, mae, mape')
    print(results)
    print ('History results: ')
    print('Loss: %.3f - %.3f' % (history.history['loss'][0],history.history['loss'][-1]))
    print('Validation Loss: %.3f - %.3f' % (history.history['val_loss'][0],history.history['val_loss'][-1]))

    # make a prediction
    yhat = model.predict(test_X)
    flat_test_X = test_X.reshape((test_X.shape[0], n_steps*s_columns))
    # invert scaling for forecast and for actual values
    inv_yhat = _inverse_scale_target(scaler, yhat, flat_test_X, s_columns)
    inv_y = _inverse_scale_target(scaler, test_y, flat_test_X, s_columns)
    # RMSE and MAPE in original units
    rmse = np.sqrt(mean_squared_error(inv_y, inv_yhat))
    mape = mean_absolute_percentage_error(inv_y, inv_yhat)
    plt.subplot(2, 1, 1)
    plt.plot(inv_yhat, label='yhat', linestyle='--', lw=2)
    plt.plot(inv_y, label='y', lw=2)
    plt.title('Observed and Predicted Values')
    plt.legend(prop={'size': 15})
    plt.grid(True)
    plt.show()
    return loss, rmse, mape

   

In [None]:
def run_model(n,s,values):
    """Run one preprocess / split / train / evaluate cycle.

    Split sizes are derived from the row count of ``values``: 80% for
    training, 10% for validation and the remainder for testing.

    Args:
        n: Number of lag steps.
        s: Number of columns in ``values``.
        values: Array of observations, one row per time step.

    Returns:
        The ``(loss, rmse, mape)`` tuple produced by ``define_fit_lstm``.
    """
    total_rows = values.shape[0]
    n_train_days = round(total_rows*0.8)
    n_valid_days = round(total_rows*0.1)
    n_test_days = total_rows - n_train_days - n_valid_days
    # Preprocessing
    scaler, supervised = preprocessing_data(values, n, s)
    # Split data into train, validation and test sets
    splits = split_data(supervised, n, s, n_train_days, n_valid_days, n_test_days)
    # Define and fit the LSTM model, then make a prediction
    return define_fit_lstm(*splits, n, scaler, s)

def run_experiment(n,s,values,repeats=5):
    """Run the model ``repeats`` times and summarise loss, RMSE and MAPE.

    Args:
        n: Number of lag steps, forwarded to ``run_model``.
        s: Number of columns in ``values``, forwarded to ``run_model``.
        values: Array of observations, forwarded to ``run_model``.
        repeats: Number of runs to perform.

    Returns:
        Tuple ``(mean_rmse, std_rmse)`` over all runs, so callers that store
        the result receive numbers rather than None.
    """
    separator = '--------------------------------------------------------------------------------------------------------'
    print("Run experiment with " + str(repeats) + " repeats")
    losses, rmses, mapes = [], [], []
    for r in range(repeats):
        print(separator)
        print('Run #%d' % (r+1))
        loss, rmse, mape = run_model(n,s,values)
        # The loss reported by run_model is measured on the test split.
        print('>#%d Test Loss: %.3f' % (r+1, loss))
        print('>#%d Test RMSE: %.3f' % (r+1, rmse))
        print('>#%d Test MAPE: %.3f' % (r+1, mape))
        losses.append(loss)
        rmses.append(rmse)
        mapes.append(mape)
    print(separator)
    print('Final Results: ')
    print('Average Loss: ')
    summarize_results(losses)
    print('Average RMSE: ')
    summarize_results(rmses)
    print('Average MAPE: ')
    summarize_results(mapes)
    # Computed here, not via a second summarize_results call, so the RMSE
    # summary is printed only once.
    return float(np.mean(rmses)), float(np.std(rmses))

### Experiment with 5 repeats

In [None]:
# Experiment with all 21 features
# The column count is read from the array itself so it cannot get out of sync
# with the loaded data.
vals = df.values.astype('float32')
run_experiment(2, vals.shape[1], vals)

#### Experiments with the original data set (8 features), predicting the average sale amount

In [None]:
# Target column first, followed by the seven original predictors.
columns = [
    'AVERAGE_SALE_AMOUNT',
    'PROFIT',
    'NUMBER_OF_SALES',
    'CONVERSION_RATE',
    'WEEKDAY',
    'COST',
    'CLICKS',
    'IMPRESSIONS',
]
small_df = create_small_df(data=df, columns=columns)
small_df.head()

In [None]:
# Model input: the selected columns as a float32 array.
values = small_df.values.astype('float32')

In [None]:
from statsmodels.graphics.tsaplots import plot_acf

# One autocorrelation plot per column of the full data set, titled with the column name.
for f in df.columns:
    plot_acf(df[f])
    plt.title(f)
    plt.show()

In [None]:
# One lag step on the 8-column `values` array.
run_experiment(n=1, s=8, values=values)

In [None]:
# Two lag steps on the 8-column `values` array.
run_experiment(n=2, s=8, values=values)

In [None]:
# Three lag steps on the 8-column `values` array.
run_experiment(n=3, s=8, values=values)

### Feature Selection

#### Experiment with selected features by f_regression

In [None]:
# Feature Extraction with Univariate Statistical Tests (Chi-squared for classification)
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_regression, mutual_info_regression

In [None]:
# Work on a copy of the loaded frame for the feature-scoring steps.
data = df.copy()

# Empty score table with one column per scoring method; it is filled in after
# the scoring functions have run.
cols = ['features', 'f_regression', 'mutual_f_regression']
score_df = pd.DataFrame(data=[], columns=cols)
score_df.head()

In [None]:
def calc_f_regression(data, k):
    """Score every feature column against the target with ``f_regression``.

    Column 0 of ``data`` is used as the target and all remaining columns as
    features.

    Args:
        data: DataFrame with the target in the first column.
        k: Number of features the ``SelectKBest`` selector is configured to
            keep. It was previously ignored in favour of a hard-coded 8.

    Returns:
        Tuple ``(list_features, list_scores)``: the feature column labels and
        the fitted selector's ``scores_`` array.
    """
    array = data.values
    X, Y = array[:, 1:], array[:, 0]
    list_features = data.columns[1:]
    selector = SelectKBest(score_func=f_regression, k=k)
    fit = selector.fit(X, Y)
    # Sets numpy's global print precision (kept from the original code).
    np.set_printoptions(precision=3)
    return list_features, fit.scores_

def calc_mutual_f_regression(data, k):
    """Score every feature column against the target with ``mutual_info_regression``.

    Column 0 of ``data`` is used as the target and all remaining columns as
    features.

    Args:
        data: DataFrame with the target in the first column.
        k: Number of features the ``SelectKBest`` selector is configured to
            keep. It was previously ignored in favour of a hard-coded 8.

    Returns:
        Tuple ``(list_features, list_scores)``: the feature column labels and
        the fitted selector's ``scores_`` array.
    """
    array = data.values
    X, Y = array[:, 1:], array[:, 0]
    list_features = data.columns[1:]
    selector = SelectKBest(score_func=mutual_info_regression, k=k)
    fit = selector.fit(X, Y)
    # Sets numpy's global print precision (kept from the original code).
    np.set_printoptions(precision=3)
    return list_features, fit.scores_

#print('#{} {}: {:.2f}'.format(i, columns[i+1], fit.scores_[i]))
# Fill the score table: feature labels plus one score column per method.
list_features, list_scores1 = calc_f_regression(data, k=8)
list_scores2 = calc_mutual_f_regression(data, k=8)[1]
score_df['features'] = list_features
score_df['f_regression'] = list_scores1
score_df['mutual_f_regression'] = list_scores2
#score_df

In [None]:
# Seven highest f_regression scores.
(
    score_df[['features', 'f_regression']]
    .sort_values('f_regression', ascending=False)
    .head(7)
)

In [None]:
# Names of the seven features with the highest f_regression score.
columns_f = (
    score_df
    .sort_values('f_regression', ascending=False)
    .head(7)['features']
    .tolist()
)
columns_f

In [None]:
# Seven highest mutual-information scores.
(
    score_df[['features', 'mutual_f_regression']]
    .sort_values('mutual_f_regression', ascending=False)
    .head(7)
)

In [None]:
# Names of the seven features with the highest mutual-information score.
columns_mutal = (
    score_df
    .sort_values('mutual_f_regression', ascending=False)
    .head(7)['features']
    .tolist()
)
columns_mutal

### LSTM with top 7 f_regression features

In [None]:
# Selected features, with the target placed in front as column 0.
new_df = create_small_df(data=df, columns=columns_f)
new_df.insert(loc=0, column='AVERAGE_SALE_AMOUNT', value=df['AVERAGE_SALE_AMOUNT'])
new_df.head()

In [None]:
# Model input for the f_regression feature set, as a float32 array.
values = new_df.values.astype('float32')

In [None]:
# One lag step on the f_regression feature set (8 columns including the target).
run_experiment(n=1, s=8, values=values)

In [None]:
# Two lag steps on the f_regression feature set (8 columns including the target).
run_experiment(n=2, s=8, values=values)

#### LSTM with top 7 features from mutual_info_regression

In [None]:
# Selected features, with the target placed in front as column 0.
new_df2 = create_small_df(data=df, columns=columns_mutal)
new_df2.insert(loc=0, column='AVERAGE_SALE_AMOUNT', value=df['AVERAGE_SALE_AMOUNT'])
new_df2.head()

In [None]:
# Model input for the mutual-information feature set, as a float32 array.
values2 = new_df2.values.astype('float32')

In [None]:
# One lag step on the mutual-information feature set (8 columns including the target).
run_experiment(n=1, s=8, values=values2)

In [None]:
# Two lag steps on the mutual-information feature set (8 columns including the target).
run_experiment(n=2, s=8, values=values2)

#### experiment based on plot_acf results. Drop: EPC, Cost_Sales_Ratio, CPO

In [None]:
# Drop the three columns excluded after inspecting the ACF plots.
acf_df = df.drop(['CPO', 'EPC', 'COST_SALES_RATIO'], axis=1)
acf_values = acf_df.values.astype('float32')
# The column count is read from the array so it stays correct if the drop list changes.
run_experiment(3, acf_values.shape[1], acf_values)

### Recursive feature elimination

In [None]:
from sklearn import linear_model

# Split the frame into predictors and the target column.
X = df.drop(columns=['AVERAGE_SALE_AMOUNT'])
y = df['AVERAGE_SALE_AMOUNT']

# Linear regression fitted on all predictors; `a` holds its coefficients.
lm = linear_model.LinearRegression()
lm.fit(X, y)
a = lm.coef_

# Result table for the feature-elimination runs. The key 'reults' is spelled
# this way wherever rows are added, so it is kept as is.
lstm_results = pd.DataFrame({'features': [], 'reults': []})

In [None]:
# Display the current contents of the result table.
lstm_results

In [None]:
# Recursive feature elimination: for each size k, select k features with the
# linear model and run the LSTM experiment on them.
for k in range(1,12):
    # Pass the feature count by keyword; recent scikit-learn releases do not
    # accept it positionally.
    rfe = RFE(lm, n_features_to_select=k)
    fit = rfe.fit(X, y)

    # report selected features
    print('Selected Features:')
    # Read the names from X itself so the support mask lines up with the
    # fitted columns wherever the target column sits in df.
    columns_rfe = list(X.columns[fit.support_])
    df_rfe = create_small_df(df, columns_rfe)
    df_rfe.insert(0, 'AVERAGE_SALE_AMOUNT', df['AVERAGE_SALE_AMOUNT'])
    values_rfe = df_rfe.values.astype('float32')
    print(columns_rfe)
    result = run_experiment(2, k+1, values_rfe)
    # DataFrame.append was removed in pandas 2.0; concatenate a one-row frame
    # instead. This is done per iteration so finished runs survive an
    # interrupted loop.
    new_row = pd.DataFrame([{'features': columns_rfe, 'reults': result}])
    lstm_results = pd.concat([lstm_results, new_row], ignore_index=True)

In [None]:
# Row at position 10 of the result table, shown as a plain list.
# NOTE(review): presumably the k=11 run, assuming the RFE loop ran exactly once
# on an empty table — confirm. This also overwrites the earlier `a`.
a = lstm_results.iloc[10]
a.tolist()

In [None]:
# Named feature_set rather than `set` so the built-in set type is not shadowed.
feature_set = ['CONVERSION_RATE', 'WEEKDAY', 'COST_SALES_RATIO','NUMBER_OF_SOLD_ITEMS']
df_set = create_small_df(df, feature_set)
df_set.insert(0, 'AVERAGE_SALE_AMOUNT', df['AVERAGE_SALE_AMOUNT'])
values_set = df_set.values.astype('float32')
# Column count (target + features), passed to run_experiment as `s`.
f = df_set.shape[1]
run_experiment(2, f, values_set)

### Check the remaining features, adding them one at a time

In [None]:
# Remove the listed columns (including the target); the columns that remain
# are the ones still to be examined.
df_drop = df.drop(
    columns=[
        'NUMBER_OF_SALES', 'CONVERSION_RATE', 'WEEKDAY',
        'NUMBER_OF_SOLD_ITEMS', 'NEW_CUSTOMER_COUNT', 'NEW_CUSTOMER_VALUE',
        'CPC', 'CTR', 'COST_SALES_RATIO', 'CPO', 'ROI', 'AVERAGE_SALE_AMOUNT',
    ]
)

In [None]:
# Names of the columns that remain after the drop.
left = df_drop.columns.tolist()

In [None]:
# Add each remaining feature in turn to the fixed base set and rerun the experiment.
base_features = ['CONVERSION_RATE', 'WEEKDAY', 'COST_SALES_RATIO','NUMBER_OF_SOLD_ITEMS']
for i in left:
    # Named feature_set rather than `set` so the built-in set type is not shadowed.
    feature_set = base_features + [i]
    df_set = create_small_df(df, feature_set)
    df_set.insert(0, 'AVERAGE_SALE_AMOUNT', df['AVERAGE_SALE_AMOUNT'])
    values_set = df_set.values.astype('float32')
    f = df_set.shape[1]
    run_experiment(2, f, values_set)

### Check the best combinations

In [None]:
# Add each candidate in turn to the five-feature base set and rerun the experiment.
new_list = ['COST', 'CLICKS', 'EPC', 'SALE_AMOUNT_BEFORE_CANCELLATIONS']
base_features = ['CONVERSION_RATE', 'WEEKDAY', 'COST_SALES_RATIO','NUMBER_OF_SOLD_ITEMS', 'IMPRESSIONS']
for i in new_list:
    # Named feature_set rather than `set` so the built-in set type is not shadowed.
    feature_set = base_features + [i]
    print('---------------------------' + str(i) + '--------------------------')
    print(feature_set)
    df_set = create_small_df(df, feature_set)
    df_set.insert(0, 'AVERAGE_SALE_AMOUNT', df['AVERAGE_SALE_AMOUNT'])
    values_set = df_set.values.astype('float32')
    f = df_set.shape[1]
    run_experiment(2, f, values_set)

##### Check ['CONVERSION_RATE', 'WEEKDAY', 'COST_SALES_RATIO','NUMBER_OF_SOLD_ITEMS','IMPRESSIONS'] again
#### 5-time run

In [None]:
# Named feature_set rather than `set` so the built-in set type is not shadowed.
feature_set = ['CONVERSION_RATE', 'WEEKDAY', 'COST_SALES_RATIO','NUMBER_OF_SOLD_ITEMS', 'IMPRESSIONS']
df_set = create_small_df(df, feature_set)
df_set.insert(0, 'AVERAGE_SALE_AMOUNT', df['AVERAGE_SALE_AMOUNT'])
values_set = df_set.values.astype('float32')
# Column count (target + features); the lag experiments that follow pass it as `s`.
f = df_set.shape[1]

#### t-1 step

In [None]:
# 1 lag step; f and values_set come from the feature-set cell above.
run_experiment(n=1, s=f, values=values_set)

#### t-2 step

In [None]:
# 2 lag steps; f and values_set come from the feature-set cell above.
run_experiment(n=2, s=f, values=values_set)

##### t-3 step

In [None]:
# 3 lag steps; f and values_set come from the feature-set cell above.
run_experiment(n=3, s=f, values=values_set)

##### t-4 step

In [None]:
# 4 lag steps; f and values_set come from the feature-set cell above.
run_experiment(n=4, s=f, values=values_set)

##### t-5 step

In [None]:
# 5 lag steps; f and values_set come from the feature-set cell above.
run_experiment(n=5, s=f, values=values_set)

##### t-7 step

In [None]:
# 7 lag steps; f and values_set come from the feature-set cell above.
run_experiment(n=7, s=f, values=values_set)

##### t-9 step

In [None]:
# 9 lag steps; f and values_set come from the feature-set cell above.
run_experiment(n=9, s=f, values=values_set)

##### t-10 step

In [None]:
# 10 lag steps; f and values_set come from the feature-set cell above.
run_experiment(n=10, s=f, values=values_set)

##### t-12 step

In [None]:
# 12 lag steps; f and values_set come from the feature-set cell above.
run_experiment(n=12, s=f, values=values_set)

##### t-16 step

In [None]:
# 16 lag steps; f and values_set come from the feature-set cell above.
run_experiment(n=16, s=f, values=values_set)

##### t-20 step

In [None]:
# 20 lag steps; f and values_set come from the feature-set cell above.
run_experiment(n=20, s=f, values=values_set)

##### t-50 step

In [None]:
# 50 lag steps; f and values_set come from the feature-set cell above.
run_experiment(n=50, s=f, values=values_set)

##### t-100 step

In [None]:
# Display the list of remaining feature names built earlier.
# NOTE(review): this cell sits under the "t-100 step" heading but runs no experiment — confirm intent.
left