## Data Analysis 

#### Section 0: Exploratory Data Analysis
#### Section 1: LASSO Models 
#### Section 2: Ridge Models 
#### Section 3: ElasticNet Models  
#### Section 4: Cross validation across regression regularization models
#### Section 5: Neural Network Models
#### Section 6: OLS Models (CHOSEN MODEL)

In [None]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import time
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LassoLarsIC
from sklearn.pipeline import make_pipeline 
from sklearn.linear_model import LassoCV
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNetCV
from sklearn.linear_model import ElasticNet
import math
from sklearn.preprocessing import MinMaxScaler
from keras.layers.core import Dense 
from keras.models import Sequential
import tensorflow as tf
from sklearn.linear_model import LinearRegression
import statsmodels.formula.api as smf
from sklearn.preprocessing import PolynomialFeatures
from statsmodels.regression import linear_model
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import Ridge
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import RepeatedKFold

from sklearn.linear_model import Ridge, RidgeCV, Lasso
from sklearn.preprocessing import StandardScaler


#warnings.filterwarnings('ignore')
%matplotlib inline
sns.set_context('notebook') 
sns.set_style('ticks')

In [None]:
# Both CSV files are read relative to the notebook's current working directory.
df_test = pd.read_csv(filepath_or_buffer="ATM_test.csv")
df_train = pd.read_csv(filepath_or_buffer="ATM_training.csv")

FileNotFoundError: ignored

In [None]:
# Positional split: columns 0-5 are taken as predictors and the last column as the response.
x_train, y_train = df_train.iloc[:, :6], df_train.iloc[:, -1]
x_test, y_test = df_test.iloc[:, :6], df_test.iloc[:, -1]

## Section 0: Exploratory Data Analysis

In [None]:
df_train.describe()

In [None]:
df_train['High'].value_counts()

In [None]:
# Scatter-plot matrix of the training columns.
sns.pairplot(data=df_train)
plt.show()

In [None]:
# Pairwise Pearson correlations between the training columns (Pearson is the pandas default).
correlations = df_train.corr(method="pearson")
correlations

In [None]:
# Annotated heatmap of the pairwise correlations between the training columns.
corrmat = df_train.corr()
hm = sns.heatmap(
    data=corrmat,
    cmap="Spectral_r",
    square=True,
    cbar=True,
    annot=True,
    fmt='.2f',
    annot_kws={'size': 10},
    xticklabels=df_train.columns,
    yticklabels=df_train.columns,
)
plt.show()

## Section 1: LASSO Models

### Model & Variable Selection

AIC Criterion

In [None]:
# Time the fit of a standardize -> LassoLarsIC (AIC) pipeline on the training data.
# NOTE(review): `normalize` is deprecated/removed in recent scikit-learn releases and the
# pipeline already standardizes the inputs -- confirm against the pinned version.
start_time = time.time()
lasso_lars_ic = make_pipeline(
    StandardScaler(),
    LassoLarsIC(criterion="aic", normalize=True),
)
lasso_lars_ic.fit(x_train, np.ravel(y_train))
fit_time = time.time() - start_time

In [None]:
# One row per alpha evaluated by the fitted LassoLarsIC step, holding its AIC value.
results = pd.DataFrame(
    {"AIC criterion": lasso_lars_ic[-1].criterion_},
    index=pd.Index(lasso_lars_ic[-1].alphas_, name="alphas"),
)
# Alpha selected by the AIC criterion.
alpha_aic = lasso_lars_ic[-1].alpha_

BIC Criterion

In [None]:
# Switch the same pipeline to the BIC criterion and refit it on the training data.
lasso_lars_ic.set_params(lassolarsic__criterion="bic")
lasso_lars_ic.fit(x_train, np.ravel(y_train))
# Assigned positionally: assumes the refit evaluates the same alphas as the AIC fit -- TODO confirm.
results["BIC criterion"] = lasso_lars_ic[-1].criterion_
alpha_bic = lasso_lars_ic[-1].alpha_

In [None]:
def highlight_min(x):
    """Return one CSS string per element of ``x``, bolding every entry equal to the minimum.

    Parameters
    ----------
    x : object exposing ``.min()`` and iteration (e.g. a pandas Series, as passed
        by ``Styler.apply``).

    Returns
    -------
    list of str
        ``"font-weight: bold"`` for elements equal to ``x.min()``, ``""`` otherwise.
    """
    smallest = x.min()
    styles = []
    for value in x:
        styles.append("font-weight: bold" if value == smallest else "")
    return styles

results.style.apply(highlight_min)

Graphing BIC and AIC Values

In [None]:
ax = results.plot()

# Mark the alpha chosen by each criterion with a dashed vertical line spanning
# that criterion's own value range.
for chosen_alpha, criterion_col, line_label, line_color in (
    (alpha_aic, "AIC criterion", "alpha: AIC estimate", "tab:blue"),
    (alpha_bic, "BIC criterion", "alpha: BIC estimate", "tab:orange"),
):
    criterion_values = results[criterion_col]
    ax.vlines(
        chosen_alpha,
        criterion_values.min(),
        criterion_values.max(),
        label=line_label,
        linestyles="--",
        color=line_color,
    )

ax.set_xlabel(r"$\alpha$")
ax.set_ylabel("criterion")
ax.set_xscale("log")
ax.legend()
_ = ax.set_title(
    f"Information-criterion for model selection (training time {fit_time:.2f}s)"
)

CV

In [None]:
# Time the fit of a standardize -> LassoCV (30 folds) pipeline on the training data.
start_time = time.time()
model = make_pipeline(StandardScaler(), LassoCV(cv=30))
model.fit(x_train, np.ravel(y_train))
fit_time = time.time() - start_time

In [None]:
# Penalty strength selected by the LassoCV step at the end of the `model` pipeline.
optimal_alpha_cv = model[-1].alpha_

In [None]:
# NOTE(review): ymin/ymax are not used anywhere in this cell -- confirm whether a
# plt.ylim(ymin, ymax) call was intended.
ymin, ymax = 2300, 3800
lasso = model[-1]

# One dotted curve per CV fold, on a logarithmic alpha axis.
plt.semilogx(lasso.alphas_, lasso.mse_path_, linestyle=":")
# Fold-averaged error curve.
plt.plot(
    lasso.alphas_,
    lasso.mse_path_.mean(axis=-1),
    label="Average across the folds",
    color="black",
    linewidth=2,
)
# Alpha selected by cross-validation.
plt.axvline(x=lasso.alpha_, color="black", linestyle="--", label="alpha: CV estimate")
plt.xlabel(r"$\alpha$")
plt.ylabel("Mean square error")
plt.legend()
_ = plt.title(
    f"Mean square error on each fold: coordinate descent (train time: {fit_time:.2f}s)"
)

In [None]:
# Trace every Lasso coefficient across a grid of penalty strengths.
# NOTE(review): `normalize` is deprecated/removed in recent scikit-learn releases --
# confirm against the pinned version.
alphas = np.linspace(0.001,200,100)
lasso = Lasso(max_iter=10000, normalize=True)
coefs = []

for a in alphas:
    lasso.set_params(alpha=a)
    lasso.fit(x_train, y_train)
    coefs.append(lasso.coef_)
ax = plt.gca()

# Keep the line handles so the legend can pair each one with its predictor name.
coef_lines = ax.plot(alphas, coefs)
ax.set_xscale('log')
plt.axis('tight')
plt.xlabel('alpha')
plt.ylabel('Standardized Coefficients')
cv_line = plt.axvline(model[-1].alpha_, linestyle="--", color="black")
# Build the legend only after every artist exists: previously it was created before the
# CV marker was drawn (and with explicit labels), so "alpha: CV estimate" never appeared.
plt.legend(
    handles=[*coef_lines, cv_line],
    labels=[*x_train.columns, "alpha: CV estimate"],
)
plt.title('Lasso coefficients as a function of alpha');

### Models

Best CV Model

In [None]:
# The CV alpha was tuned on standardized predictors (StandardScaler -> LassoCV pipeline),
# and a Lasso penalty is only meaningful on the feature scale it was tuned for. Refit with
# the same standardization instead of on the raw columns.
lassocv_best = make_pipeline(StandardScaler(), Lasso(alpha=model[-1].alpha_))
lassocv_best.fit(x_train, y_train)
# Coefficients are therefore expressed per standardized predictor.
lassoCV_coef = lassocv_best[-1].coef_
lassoCV_MSE = mean_squared_error(y_test, lassocv_best.predict(x_test))

In [None]:
pd.DataFrame(lassoCV_coef, index = x_train.columns).T

Best AIC Model

In [None]:
# Refit a plain Lasso at the AIC-selected alpha and score it on the test split.
# NOTE(review): alpha_aic was selected inside a StandardScaler pipeline, but this refit
# uses the raw predictors -- confirm the penalty is on the intended scale.
lassoaic_best = Lasso(alpha=alpha_aic).fit(x_train, y_train)
lassoAIC_coef = lassoaic_best.coef_
lassoAIC_MSE = mean_squared_error(y_true=y_test, y_pred=lassoaic_best.predict(x_test))

In [None]:
pd.DataFrame(lassoAIC_coef, index = x_train.columns).T

Best BIC Model

In [None]:
# Refit a plain Lasso at the BIC-selected alpha and score it on the test split.
# NOTE(review): alpha_bic was selected inside a StandardScaler pipeline, but this refit
# uses the raw predictors -- confirm the penalty is on the intended scale.
lassobic_best = Lasso(alpha=alpha_bic).fit(x_train, y_train)
lassoBIC_coef = lassobic_best.coef_
lassoBIC_MSE = mean_squared_error(y_true=y_test, y_pred=lassobic_best.predict(x_test))

In [None]:
pd.DataFrame(lassoBIC_coef, index = x_train.columns).T

## Section 2: Ridge Regression Model

In [None]:
# 500 candidate penalties, evenly spaced in log space from e^-10 to e^20.
alphas = np.exp(np.linspace(start=-10, stop=20, num=500))

In [None]:
# Select the ridge penalty from the candidate grid by cross-validation, scored by negative MSE.
# NOTE(review): `normalize` is deprecated/removed in recent scikit-learn releases --
# confirm against the pinned version.
ridgecv = RidgeCV(
    alphas=alphas,
    scoring='neg_mean_squared_error',
    normalize=True,
).fit(x_train, y_train)
ridgecv.alpha_

In [None]:
# Refit a ridge regression at the cross-validated penalty, then score it on the test split.
ridge = Ridge(alpha=ridgecv.alpha_, normalize=True).fit(x_train, y_train)

mse = mean_squared_error(y_true=y_test, y_pred=ridge.predict(x_test))

In [None]:
pd.DataFrame(ridge.coef_, index = x_train.columns).T

In [None]:
# Ridge predictions for the test predictors (plotted in the next cell).
y_pred = ridge.predict(X=x_test)

In [None]:
# matplotlib.pyplot is already imported as `plt` in the notebook's import cell, so the
# redundant mid-notebook re-import has been dropped.

# fitting the size of the plot
plt.figure(figsize = (15, 8))

# plotting the graphs for actual-value and predicted values
plt.plot(y_test, label = "actual-values")
plt.plot(y_pred, label = "Predicted values")

# showing the plotting of predictive modelling technique
plt.legend()
plt.show()

## Section 3: Elastic Net Models

Dealing with categorical features

In [None]:
# One-hot encode any categorical columns, dropping the first level of each.
train = pd.get_dummies(df_train, drop_first=True)
# Encoding the test frame on its own can yield a different set (or order) of dummy columns
# when a category is missing from one split. Align the test frame to the training layout,
# so later `test[predictors]` selections always match what the models were fitted on.
# Dummy columns absent from the test data are filled with 0.
test = pd.get_dummies(df_test, drop_first=True).reindex(columns=train.columns, fill_value=0)

In [None]:
# Response column name (kept as a list) and every remaining column as a predictor.
response = ['Withdraw']
predictors = [column for column in train.columns if column not in response]


Elastic Net Model

---


Determine the shrinkage parameters (the optimal alpha and l1_ratio) using cross-validation

In [None]:
x_train = train[predictors]
y_train = train[response]
# Determining optimal alpha value (and L1/L2 mix) by cross-validation
enet_cv = ElasticNetCV(l1_ratio=[0.01,0.05,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.95,0.99], normalize=True)
enet_cv.fit(train[predictors], np.ravel(train[response]))
# The penalty was tuned with normalize=True, so the final model must use the same
# preprocessing; otherwise the selected alpha is applied on a different feature scale.
# NOTE(review): `normalize` is deprecated/removed in recent scikit-learn releases --
# confirm against the pinned version.
enet = ElasticNet(alpha=enet_cv.alpha_, l1_ratio=enet_cv.l1_ratio_, normalize=True)

# Fitting the Elastic Net model to the training data
enet = enet.fit(x_train, np.ravel(y_train))

The selected l1_ratio is close to 1, so cross-validation favours the L1 (lasso) penalty over the L2 (ridge) penalty.

Calculate Mean squared Error and R2 value for the Elastic Net model

In [None]:
# Held-out predictors and response for the Elastic Net evaluation.
x, y = test[predictors], test[response]

# Value of the estimator's score() on the test split, and its predictions.
enet_score = enet.score(x, y)
predict_y = enet.predict(x)

# Mean squared Error of Elastic net model
mse = mean_squared_error(y_true=y, y_pred=predict_y)

Plotting the Elastic net model predicted values against the actual observed values

In [None]:
# fitting the size of the plot
plt.figure(figsize=(15, 8))
y_test = test[response]
# Predict from the same column selection the model was trained on (train[predictors])
# rather than the positional `x_test` slice left over from the start of the notebook.
y_pred = enet.predict(test[predictors])

# plotting the graphs for actual-value and predicted values
plt.plot(y_test, label = "actual-values")
plt.plot(y_pred, label = "Predicted values")

# showing the plotting of predictive modelling technique
plt.legend()
plt.show()

## Section 4: Cross validation across Regression Regularization models

Cross Validation across Lasso, Ridge and Elastic Net Regression models


Suggests which model performs best on the training dataset

In [None]:
# Use KFold for Cross validator
kfold = KFold(10, shuffle=True, random_state=1)

# Data frame column and row
columns = ['CV MSE']
rows = ['Ridge', 'Lasso']

# Order must match `rows`: the two lists are zipped into a label -> estimator mapping.
# (Previously the lasso model came first, so each score was reported under the other
# model's name.)
regressions = [ridge, lassoaic_best]
# Create Data frame that holds the performance on each model
results = pd.DataFrame(0.0, columns=columns, index=rows)

methods = {k: v for k, v in zip(rows, regressions)}
# Compute results for each model: mean 10-fold MSE (sign flipped, since the scorer
# returns negative MSE).
keys = ['Ridge', 'Lasso']
for k in keys:
    scores = cross_val_score(methods[k], x_train, np.ravel(y_train), cv=kfold, scoring = 'neg_mean_squared_error')
    results.loc[k] = -1*np.mean(scores)
results.round(4)

Suggests which model performs best on the test dataset

In [None]:
x_test = test[predictors]
y_test = test[response]

# Use KFold for Cross validator
kfold = KFold(10, shuffle=True, random_state=1)

# Data frame column and row
columns = ['CV MSE']
rows = ['Ridge', 'Lasso']

# Order must match `rows`: the two lists are zipped into a label -> estimator mapping.
# (Previously the lasso model came first, so each score was reported under the other
# model's name.)
regressions = [ridge, lassoaic_best]
# Create Data frame that holds the performance on each model
results = pd.DataFrame(0.0, columns=columns, index=rows)

methods = {k: v for k, v in zip(rows, regressions)}

# Compute results for each model.
# NOTE(review): cross_val_score refits each estimator on folds of the data it is given,
# so this cross-validates on the test data rather than scoring the models trained on the
# training set -- confirm this is the intended comparison.
keys = ['Ridge', 'Lasso']
for k in keys:
    scores = cross_val_score(methods[k], x_test, np.ravel(y_test), cv=kfold, scoring = 'neg_mean_squared_error')
    results.loc[k] = -1*np.mean(scores)
results.round(4)

## Section 5: Neural Network Models

In [None]:
# Seed NumPy's and TensorFlow's global random generators before the neural-network section.
np.random.seed(1)
tf.random.set_seed(0)

In [None]:
# Scale predictors and response to [0, 1] for the neural networks.
# Each scaler is fitted on the TRAINING data only and then applied to both splits.
# Refitting on the test data (as before) scales train and test differently, and meant
# y_test_scaled could not be inverted correctly with the y_train-fitted scaler.
x_scaler = MinMaxScaler(feature_range=(0,1)).fit(x_train)
x_train_scaled = x_scaler.transform(x_train)
x_test_scaled = x_scaler.transform(x_test)

# `scaler` stays bound to the response scaler (fitted on y_train), since later cells call
# scaler.inverse_transform(...) to map predictions back to the original units.
scaler = MinMaxScaler(feature_range=(0,1)).fit(y_train)
y_train_scaled = scaler.transform(y_train)
y_test_scaled = scaler.transform(y_test)

# Kept for backward compatibility: previously this name ended up referring to `scaler`.
fitted_transformer = scaler

### Optimal Neural Network Model:
Sigmoid Activation



In [None]:
# Disabled cell: the code below is wrapped in a triple-quoted string, so no network is
# built or trained here; the cell only evaluates to that string.
# The text describes a 6-6-1 dense network with sigmoid hidden activations.
'''model = Sequential()
model.add(Dense(6, input_dim=6, activation='sigmoid'))
model.add(Dense(6, activation='sigmoid'))
model.add(Dense(1, activation='linear'))
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])
model.summary()
history = model.fit(x_train_scaled, y_train_scaled, epochs=250, batch_size=20, verbose=2)
mean_squared_error(scaler.inverse_transform(y_test_scaled), scaler.inverse_transform(model.predict(x_test_scaled)))'''

"model = Sequential()\nmodel.add(Dense(6, input_dim=6, activation='sigmoid'))\nmodel.add(Dense(6, activation='sigmoid'))\nmodel.add(Dense(1, activation='linear'))\nmodel.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])\nmodel.summary()\nhistory = model.fit(x_train_scaled, y_train_scaled, epochs=250, batch_size=20, verbose=2)\nmean_squared_error(scaler.inverse_transform(y_test_scaled), scaler.inverse_transform(model.predict(x_test_scaled)))"

In [None]:
# Disabled cell: the plotting code below is wrapped in a triple-quoted string, so nothing
# is executed; the cell only evaluates to that string.
'''y_pred = model.predict(x_test_scaled)
y_pred = scaler.inverse_transform(y_pred)
# fitting the size of the plot
plt.figure(figsize=(15, 8))

# plotting the graphs for actual-value and predicted values
plt.plot(y_test, label = "True Values")
plt.plot(y_pred, label = "Predicted values")
plt.title("Neural Network Sigmoid Activation Model True versus Predicted Values of Withdraw Variable from Test Data")

# showing the plotting of predictive modelling technique
plt.legend()
plt.show()'''

In [None]:
# Disabled cell: the training-curve plot below is wrapped in a triple-quoted string, so
# nothing is executed; the cell only evaluates to that string.
'''plt.plot(history.history['mse'])
plt.xlabel("Epoch")
plt.ylabel("Mean Square Error (MSE)")
plt.title("MSE Training Evaluation Over Epochs")
plt.show()'''

### Other models considered

*   Tanh Activation with no, one and two hidden layers
*   Relu Activation with one and no hidden layers



In [None]:
# Disabled cell (string literal, not executed): tanh network with two 6-unit hidden layers.
'''model3 = Sequential()
model3.add(Dense(6, input_dim=6, activation='tanh'))
model3.add(Dense(6, activation='tanh'))
model3.add(Dense(1, activation='linear'))
model3.compile(loss='mean_squared_error', optimizer='adam')
model3.summary()
model3.fit(x_train_scaled, y_train_scaled, epochs=100, batch_size=20, verbose=0)
mean_squared_error(scaler.inverse_transform(y_test_scaled), scaler.inverse_transform(model3.predict(x_test_scaled)))'''

In [None]:
# Disabled cell (string literal, not executed): tanh network with a single 6-unit layer
# before the linear output.
'''model4 = Sequential()
model4.add(Dense(6, input_dim=6, activation='tanh'))
model4.add(Dense(1, activation='linear'))
model4.compile(loss='mean_squared_error', optimizer='adam')
model4.summary()
model4.fit(x_train_scaled, y_train_scaled, epochs=100, batch_size=20, verbose=0)
mean_squared_error(scaler.inverse_transform(y_test_scaled), scaler.inverse_transform(model4.predict(x_test_scaled)))'''

In [None]:
# Disabled cell (string literal, not executed): tanh network with three 6-unit layers
# before the linear output.
'''model5 = Sequential()
model5.add(Dense(6, input_dim=6, activation='tanh'))
model5.add(Dense(6, activation='tanh'))
model5.add(Dense(6, activation='tanh'))
model5.add(Dense(1, activation='linear'))
model5.compile(loss='mean_squared_error', optimizer='adam')
model5.summary()
model5.fit(x_train_scaled, y_train_scaled, epochs=100, batch_size=20, verbose=0)
mean_squared_error(scaler.inverse_transform(y_test_scaled), scaler.inverse_transform(model5.predict(x_test_scaled)))'''

In [None]:
# Disabled cell (string literal, not executed): ReLU network with two 6-unit layers
# before the linear output.
'''model6 = Sequential()
model6.add(Dense(6, input_dim=6, activation='relu'))
model6.add(Dense(6, activation='relu'))
model6.add(Dense(1, activation='linear'))
model6.compile(loss='MSE', optimizer='adam')
model6.fit(x_train_scaled, y_train_scaled, epochs=100, batch_size=20, verbose=0)
mean_squared_error(scaler.inverse_transform(y_test_scaled), scaler.inverse_transform(model6.predict(x_test_scaled)))'''

In [None]:
# Disabled cell (string literal, not executed): ReLU network with a single 6-unit layer
# before the linear output.
'''model2 = Sequential()
model2.add(Dense(6, input_dim=6, activation='relu'))
model2.add(Dense(1, activation='linear'))
model2.compile(loss='MSE', optimizer='adam')
model2.fit(x_train_scaled, y_train_scaled, epochs=100, batch_size=20, verbose=0)
mean_squared_error(scaler.inverse_transform(y_test_scaled), scaler.inverse_transform(model2.predict(x_test_scaled)))'''

## Section 6: OLS Models (Chosen Model)

Polynomial Feature Analysis for Interaction Terms

In [None]:
# Expand the predictors into every interaction term (products of distinct features up to
# degree 6, no bias column) and regress the response on all of them.
x_interaction = PolynomialFeatures(6, interaction_only=True, include_bias=False)
x_interaction_transformation = x_interaction.fit_transform(x_train)
# `get_feature_names` is deprecated/removed in newer scikit-learn releases; prefer
# `get_feature_names_out` and fall back only when it is unavailable.
interaction_feature_names = (
    x_interaction.get_feature_names_out()
    if hasattr(x_interaction, "get_feature_names_out")
    else x_interaction.get_feature_names()
)
interaction_df = pd.DataFrame(x_interaction_transformation, columns = interaction_feature_names)
interaction_model = linear_model.OLS(y_train, interaction_df).fit()
# Keep only the terms whose p-value is below 1e-5.
pd.DataFrame(interaction_model.pvalues[interaction_model.pvalues < 0.00001])

### Best and Chosen Model

In [None]:
# Formula for the chosen OLS model: interactions among Shops, Downtown, ATMs, Weekday and
# Center, plus High as an additive term.
# NOTE(review): in the formula syntax `a*b` expands to main effects plus interactions, so
# the lower-order products listed after the first five-way product look redundant
# (only `High` adds a new term) -- confirm before simplifying.
formula = 'Withdraw ~ Shops*Downtown*ATMs*Weekday*Center + Downtown*ATMs*Weekday*Center + Shops*ATMs*Weekday*Center + Shops*Downtown*Weekday*Center + Shops*Downtown*ATMs*Center + Shops*Downtown*ATMs*Weekday + Shops*Downtown*ATMs + Shops*Downtown*Weekday + Shops*Downtown*Center + Shops*Weekday*Center + Shops*ATMs*Center + Shops*ATMs*Weekday + Downtown*ATMs*Weekday + Downtown*ATMs*Center + Downtown*Weekday*Center + ATMs*Weekday*Center + Downtown*ATMs + Shops*Downtown + Shops*ATMs + Shops*Weekday + Shops*Center + Downtown*Weekday + Downtown*Center + ATMs*Weekday + ATMs*Center + Weekday*Center + High + Center + Weekday + Shops + Downtown + ATMs'

In [None]:
# Fit the chosen OLS specification on the raw training frame.
lm = smf.ols(formula=formula, data=df_train).fit()

In [None]:
lm.summary()

Test MSE For Chosen Model

In [None]:
# Test-set mean squared error of the chosen OLS model.
mean_squared_error(y_true=y_test, y_pred=lm.predict(x_test))

### Other models

In [None]:
# Additive baseline with all six predictors. It is kept under its own name so the fit is
# not discarded: previously it was assigned to `lm` and overwritten two lines later.
lm_additive = smf.ols('Withdraw ~ Shops + Downtown + ATMs + Weekday + Center + High', df_train).fit()
# Alternative interaction model that swaps the roles of Center and High relative to the
# chosen specification.
# NOTE: this rebinds `formula` and `lm`, so after this cell they no longer refer to the
# chosen model.
formula = 'Withdraw ~ Shops*Downtown*ATMs*Weekday*High + Downtown*ATMs*Weekday*High + Shops*ATMs*Weekday*High + Shops*Downtown*Weekday*High + Shops*Downtown*ATMs*High + Shops*Downtown*ATMs*Weekday + Shops*Downtown*ATMs + Shops*Downtown*Weekday + Shops*Downtown*High + Shops*Weekday*High + Shops*ATMs*High + Shops*ATMs*Weekday + Downtown*ATMs*Weekday + Downtown*ATMs*High + Downtown*Weekday*High + ATMs*Weekday*High + Downtown*ATMs + Shops*Downtown + Shops*ATMs + Shops*Weekday + Shops*High + Downtown*Weekday + Downtown*High + ATMs*Weekday + ATMs*High + Weekday*High + Center + High + Weekday + Shops + Downtown + ATMs'
lm = smf.ols(formula, df_train).fit()