In [1]:
# Data processing packages
import numpy as np
import pandas as pd
from collections import Counter

# Machine learning packages
from sklearn.model_selection import GridSearchCV, RepeatedKFold, cross_val_score, KFold, train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MultiLabelBinarizer, FunctionTransformer
from sklearn.pipeline import Pipeline
from mlxtend.feature_selection import SequentialFeatureSelector
from sklearn.feature_selection import RFE, SelectPercentile, chi2, mutual_info_regression, SelectFromModel
from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.decomposition import PCA, KernelPCA, FastICA
from sklearn.manifold import TSNE
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.neural_network import MLPRegressor

import torch
# from keras.models import Sequential
# from keras.layers import Activation, Dense
# from keras import optimizers
# from keras.layers import BatchNormalization

# Visualization packages
import seaborn as sns
import matplotlib.pyplot as plt

# Others
import time
from pathlib import Path

## 5. Models

In [2]:
X1_pca = torch.load('X1_pca').to_numpy()
X1_ica = torch.load('X1_ica').to_numpy()
X1_tsne = torch.load('X1_tsne').to_numpy()
Y1 = pd.read_csv("Y1.csv", header=None, names=['revenue ']).to_numpy().ravel()
X_train_pca, X_test_pca, y_train, y_test = train_test_split(X1_pca, Y1, random_state=42, test_size=0.1)
X_train_ica, X_test_ica, _, _ = train_test_split(X1_ica, Y1, random_state=42, test_size=0.1)
X_train_tsne, X_test_tsne, _, _ = train_test_split(X1_tsne, Y1, random_state=42, test_size=0.1)

### 5.3 MLP

In [3]:
mlp = MLPRegressor(random_state=42, max_iter=1000, verbose=0)
hidden_layer_lists = [
    (200,),
    # (200, 175,),
    (200, 175, 150,),
    # (200, 175, 150, 125,),
    (200, 175, 150, 125, 100,),
    # (200, 175, 150, 125, 100, 75,),
    (200, 175, 150, 125, 100, 75, 50,),
    # (200, 175, 150, 125, 100, 75, 50, 25,),
    (200, 175, 150, 125, 100, 75, 50, 25, 10)
]
mlp_parameter_grid = {
    'hidden_layer_sizes': hidden_layer_lists,
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'batch_size': [177],
    'learning_rate': ['constant','adaptive'],
}

In [4]:
mlp_grid_pca = GridSearchCV(mlp, mlp_parameter_grid, cv=10, scoring='neg_root_mean_squared_error', return_train_score=True,verbose=2)
mlp_grid_ica = GridSearchCV(mlp, mlp_parameter_grid, cv=10, scoring='neg_root_mean_squared_error', return_train_score=True,verbose=2)
mlp_grid_tsne = GridSearchCV(mlp, mlp_parameter_grid, cv=10, scoring='neg_root_mean_squared_error', return_train_score=True,verbose=2)

In [5]:
mlp_grid_pca.fit(X_train_pca, np.log(1 + y_train))

Fitting 10 folds for each of 80 candidates, totalling 800 fits
[CV] END activation=tanh, alpha=0.0001, batch_size=177, hidden_layer_sizes=(200,), learning_rate=constant, solver=sgd; total time=  29.2s
[CV] END activation=tanh, alpha=0.0001, batch_size=177, hidden_layer_sizes=(200,), learning_rate=constant, solver=sgd; total time=  32.2s
[CV] END activation=tanh, alpha=0.0001, batch_size=177, hidden_layer_sizes=(200,), learning_rate=constant, solver=sgd; total time=  42.7s
[CV] END activation=tanh, alpha=0.0001, batch_size=177, hidden_layer_sizes=(200,), learning_rate=constant, solver=sgd; total time=  39.5s
[CV] END activation=tanh, alpha=0.0001, batch_size=177, hidden_layer_sizes=(200,), learning_rate=constant, solver=sgd; total time=  26.0s
[CV] END activation=tanh, alpha=0.0001, batch_size=177, hidden_layer_sizes=(200,), learning_rate=constant, solver=sgd; total time=  21.7s
[CV] END activation=tanh, alpha=0.0001, batch_size=177, hidden_layer_sizes=(200,), learning_rate=constant, so



[CV] END activation=tanh, alpha=0.0001, batch_size=177, hidden_layer_sizes=(200, 175, 150, 125, 100, 75, 50, 25, 10), learning_rate=adaptive, solver=sgd; total time= 1.6min
[CV] END activation=tanh, alpha=0.0001, batch_size=177, hidden_layer_sizes=(200, 175, 150, 125, 100, 75, 50, 25, 10), learning_rate=adaptive, solver=sgd; total time=  48.7s
[CV] END activation=tanh, alpha=0.0001, batch_size=177, hidden_layer_sizes=(200, 175, 150, 125, 100, 75, 50, 25, 10), learning_rate=adaptive, solver=sgd; total time=  37.4s
[CV] END activation=tanh, alpha=0.0001, batch_size=177, hidden_layer_sizes=(200, 175, 150, 125, 100, 75, 50, 25, 10), learning_rate=adaptive, solver=sgd; total time= 1.1min
[CV] END activation=tanh, alpha=0.0001, batch_size=177, hidden_layer_sizes=(200, 175, 150, 125, 100, 75, 50, 25, 10), learning_rate=adaptive, solver=sgd; total time=  41.2s
[CV] END activation=tanh, alpha=0.0001, batch_size=177, hidden_layer_sizes=(200, 175, 150, 125, 100, 75, 50, 25, 10), learning_rate=ada

GridSearchCV(cv=10,
             estimator=MLPRegressor(max_iter=1000, random_state=42, verbose=0),
             param_grid={'activation': ['tanh', 'relu'],
                         'alpha': [0.0001, 0.05], 'batch_size': [177],
                         'hidden_layer_sizes': [(200,), (200, 175, 150),
                                                (200, 175, 150, 125, 100),
                                                (200, 175, 150, 125, 100, 75,
                                                 50),
                                                (200, 175, 150, 125, 100, 75,
                                                 50, 25, 10)],
                         'learning_rate': ['constant', 'adaptive'],
                         'solver': ['sgd', 'adam']},
             return_train_score=True, scoring='neg_root_mean_squared_error',
             verbose=2)

In [6]:
torch.save(mlp_grid_pca, "../models/mlp_grid_pca")

In [7]:
print("The best parameters are:", mlp_grid_pca.best_params_)
print("The best RMSE is:", mlp_grid_pca.best_score_)

The best parameters are: {'activation': 'tanh', 'alpha': 0.0001, 'batch_size': 177, 'hidden_layer_sizes': (200,), 'learning_rate': 'constant', 'solver': 'adam'}
The best RMSE is: -2.366255956054574


In [None]:
# y_pred_mlp_pca = np.exp(mlp_grid_pca.predict(X_test_pca)) - 1
# print("Root mean squared error (ICA Embeddings): {:.2f} Millions".format(np.sqrt(mean_squared_error(y_test, y_pred_mlp_pca)) / 1000000))

In [None]:
mlp_grid_ica.fit(X_train_ica, np.log(1 + y_train))

In [None]:
torch.save(mlp_grid_ica, "../models/mlp_grid_ica")

In [None]:
print("The best parameters are:", mlp_grid_ica.best_params_)
print("The best RMSE is:", mlp_grid_ica.best_score_)

In [None]:
mlp_grid_tsne.fit(X_train_tsne, np.log(1 + y_train))

In [None]:
torch.save(mlp_grid_tsne, "../models/mlp_grid_tsne")

In [None]:
print("The best parameters are:", mlp_grid_tsne.best_params_)
print("The best RMSE is:", mlp_grid_tsne.best_score_)