# Libraries

In [2]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from sklearn import metrics

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Import data

In [3]:
%run Data_preprocessing.ipynb

************************************************************
There are 6 set of X
X_*_nozero
------------------------------------------------------------
Normalized version
X_*_norm
------------------------------------------------------------
There are 4 set of Y
y_train_t1, y_train_t2, y_valid_t1, y_valid_t2
when training, please use: 'y_train_t1_value,y_train_t2_value,y_valid_t1_value,y_valid_t2_value'
************************************************************


In [4]:
print(X_train_norm.shape)
print(X_valid_norm.shape)
print(X_test_norm.shape)

(16760, 61)
(2394, 61)
(4790, 61)


# Neural Network Using Sklearn build in function

In [5]:
from sklearn.neural_network import MLPRegressor

clf = MLPRegressor(hidden_layer_sizes=(100,), 
                   activation='relu',
                   solver='adam', 
                   alpha=0.0001, 
                   batch_size='auto', 
                   learning_rate='constant', 
                   learning_rate_init=0.01, 
                   power_t=0.5, 
                   max_iter=200, 
                   shuffle=True, 
                   random_state=None, 
                   tol=0.0001, 
                   verbose=False, 
                   warm_start=False, 
                   momentum=0.9, 
                   nesterovs_momentum=True, 
                   early_stopping=True, 
                   validation_fraction=0.1, 
                   beta_1=0.9, 
                   beta_2=0.999, 
                   epsilon=1e-08, 
                   n_iter_no_change=10, 
                   max_fun=15000)

clf.fit(X_train_norm, y_train_t2_value)

clf.score(X_valid_norm, y_valid_t2_value)

0.12321964534531882

In [6]:
y_pred = clf.predict(X_valid_norm)
rmse(y_pred, y_valid_t2_value)

NameError: name 'rmse' is not defined

# Evaluation Matrix

In [7]:
def rmse(predictions, targets):
    return np.sqrt(((predictions - targets) ** 2).mean())

# Hyperparameter and setting tuning

## layers

In [None]:
layer_list = []
fpr_lst=[]
tpr_lst=[]
roc_auc_lst=[]
for i in range(1,20,5):
    for j in range(1,20,5):
        layer_list.append((i,j))
        
for i in layer_list:
    clf = MLPRegressor(solver="lbfgs",
                        alpha=1e-5,
                        hidden_layer_sizes=i,
                        random_state=1,
                        max_iter=200)

    clf.fit(X_train_norm, y_train_t2_value)
    clf.score(X_valid_norm, y_valid_t2_value)
    y_pred = clf.predict(X_valid_norm)
    fpr, tpr, _ = metrics.roc_curve(y_valid_t2_value, y_pred)
    roc_auc = metrics.roc_auc_score(y_valid_t2_value, y_pred)
    
    fpr_lst.append(fpr)
    tpr_lst.append(tpr)
    roc_auc_lst.append(roc_auc)
    
    
index = roc_auc_lst.index(max(roc_auc_lst))
plt.figure(1)
plt.plot([0, 1], [0, 1], 'k-')
plt.plot(fpr_lst[index], tpr_lst[index], label='FN(area = {:.3f})'.format(max(roc_auc_lst)))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title(f"Layer {layer_list[index]} - ROC curve")
plt.legend(loc="best")
plt.show()

In [None]:
best_layernumber = layer_list[index]
best_layernumber

## Solver

In [None]:
solver_list = ["lbfgs","sgd","adam"]
fpr_lst=[]
tpr_lst=[]
roc_auc_lst=[]

for i in solver_list:
    clf = MLPRegressor(solver=i,
                        alpha=1e-5,
                        hidden_layer_sizes=best_layernumber,
                        random_state=1,
                        max_iter=200)

    clf.fit(X_train_norm, y_train_t2_value)
    clf.score(X_valid_norm, y_valid_t2_value)
    y_pred = clf.predict(X_valid_norm)
    fpr, tpr, _ = metrics.roc_curve(y_valid_t2_value, y_pred)
    roc_auc = metrics.roc_auc_score(y_valid_t2_value, y_pred)
    
    fpr_lst.append(fpr)
    tpr_lst.append(tpr)
    roc_auc_lst.append(roc_auc)
    
    
fig, axes = plt.subplots(1, 3, sharex=True, sharey=True)
plt.figure(figsize=(5,5))
for j in range(3):
    plt.figure(1)
    axes[j].plot([0, 1], [0, 1], 'k-')
    axes[j].plot(fpr_lst[j], tpr_lst[j], label='FN(area = {:.3f})'.format(roc_auc_lst[j]))
    axes[j].set_xlabel('False positive rate')
    axes[j].set_ylabel('True positive rate')
    axes[j].set_title(f"{solver_list[j]} - ROC curve")
    axes[j].legend(loc="best")
plt.show()

In [None]:
index = roc_auc_lst.index(max(roc_auc_lst))
best_solver = solver_list[index]
best_solver

## Learning rate tuning

In [None]:
fpr_lst=[]
tpr_lst=[]
roc_auc_lst=[]
lr_list = ["constant", "invscaling", "adaptive"]

for i in lr_list:
    clf = MLPRegressor(solver=best_solver,
                        alpha=1e-5,
                        hidden_layer_sizes=best_layernumber,
                        learning_rate=i,
                        random_state=3612,
                        max_iter=200)

    clf.fit(X_train_norm, y_train_t2_value)
    clf.score(X_valid_norm, y_valid_t2_value)
    y_pred = clf.predict(X_valid_norm)
    fpr, tpr, _ = metrics.roc_curve(y_valid_t2_value, y_pred)
    roc_auc = metrics.roc_auc_score(y_valid_t2_value, y_pred)
    
    fpr_lst.append(fpr)
    tpr_lst.append(tpr)
    roc_auc_lst.append(roc_auc)
    
    
index = roc_auc_lst.index(max(roc_auc_lst))
plt.figure(1)
plt.plot([0, 1], [0, 1], 'k-')
plt.plot(fpr_lst[index], tpr_lst[index], label='FN(area = {:.3f})'.format(max(roc_auc_lst)))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title(f"Learning Rate {lr_list[index]} - ROC curve")
plt.legend(loc="best")
plt.show()

In [None]:
index = roc_auc_lst.index(max(roc_auc_lst))
best_lr_choice =  lr_list[index]
best_lr_choice

# Test

In [68]:
X = np.concatenate((X_train_norm,X_valid_norm),axis=0)
y = np.concatenate((y_train_t2_value,y_valid_t2_value),axis=0)

In [69]:
y.apply()

array([6.06456019, 3.67291667, 5.32305556, ..., 9.14388889, 6.12417824,
       3.81853009])

In [67]:
# Final model
reg1 = MLPRegressor(hidden_layer_sizes=(20,1000), 
                   activation='logistic',
                   solver='adam', 
                   alpha=0.2,
                   learning_rate="adaptive", 
                   learning_rate_init=0.1, 
                   max_iter=300, 
                   shuffle=True, 
                   random_state=3612, 
                   warm_start=True, 
                   momentum=0.9, 
                   early_stopping=True, 
                   )

reg1.fit(X,y)
y_pred = reg1.predict(X_valid_norm)
rmse(y_pred, y_valid_t2_value)

1.8331037070883331

In [51]:
y_pred = reg1.predict(X_valid_norm)
rmse(y_pred, y_valid_t2_value)

1.7867877680306063

In [33]:
X_test_norm.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
count,4790.0,4790.0,4790.0,4790.0,4790.0,4790.0,4790.0,4790.0,4790.0,4790.0,...,4790.0,4790.0,4790.0,4790.0,4790.0,4790.0,4790.0,4790.0,4790.0,4790.0
mean,-1.483388e-18,-1.483388e-17,5.933551e-18,-5.933551e-18,4.524333e-17,-7.416939e-18,1.18671e-17,-1.112541e-17,6.675245e-18,2.225082e-18,...,-1.18671e-17,-1.18671e-17,-1.780065e-17,2.37342e-17,0.0,1.18671e-17,0.0,1.780065e-17,-2.225082e-17,0.0
std,1.000104,1.000104,1.000104,1.000104,1.000104,1.000104,1.000104,1.000104,1.000104,1.000104,...,1.000104,1.000104,1.000104,1.000104,1.000104,1.000104,1.000104,1.000104,1.000104,1.000104
min,-0.3566118,-4.736161,-1.108402,-2.510722,-0.3170383,-1.216617,-3.844358,-0.6382758,-1.159694,-2.118036,...,-3.878709,-5.955574,-5.670569,-6.841345,-5.085287,-5.672995,-2.066637,-0.6447803,-4.238773,-1.775847
25%,-0.270375,2.882147e-18,-0.2711224,-0.7081335,-0.2519048,5.609774e-18,-0.4734616,-0.3767748,-0.5987669,-0.2956834,...,0.0,-0.6282054,-0.7388818,-0.6038237,-0.02038072,3.454719e-17,1.0420830000000002e-17,4.18877e-17,-0.4909576,-0.572596
50%,0.0,2.882147e-18,0.0,-0.1072709,6.016953000000001e-17,5.609774e-18,0.008095031,-2.477535e-17,-0.2928067,0.0,...,0.0,0.06666879,-0.08132357,0.0183723,3.6049950000000004e-17,3.454719e-17,1.0420830000000002e-17,4.18877e-17,-1.6621560000000002e-17,-0.151458
75%,0.0,2.882147e-18,0.0,0.4935918,6.016953000000001e-17,5.609774e-18,0.4896517,-2.477535e-17,0.217127,0.2650404,...,0.01894909,0.5299182,0.6232031,0.6405395,3.6049950000000004e-17,0.0673588,1.0420830000000002e-17,4.18877e-17,0.1918637,0.359923
max,27.45615,6.030308,28.47978,9.806963,28.65711,23.40842,6.268331,18.14622,10.4158,60.75313,...,6.14384,7.247035,3.86794,3.673771,6.966784,4.764012,23.14463,18.02763,8.172467,23.567625


In [60]:

result = reg1.predict(X_test_norm)
res1 = pd.DataFrame(result.tolist(), columns=["Predicted"])
res1.index = X_test.index
res1.index.name = 'Id'
pd.DataFrame(res1).to_csv('out1.csv')
#print(torch.count_nonzero(torch.from_numpy(np.array(res1)).to(torch.float32)))
res1

Unnamed: 0_level_0,Predicted
Id,Unnamed: 1_level_1
32_175413_295037,4.384071
33_176176_296681,3.040878
42_119203_210828,2.364585
44_181750_291554,2.536625
61_176332_252348,2.854503
...,...
99883_150755_276467,3.100880
99897_162913_266801,2.738057
99913_175989_292375,2.488737
99944_185654_221067,2.989108
