# Libraries

In [26]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from sklearn import metrics

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Import data

In [27]:
%run Data_preprocessing.ipynb

************************************************************
There are 6 set of X
X_*_nozero
------------------------------------------------------------
Normalized version
X_*_norm
------------------------------------------------------------
There are 4 set of Y
y_train_t1, y_train_t2, y_valid_t1, y_valid_t2
when training, please use: 'y_train_t1_value,y_train_t2_value,y_valid_t1_value,y_valid_t2_value'
************************************************************


In [28]:
print(X_train_norm.shape)
print(X_valid_norm.shape)
print(X_test_norm.shape)

(16760, 61)
(2394, 61)
(4790, 61)


# Neural Network Using Sklearn build in function

In [55]:
from sklearn.neural_network import MLPRegressor

clf = MLPRegressor(hidden_layer_sizes=(100,), 
                   activation='relu',
                   solver='adam', 
                   alpha=0.0001, 
                   batch_size='auto', 
                   learning_rate='constant', 
                   learning_rate_init=0.001, 
                   power_t=0.5, 
                   max_iter=200, 
                   shuffle=True, 
                   random_state=None, 
                   tol=0.0001, 
                   verbose=False, 
                   warm_start=False, 
                   momentum=0.9, 
                   nesterovs_momentum=True, 
                   early_stopping=True, 
                   validation_fraction=0.1, 
                   beta_1=0.9, 
                   beta_2=0.999, 
                   epsilon=1e-08, 
                   n_iter_no_change=10, 
                   max_fun=15000)

clf.fit(X_train_norm, y_train_t2_value)

clf.score(X_valid_norm, y_valid_t2_value)

0.1401175070776539

In [56]:
y_pred = clf.predict(X_valid_norm)
rmse(y_pred, y_valid_t2_value)

1.849308534272786

# Evaluation Matrix

In [31]:
def rmse(predictions, targets):
    return np.sqrt(((predictions - targets) ** 2).mean())

# Hyperparameter and setting tuning

## layers

In [None]:
layer_list = []
fpr_lst=[]
tpr_lst=[]
roc_auc_lst=[]
for i in range(1,20,5):
    for j in range(1,20,5):
        layer_list.append((i,j))
        
for i in layer_list:
    clf = MLPRegressor(solver="lbfgs",
                        alpha=1e-5,
                        hidden_layer_sizes=i,
                        random_state=1,
                        max_iter=200)

    clf.fit(X_train_norm, y_train_t2_value)
    clf.score(X_valid_norm, y_valid_t2_value)
    y_pred = clf.predict(X_valid_norm)
    fpr, tpr, _ = metrics.roc_curve(y_valid_t2_value, y_pred)
    roc_auc = metrics.roc_auc_score(y_valid_t2_value, y_pred)
    
    fpr_lst.append(fpr)
    tpr_lst.append(tpr)
    roc_auc_lst.append(roc_auc)
    
    
index = roc_auc_lst.index(max(roc_auc_lst))
plt.figure(1)
plt.plot([0, 1], [0, 1], 'k-')
plt.plot(fpr_lst[index], tpr_lst[index], label='FN(area = {:.3f})'.format(max(roc_auc_lst)))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title(f"Layer {layer_list[index]} - ROC curve")
plt.legend(loc="best")
plt.show()

In [None]:
best_layernumber = layer_list[index]
best_layernumber

## Solver

In [None]:
solver_list = ["lbfgs","sgd","adam"]
fpr_lst=[]
tpr_lst=[]
roc_auc_lst=[]

for i in solver_list:
    clf = MLPRegressor(solver=i,
                        alpha=1e-5,
                        hidden_layer_sizes=best_layernumber,
                        random_state=1,
                        max_iter=200)

    clf.fit(X_train_norm, y_train_t2_value)
    clf.score(X_valid_norm, y_valid_t2_value)
    y_pred = clf.predict(X_valid_norm)
    fpr, tpr, _ = metrics.roc_curve(y_valid_t2_value, y_pred)
    roc_auc = metrics.roc_auc_score(y_valid_t2_value, y_pred)
    
    fpr_lst.append(fpr)
    tpr_lst.append(tpr)
    roc_auc_lst.append(roc_auc)
    
    
fig, axes = plt.subplots(1, 3, sharex=True, sharey=True)
plt.figure(figsize=(5,5))
for j in range(3):
    plt.figure(1)
    axes[j].plot([0, 1], [0, 1], 'k-')
    axes[j].plot(fpr_lst[j], tpr_lst[j], label='FN(area = {:.3f})'.format(roc_auc_lst[j]))
    axes[j].set_xlabel('False positive rate')
    axes[j].set_ylabel('True positive rate')
    axes[j].set_title(f"{solver_list[j]} - ROC curve")
    axes[j].legend(loc="best")
plt.show()

In [None]:
index = roc_auc_lst.index(max(roc_auc_lst))
best_solver = solver_list[index]
best_solver

## Learning rate tuning

In [None]:
fpr_lst=[]
tpr_lst=[]
roc_auc_lst=[]
lr_list = ["constant", "invscaling", "adaptive"]

for i in lr_list:
    clf = MLPRegressor(solver=best_solver,
                        alpha=1e-5,
                        hidden_layer_sizes=best_layernumber,
                        learning_rate=i,
                        random_state=3612,
                        max_iter=200)

    clf.fit(X_train_norm, y_train_t2_value)
    clf.score(X_valid_norm, y_valid_t2_value)
    y_pred = clf.predict(X_valid_norm)
    fpr, tpr, _ = metrics.roc_curve(y_valid_t2_value, y_pred)
    roc_auc = metrics.roc_auc_score(y_valid_t2_value, y_pred)
    
    fpr_lst.append(fpr)
    tpr_lst.append(tpr)
    roc_auc_lst.append(roc_auc)
    
    
index = roc_auc_lst.index(max(roc_auc_lst))
plt.figure(1)
plt.plot([0, 1], [0, 1], 'k-')
plt.plot(fpr_lst[index], tpr_lst[index], label='FN(area = {:.3f})'.format(max(roc_auc_lst)))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title(f"Learning Rate {lr_list[index]} - ROC curve")
plt.legend(loc="best")
plt.show()

In [None]:
index = roc_auc_lst.index(max(roc_auc_lst))
best_lr_choice =  lr_list[index]
best_lr_choice

# Test

In [61]:
# Final model
reg1 = MLPRegressor(hidden_layer_sizes=(1000,), 
                   activation='relu',
                   solver='adam', 
                   alpha=0.0001, 
                   batch_size='auto', 
                   learning_rate='constant', 
                   learning_rate_init=0.001, 
                   power_t=0.5, 
                   max_iter=200, 
                   shuffle=True, 
                   random_state=None, 
                   tol=0.0001, 
                   verbose=False, 
                   warm_start=False, 
                   momentum=0.9, 
                   nesterovs_momentum=True, 
                   early_stopping=True, 
                   validation_fraction=0.1, 
                   beta_1=0.9, 
                   beta_2=0.999, 
                   epsilon=1e-08, 
                   n_iter_no_change=10, 
                   max_fun=15000)

reg1.fit(X_train_norm, y_train_t2_value)

MLPRegressor(early_stopping=True, hidden_layer_sizes=(1000,))

In [62]:
y_pred = clf.predict(X_valid_norm)
rmse(y_pred, y_valid_t2_value)

1.849308534272786

In [63]:
X_test_norm

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.000000,2.882147e-18,0.000000,0.193160,6.016953e-17,5.609774e-18,0.971208,-2.477535e-17,-0.700754,-0.085412,...,-1.094667,0.761543,-0.269197,0.796149,-2.749614e+00,3.198461e+00,-5.959798e-01,4.188770e-17,-1.662156e-17,0.766021
1,-0.312102,1.723721e+00,-0.090785,-1.308996,-2.810434e-01,-3.818694e-01,1.211987,-3.767748e-01,-0.343800,-0.015321,...,0.000000,0.298294,-0.175260,0.057241,3.604995e-17,3.454719e-17,1.042083e-17,4.188770e-17,-1.521141e-01,2.209922
2,-0.284284,1.005956e+00,0.385820,0.193160,-2.930417e-01,-7.992430e-01,0.730430,-5.511088e-01,-0.547773,0.405221,...,0.000000,-0.164956,1.186824,-1.226029,3.604995e-17,3.454719e-17,1.042083e-17,4.188770e-17,-6.449774e-01,-0.527474
3,0.000000,2.882147e-18,0.000000,-0.708134,6.016953e-17,5.609774e-18,-0.232683,-2.477535e-17,0.013154,0.000000,...,-2.765092,-0.396581,-0.410103,1.418316,1.164973e+00,1.111059e+00,5.531141e-02,4.188770e-17,2.380694e-01,-0.632759
4,-0.337139,2.882147e-18,-0.155191,-0.708134,-2.981838e-01,2.539745e+00,-0.232683,-5.511088e-01,-0.343800,-0.786317,...,0.000000,0.298294,-1.584314,1.184969,3.604995e-17,3.454719e-17,1.042083e-17,4.188770e-17,-1.662156e-17,-1.550237
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4785,0.000000,2.882147e-18,0.000000,-0.407702,6.016953e-17,5.609774e-18,1.452765,-2.477535e-17,0.013154,0.545402,...,-3.878709,-0.396581,-1.255535,-0.214951,-7.502785e-01,-5.672995e+00,-1.799938e-01,4.188770e-17,-1.091635e+00,-0.346986
4786,0.000000,2.882147e-18,0.000000,0.493592,6.016953e-17,5.609774e-18,-1.195797,-2.477535e-17,2.103882,0.335131,...,0.000000,-2.017954,0.951982,-0.603824,3.604995e-17,3.454719e-17,1.042083e-17,-5.910209e-01,-4.692437e-01,0.029029
4787,0.000000,2.882147e-18,0.000000,-0.407702,6.016953e-17,5.609774e-18,-0.714240,-2.477535e-17,-0.751747,0.000000,...,0.297353,-0.396581,-0.316166,0.718344,-8.250200e-01,6.735880e-02,-1.799938e-01,4.188770e-17,4.074914e-01,-0.256743
4788,0.000000,2.882147e-18,0.000000,1.094454,6.016953e-17,-1.216617e+00,-0.232683,-2.477535e-17,0.472094,0.265040,...,0.000000,0.529918,1.139856,-0.914922,3.604995e-17,3.454719e-17,1.042083e-17,-5.283017e-01,-6.449774e-01,1.096915


In [64]:

result = reg1.predict(X_test_norm)
res1 = pd.DataFrame(result.tolist(), columns=["Predicted"])
res1.index = X_test.index
res1.index.name = 'Id'
pd.DataFrame(res1).to_csv('out1.csv')
#print(torch.count_nonzero(torch.from_numpy(np.array(res1)).to(torch.float32)))
res1.sum(axis=0)

Predicted    15617.634519
dtype: float64

In [None]:
# Final model
reg1 = MLPRegressor(hidden_layer_sizes=(100,100), 
                   activation='relu',
                   solver='adam', 
                   alpha=0.01,
                   learning_rate="adaptive", 
                   learning_rate_init=0.001, 
                   max_iter=200, 
                   shuffle=True, 
                   random_state=3612, 
                   warm_start=True, 
                   momentum=0.9, 
                   early_stopping=True, 
                   )

reg1.fit(X, y)
y_pred = reg1.predict(X_valid_norm)
rmse(y_pred, y_valid_t2_value)