## 0 - Import packages 

In [4]:
!pip install scikit-learn --upgrade

Collecting scikit-learn
[?25l  Downloading https://files.pythonhosted.org/packages/5c/a1/273def87037a7fb010512bbc5901c31cfddfca8080bc63b42b26e3cc55b3/scikit_learn-0.23.2-cp36-cp36m-manylinux1_x86_64.whl (6.8MB)
[K     |████████████████████████████████| 6.8MB 5.2MB/s 
Collecting threadpoolctl>=2.0.0
  Downloading https://files.pythonhosted.org/packages/f7/12/ec3f2e203afa394a149911729357aa48affc59c20e2c1c8297a60f33f133/threadpoolctl-2.1.0-py3-none-any.whl
Installing collected packages: threadpoolctl, scikit-learn
  Found existing installation: scikit-learn 0.22.2.post1
    Uninstalling scikit-learn-0.22.2.post1:
      Successfully uninstalled scikit-learn-0.22.2.post1
Successfully installed scikit-learn-0.23.2 threadpoolctl-2.1.0


In [5]:
!pip install plotly



In [6]:
!pip install git+https://github.com/Techtonique/nnetsauce.git

Collecting git+https://github.com/Techtonique/nnetsauce.git
  Cloning https://github.com/Techtonique/nnetsauce.git to /tmp/pip-req-build-ndp_206e
  Running command git clone -q https://github.com/Techtonique/nnetsauce.git /tmp/pip-req-build-ndp_206e
Collecting appdirs==1.4.4
  Downloading https://files.pythonhosted.org/packages/3b/00/2344469e2084fb287c2e0b57b72910309874c3245463acd6cf5e3db69324/appdirs-1.4.4-py2.py3-none-any.whl
Collecting attrs==19.3.0
  Downloading https://files.pythonhosted.org/packages/a2/db/4313ab3be961f7a763066401fb77f7748373b6094076ae2bda2806988af6/attrs-19.3.0-py2.py3-none-any.whl
Collecting Babel==2.8.0
[?25l  Downloading https://files.pythonhosted.org/packages/15/a1/522dccd23e5d2e47aed4b6a16795b8213e3272c7506e625f2425ad025a19/Babel-2.8.0-py2.py3-none-any.whl (8.6MB)
[K     |████████████████████████████████| 8.6MB 7.7MB/s 
[?25hCollecting black==19.10b0
[?25l  Downloading https://files.pythonhosted.org/packages/fd/bb/ad34bbc93d1bea3de086d7c59e528d4a503ac8fe

In [7]:
!pip install pandas-profiling



In [32]:
import nnetsauce as ns
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns; 
import matplotlib
import matplotlib.pyplot as plt

from pandas_profiling import ProfileReport
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import cross_val_score, train_test_split
from tqdm import tqdm

## 1 - Data wrangling

In [9]:
n_obs = 100

In [10]:
X_california, y_california = fetch_california_housing(return_X_y=True)

#X_california = X_california[:n_obs]
#y_california = y_california[:n_obs]

X_california.shape, y_california.shape

np.sum((X_california == np.nan))

0

## 2 - sensi hyperparams

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X_california, y_california, random_state=2020)

In [12]:
fit_obj = ns.GLMRegressor(optimizer=ns.optimizers.Optimizer(type_optim="scd"))

### 2 - 1 lambda1 and lambda2

training+testing = f(lam1, lam2)

In [108]:
lams = [10, 1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5]

n_lams = len(lams)

table_mean_2_2 = np.zeros((n_lams, n_lams))
table_std_2_2 = np.zeros((n_lams, n_lams))

In [None]:
for idx, lam1 in enumerate(tqdm(lams)): 
  for idy, lam2 in enumerate(lams):     
    fit_obj = ns.GLMRegressor(lambda1=lam1,        
                              lambda2=lam2)    
    fit_obj.optimizer.type_optim = "scd"
    temp = cross_val_score(fit_obj, X_california, y_california, 
                          cv=5, scoring="neg_root_mean_squared_error")
    print(f"lam1: {lam1} lam2: {lam2} -----")
    print("RMSE: %0.2f (+/- %0.2f)" % (temp.mean(), temp.std() * 1.96))
    table_mean_2_2[idx, idy] = temp.mean()                    

In [110]:
lamschar = ["lam" + str(lams[i]) for i in range(len(lams))]

In [111]:
print(lamschar)

['lam10', 'lam1', 'lam0.1', 'lam0.01', 'lam0.001', 'lam0.0001', 'lam1e-05']


In [112]:
fig = px.imshow(-table_mean_2_2)
fig.show()

In [113]:
-table_mean_2_2

array([[1.26421692, 1.24017418, 1.10569816, 1.10585964, 1.08557966,
        1.0857072 , 1.08572028],
       [1.08546856, 1.06911261, 1.08845107, 1.09727657, 1.10497176,
        1.1034343 , 1.10113168],
       [0.842099  , 0.82003756, 0.81753023, 0.83186007, 0.83318558,
        0.83335359, 0.83330675],
       [0.83508333, 0.77866334, 0.77560429, 0.78160995, 0.78257308,
        0.78257554, 0.78257705],
       [0.83905145, 0.77694226, 0.77577944, 0.78102693, 0.78199417,
        0.78197908, 0.78197757],
       [0.83916784, 0.77621228, 0.77612711, 0.78084233, 0.78200064,
        0.78198555, 0.78198404],
       [0.8391803 , 0.7763475 , 0.77564145, 0.78082394, 0.78200129,
        0.78198619, 0.78198468]])

### 2 - 2 alpha1 and alpha2

In [114]:
alphas = [0, 0.25, 0.5, 0.75, 1]

n_alphas = len(alphas)

table_mean_2_2_ = np.zeros((n_alphas, n_alphas))
table_std_2_2_ = np.zeros((n_alphas, n_alphas))

In [None]:
for idx, alpha1 in enumerate(tqdm(alphas)): 
  for idy, alpha2 in enumerate(alphas):  
    fit_obj = ns.GLMRegressor(alpha1=alpha1,        
                              alpha2=alpha2)    
    fit_obj.optimizer.type_optim = "scd"
    temp = cross_val_score(fit_obj, X_california, y_california, 
                          cv=5, scoring="neg_root_mean_squared_error")
    print(f"lam1: {lam1} lam2: {lam2} -----")
    print("RMSE: %0.2f (+/- %0.2f)" % (temp.mean(), temp.std() * 1.96))
    table_mean_2_2_[idx, idy] = temp.mean()                    


In [116]:
fig = px.imshow(-table_mean_2_2_)
fig.show()

In [117]:
table_mean_2_2_

array([[-0.78119607, -0.78102062, -0.78086321, -0.78082528, -0.78057261],
       [-0.78160261, -0.78125523, -0.78122231, -0.78111776, -0.78099535],
       [-0.78201072, -0.7816969 , -0.78160995, -0.78150382, -0.78127023],
       [-0.78237107, -0.78216276, -0.7820218 , -0.78194866, -0.78160877],
       [-0.78291188, -0.78271993, -0.78261015, -0.78255768, -0.78226477]])

### 2 - 3 learning rate

In [135]:
fit_obj2 = ns.GLMRegressor(alpha1=0,        
                           alpha2=1, 
                           lambda1=1.0,        
                           lambda2=1.0)

fit_obj2.optimizer.type_optim = "scd"

fit_obj2.fit(X_train, y_train, verbose=1, learning_rate=0.01, batch_prop=0.5)

df = pd.DataFrame(fit_obj2.optimizer.results[2], columns=["loss_function"])

df["iteration"] = [i for i in range(len(fit_obj2.optimizer.results[2]))]


temp2 =  cross_val_score(fit_obj2, X_california, y_california, 
                cv=5, scoring="neg_root_mean_squared_error")

print("RMSE: %0.2f (+/- %0.2f)" % (temp2.mean(), temp2.std() * 1.96))



100%|██████████| 100/100 [00:05<00:00, 17.06it/s]


RMSE: -0.94 (+/- 0.12)


In [136]:
px.line(df, x="iteration", y="loss_function", title='loss function = f(number of iterations)(learning rate=0.01)')

In [137]:
fit_obj3 = ns.GLMRegressor(alpha1=0,        
                           alpha2=1, 
                           lambda1=1.0,        
                           lambda2=1.0)

fit_obj3.optimizer.type_optim = "scd"

fit_obj3.fit(X_train, y_train, verbose=1, learning_rate=0.025, batch_prop=0.5)

df = pd.DataFrame(fit_obj3.optimizer.results[2], columns=["loss_function"])

df["iteration"] = [i for i in range(len(fit_obj3.optimizer.results[2]))]


temp3 =  cross_val_score(fit_obj3, X_california, y_california, 
                cv=5, scoring="neg_root_mean_squared_error")

print("RMSE: %0.2f (+/- %0.2f)" % (temp3.mean(), temp3.std() * 1.96))


100%|██████████| 100/100 [00:06<00:00, 16.58it/s]


RMSE: -0.94 (+/- 0.12)


In [138]:
px.line(df, x="iteration", y="loss_function", title='loss function = f(number of iterations) (learning rate=0.025)')

In [139]:
fit_obj4 = ns.GLMRegressor(alpha1=0,        
                           alpha2=1, 
                           lambda1=1.0,        
                           lambda2=1.0)

fit_obj4.optimizer.type_optim = "scd"

fit_obj4.fit(X_train, y_train, verbose=1, learning_rate=0.05, batch_prop=0.5)

df = pd.DataFrame(fit_obj4.optimizer.results[2], columns=["loss_function"])

df["iteration"] = [i for i in range(len(fit_obj4.optimizer.results[2]))]

temp4 =  cross_val_score(fit_obj4, X_california, y_california, 
                cv=5, scoring="neg_root_mean_squared_error")

print("RMSE: %0.2f (+/- %0.2f)" % (temp4.mean(), temp4.std() * 1.96))


100%|██████████| 100/100 [00:05<00:00, 18.07it/s]


RMSE: -0.94 (+/- 0.12)


In [140]:
px.line(df, x="iteration", y="loss_function", title='loss function = f(number of iterations) (learning rate=0.05)')

In [141]:
fit_obj5 = ns.GLMRegressor(alpha1=0,        
                           alpha2=1, 
                           lambda1=1.0,        
                           lambda2=1.0)

fit_obj5.optimizer.type_optim = "scd"

fit_obj5.fit(X_train, y_train, verbose=1, learning_rate=0.1, batch_prop=0.5)

df = pd.DataFrame(fit_obj5.optimizer.results[2], columns=["loss_function"])

df["iteration"] = [i for i in range(len(fit_obj5.optimizer.results[2]))]

temp5 =  cross_val_score(fit_obj5, X_california, y_california, 
                cv=5, scoring="neg_root_mean_squared_error")

print("RMSE: %0.2f (+/- %0.2f)" % (temp5.mean(), temp5.std() * 1.96))



100%|██████████| 100/100 [00:05<00:00, 18.04it/s]


RMSE: -0.94 (+/- 0.12)


In [142]:
px.line(df, x="iteration", y="loss_function", title='loss function = f(number of iterations) (learning rate=0.1)')