# Performance comparaison

| Fingerprint | R2 | RMSE |  
|:-:|:-:|:-:|  
| ECFP| 0.765 | 0.9808 |
|Can2Can|0.7176|1.073|
|Enum2Enum|0.725|1.059|
|Transformer|0.862|0.750|
| NFP| 0.8845 | 0.6868 |

In [11]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score

import chainer
from chainer import serializers
from seq2seq.seq2seq import Seq2seq, load_vocabulary
import sys
sys.path.append('./transformer')
from net import Transformer
import preprocess
import optuna

UNK, EOS = 0, 1

In [3]:
df_train = pd.read_csv('data/sol_train.csv')
df_test = pd.read_csv('data/sol_test.csv')
df_train.head()

Unnamed: 0,SMILES,unknown,solubility,processed_smiles,spaced
0,[nH0]1c(SC)c2c([nH0]cc[nH0]2)[nH0]c1,6966-78-5,-2.36,[ n H 0 ] 1 c ( S C ) c 2 c ( [ n H 0 ] c c [ ...,[ n H 0 ] 1 c ( S C ) c 2 c ( [ n H 0 ] c c [ ...
1,CCC(C)Cl,78-86-4,-1.96,C C C ( C ) Cl,C C C ( C ) C l
2,O=C(NC(=O)c1ccccc1)c1ccccc1,614-28-8,-2.27,O = C ( N C ( = O ) c 1 c c c c c 1 ) c 1 c c ...,O = C ( N C ( = O ) c 1 c c c c c 1 ) c 1 c c ...
3,CC(C(C)(C)C)O,464-07-3,-0.62,C C ( C ( C ) ( C ) C ) O,C C ( C ( C ) ( C ) C ) O
4,[O-][N+](c1c(O)cccc1)=O,88-75-5,-1.74,[ O- ] [ N+ ] ( c 1 c ( O ) c c c c 1 ) = O,[ O - ] [ N + ] ( c 1 c ( O ) c c c c 1 ) = O


In [4]:
x_train = df_train['processed_smiles']
y_train = df_train['solubility']
x_test = df_test['processed_smiles']
y_test = df_test['solubility']

In [5]:
def load_data(vocabulary, lst):
    data = []
    for l in lst:
        words = l.strip().split()
        array = np.array([vocabulary.get(w, UNK) for w in words], np.int32)
        data.append(array)
    return data

In [6]:
source_ids = load_vocabulary('data/Enum2Enum/vocab2.txt')
xnum_train = load_data(source_ids, x_train)
xnum_test = load_data(source_ids, x_test)

# Encode to fingerprint

In [7]:
# Can2Can
model = Seq2seq(1, 46, 46, 256)
chainer.serializers.load_npz('../result/can2can_iter_132000.npz', model)

In [31]:
# Enum2Enum
model = Seq2seq(2, 43, 43, 256)
chainer.serializers.load_npz('../result/Enum2Enum/model_epoch_3.npz', model)

In [8]:
X_train = model.encode(xnum_train)[0].data
X_test = model.encode(xnum_test)[0].data

In [25]:
# Transformer
x_train = df_train['spaced']
y_train = df_train['solubility']
x_test = df_test['spaced']
y_test = df_test['solubility']

In [27]:
en_path = os.path.join('data/Enum2Enum', 'sval.txt')
source_vocab = ['<eos>', '<unk>', '<bos>'] + \
    preprocess.count_words(en_path, 50)
source_ids = {word: index for index, word in enumerate(source_vocab)}
source_words = {i: w for w, i in source_ids.items()}

100% (500000 of 500000) |################| Elapsed Time: 0:00:41 Time:  0:00:41


In [29]:
# Transformer
model = Transformer(2, 38, 38,
        256,
        h=4,
        dropout=0.1,
        max_length=500,
        use_label_smoothing=False,
        embed_position=False)

def encode(x):
    words = preprocess.split_sentence(x)
    x = model.xp.array([source_ids.get(w, 1) for w in words], 'i')
    h = model.encode([x])
    return np.mean(h.data[0], axis=1)

chainer.serializers.load_npz('transformer/result/best_model.npz', model)

In [30]:
X_train = [encode(x_train[i]) for i in range(len(x_train))]
X_test = [encode(x_test[i]) for i in range(len(x_test))]

# Prediction
## Can2Can
### MLP

In [14]:
# Default
n = 10
r2 = np.zeros(n)
mse = np.zeros(n)

for i in range(n):
    MLP = MLPRegressor()
    MLP.fit(X_train, y_train)
    y_pred = MLP.predict(X_test)
    r2[i] = r2_score(y_test, y_pred)
    mse[i] = mean_squared_error(y_test, y_pred)

print("Test R2: {:.4f} ± {:.4f}".format(np.mean(r2), np.std(r2)))
print("Test MSE: {:.4f} ± {:.4f}".format(np.mean(mse), np.std(mse)))



Test R2: 0.6502 ± 0.0132
Test MSE: 1.4281 ± 0.0537


In [12]:
def objective_mlp(trial):
    n_layers = trial.suggest_int('n_layers', 1,3)
    layers = []
    for i in range(n_layers):
        n_units = int(trial.suggest_loguniform('n_units_l{}'.format(i), 1, 1000))
        layers.append(n_units)
    
    n_folds = 4
    score = 0
    for _ in range(n_folds):
        mlp = MLPRegressor(hidden_layer_sizes=layers)
        X_trn, X_val, y_trn, y_val = train_test_split(X_train, y_train)
        mlp.fit(X_trn, y_trn)
        y_pred = mlp.predict(X_val)
        score += mean_squared_error(y_val, y_pred)
    return score/n_folds

study = optuna.create_study()
study.optimize(objective_mlp, n_trials=100)

[I 2019-03-27 17:30:24,034] Finished a trial resulted in value: 2.1968488532576362. Current best value is 2.1968488532576362 with parameters: {'n_layers': 3, 'n_units_l0': 4.496636026634388, 'n_units_l1': 288.5097075695691, 'n_units_l2': 117.02275033245998}.
[I 2019-03-27 17:30:27,671] Finished a trial resulted in value: 1.6107835524105347. Current best value is 1.6107835524105347 with parameters: {'n_layers': 1, 'n_units_l0': 29.20135350161142}.
[I 2019-03-27 17:30:52,812] Finished a trial resulted in value: 1.5099048867093632. Current best value is 1.5099048867093632 with parameters: {'n_layers': 3, 'n_units_l0': 601.7334661991075, 'n_units_l1': 34.53736314637694, 'n_units_l2': 2.755145401102368}.
[I 2019-03-27 17:30:55,506] Finished a trial resulted in value: 1.5503938256091052. Current best value is 1.5099048867093632 with parameters: {'n_layers': 3, 'n_units_l0': 601.7334661991075, 'n_units_l1': 34.53736314637694, 'n_units_l2': 2.755145401102368}.
[I 2019-03-27 17:30:59,936] Finis

[I 2019-03-27 17:31:12,551] Finished a trial resulted in value: 1.8102511558439862. Current best value is 1.5099048867093632 with parameters: {'n_layers': 3, 'n_units_l0': 601.7334661991075, 'n_units_l1': 34.53736314637694, 'n_units_l2': 2.755145401102368}.
[I 2019-03-27 17:31:16,826] Finished a trial resulted in value: 1.5991913242818863. Current best value is 1.5099048867093632 with parameters: {'n_layers': 3, 'n_units_l0': 601.7334661991075, 'n_units_l1': 34.53736314637694, 'n_units_l2': 2.755145401102368}.
[I 2019-03-27 17:31:18,751] Finished a trial resulted in value: 3.4551418411681696. Current best value is 1.5099048867093632 with parameters: {'n_layers': 3, 'n_units_l0': 601.7334661991075, 'n_units_l1': 34.53736314637694, 'n_units_l2': 2.755145401102368}.
[I 2019-03-27 17:31:22,661] Finished a trial resulted in value: 1.6904498920924733. Current best value is 1.5099048867093632 with parameters: {'n_layers': 3, 'n_units_l0': 601.7334661991075, 'n_units_l1': 34.53736314637694, 'n

[I 2019-03-27 17:34:02,716] Finished a trial resulted in value: 1.6319562296217538. Current best value is 1.5099048867093632 with parameters: {'n_layers': 3, 'n_units_l0': 601.7334661991075, 'n_units_l1': 34.53736314637694, 'n_units_l2': 2.755145401102368}.
[I 2019-03-27 17:34:05,129] Finished a trial resulted in value: 1.5039791246406304. Current best value is 1.5039791246406304 with parameters: {'n_layers': 1, 'n_units_l0': 5.320388316952276}.
[I 2019-03-27 17:34:07,480] Finished a trial resulted in value: 3.8988724230039833. Current best value is 1.5039791246406304 with parameters: {'n_layers': 1, 'n_units_l0': 5.320388316952276}.
[I 2019-03-27 17:34:12,258] Finished a trial resulted in value: 1.478389423365165. Current best value is 1.478389423365165 with parameters: {'n_layers': 1, 'n_units_l0': 55.6847300304602}.
[I 2019-03-27 17:34:16,581] Finished a trial resulted in value: 1.5896351006246037. Current best value is 1.478389423365165 with parameters: {'n_layers': 1, 'n_units_l0'

[I 2019-03-27 17:34:27,496] Finished a trial resulted in value: 1.5847131440331066. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:34:30,940] Finished a trial resulted in value: 1.7858205016420172. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:34:33,973] Finished a trial resulted in value: 1.6063527994204163. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:34:37,157] Finished a trial resulted in value: 1.5544384565481866. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:34:44,438] Finished a trial resulted in value: 1.6652875518731531. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:34:48,674] Finished a t

[I 2019-03-27 17:35:17,497] Finished a trial resulted in value: 1.6765618101326634. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:35:20,880] Finished a trial resulted in value: 1.6572598579422808. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:35:23,538] Finished a trial resulted in value: 1.554432096185787. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:35:27,552] Finished a trial resulted in value: 1.5846516222319644. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:35:30,104] Finished a trial resulted in value: 1.564758572731741. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:35:33,097] Finished a tri

[I 2019-03-27 17:35:39,962] Finished a trial resulted in value: 1.498518773386566. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:35:45,268] Finished a trial resulted in value: 1.6759031885906073. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:35:47,463] Finished a trial resulted in value: 8.5973314258129. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:35:55,437] Finished a trial resulted in value: 1.628334946799447. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:35:57,894] Finished a trial resulted in value: 1.9469553311382817. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:36:00,429] Finished a trial 

[I 2019-03-27 17:36:20,404] Finished a trial resulted in value: 1.6688637084108857. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:36:23,892] Finished a trial resulted in value: 2.7641182348803657. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:36:29,050] Finished a trial resulted in value: 1.6186926489572158. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:36:32,760] Finished a trial resulted in value: 1.5587910935089058. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:37:01,451] Finished a trial resulted in value: 7.7282773381643635. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:37:07,365] Finished a t

[I 2019-03-27 17:37:35,247] Finished a trial resulted in value: 1.5937769884595108. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:37:38,079] Finished a trial resulted in value: 1.739953731113721. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:37:40,370] Finished a trial resulted in value: 4.16231998941085. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:37:43,211] Finished a trial resulted in value: 1.6093087032729168. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:37:53,454] Finished a trial resulted in value: 7.37279002618078. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:37:55,803] Finished a trial 

[I 2019-03-27 17:38:41,104] Finished a trial resulted in value: 1.927094422410509. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:38:55,894] Finished a trial resulted in value: 1.7512039051899337. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:39:06,663] Finished a trial resulted in value: 1.7273585332187371. Current best value is 1.4357352659177811 with parameters: {'n_layers': 1, 'n_units_l0': 5.690829667369596}.
[I 2019-03-27 17:39:10,680] Finished a trial resulted in value: 1.415874610074891. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:39:13,560] Finished a trial resulted in value: 1.4831988341143303. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:39:17,680] Finished a trial

[I 2019-03-27 17:39:31,708] Finished a trial resulted in value: 1.535399939095107. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:39:41,072] Finished a trial resulted in value: 1.4807652071224735. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:39:44,170] Finished a trial resulted in value: 1.4961130038943584. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:39:46,661] Finished a trial resulted in value: 1.6855543360932113. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:39:49,331] Finished a trial resulted in value: 4.356243144260855. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:39:51,443] Finished a trial re

[I 2019-03-27 17:39:58,721] Finished a trial resulted in value: 1.5207723947503418. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:40:18,217] Finished a trial resulted in value: 1.538391403618202. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:40:23,404] Finished a trial resulted in value: 1.7170889774625047. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:40:28,712] Finished a trial resulted in value: 1.4481821492579774. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:40:33,881] Finished a trial resulted in value: 1.6215237504045008. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:40:37,126] Finished a trial r

[I 2019-03-27 17:40:46,902] Finished a trial resulted in value: 1.7357177859804827. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:40:51,592] Finished a trial resulted in value: 1.709202395954227. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:40:53,548] Finished a trial resulted in value: 1.7258090969772137. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:40:55,872] Finished a trial resulted in value: 1.7744428176671327. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:40:58,598] Finished a trial resulted in value: 1.8152272955524937. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:41:02,095] Finished a trial r

[I 2019-03-27 17:41:23,256] Finished a trial resulted in value: 2.5464283984946734. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:41:38,666] Finished a trial resulted in value: 1.5587180119236945. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:41:41,837] Finished a trial resulted in value: 1.6256453744399986. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:41:49,712] Finished a trial resulted in value: 1.618572179821079. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:41:57,610] Finished a trial resulted in value: 6.77087362166685. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:42:13,856] Finished a trial res

[I 2019-03-27 17:42:41,716] Finished a trial resulted in value: 1.5282648959110248. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:42:51,106] Finished a trial resulted in value: 1.891662695642176. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:42:56,510] Finished a trial resulted in value: 1.5286194263496862. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:43:28,559] Finished a trial resulted in value: 1.5939334456130496. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.
[I 2019-03-27 17:43:32,437] Finished a trial resulted in value: 1.5872269224010203. Current best value is 1.415874610074891 with parameters: {'n_layers': 1, 'n_units_l0': 38.93661768724165}.


In [15]:
# Optimized

n = 10
r2 = np.zeros(n)
mse = np.zeros(n)

for i in range(n):
    MLP = MLPRegressor((39))
    MLP.fit(X_train, y_train)
    y_pred = MLP.predict(X_test)
    r2[i] = r2_score(y_test, y_pred)
    mse[i] = mean_squared_error(y_test, y_pred)

print("Test R2: {:.4f} ± {:.4f}".format(np.mean(r2), np.std(r2)))
print("Test MSE: {:.4f} ± {:.4f}".format(np.mean(mse), np.std(mse)))



Test R2: 0.6423 ± 0.0121
Test MSE: 1.4605 ± 0.0493


### RF

In [16]:
#Default
n = 10
r2 = np.zeros(n)
mse = np.zeros(n)

for i in range(n):
    RF = RandomForestRegressor()
    RF.fit(X_train, y_train)
    y_pred = RF.predict(X_test)
    r2[i] = r2_score(y_test, y_pred)
    mse[i] = mean_squared_error(y_test, y_pred)

print("Test R2: {:.4f} ± {:.4f}".format(np.mean(r2), np.std(r2)))
print("Test MSE: {:.4f} ± {:.4f}".format(np.mean(mse), np.std(mse)))



Test R2: 0.6287 ± 0.0243
Test MSE: 1.5159 ± 0.0993


In [17]:
def objective_rf(trial):
    max_depth = int(trial.suggest_loguniform('max_depth', 2, 100))
    n_estimators = int(trial.suggest_loguniform('n_estimators', 2, 1000))
    max_features = trial.suggest_int('max_features', 1, 10)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 10)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 10)
        
    n_folds = 4
    score = 0
    for _ in range(n_folds):
        rf = RandomForestRegressor(max_depth=max_depth, n_estimators=n_estimators, max_features=max_features,
                              min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf)
        X_trn, X_val, y_trn, y_val = train_test_split(X_train, y_train)
        rf.fit(X_trn, y_trn)
        y_pred = rf.predict(X_val)
        score += mean_squared_error(y_val, y_pred)
    return score/n_folds

study = optuna.create_study()
study.optimize(objective_rf, n_trials=100)

[I 2019-03-27 17:47:02,562] Finished a trial resulted in value: 2.291075054629868. Current best value is 2.291075054629868 with parameters: {'max_depth': 19.97957442135277, 'n_estimators': 14.683033705451166, 'max_features': 1, 'min_samples_split': 2, 'min_samples_leaf': 6}.
[I 2019-03-27 17:47:02,650] Finished a trial resulted in value: 2.2376994718325247. Current best value is 2.2376994718325247 with parameters: {'max_depth': 22.019384922918128, 'n_estimators': 4.3585164019968206, 'max_features': 7, 'min_samples_split': 4, 'min_samples_leaf': 5}.
[I 2019-03-27 17:47:06,114] Finished a trial resulted in value: 2.643137045694201. Current best value is 2.2376994718325247 with parameters: {'max_depth': 22.019384922918128, 'n_estimators': 4.3585164019968206, 'max_features': 7, 'min_samples_split': 4, 'min_samples_leaf': 5}.
[I 2019-03-27 17:47:07,666] Finished a trial resulted in value: 1.9739687343666112. Current best value is 1.9739687343666112 with parameters: {'max_depth': 32.86182035

[I 2019-03-27 17:48:12,437] Finished a trial resulted in value: 2.7183151906580694. Current best value is 1.6563074203771297 with parameters: {'max_depth': 14.344749683916975, 'n_estimators': 106.20375195835145, 'max_features': 6, 'min_samples_split': 8, 'min_samples_leaf': 6}.
[I 2019-03-27 17:48:15,586] Finished a trial resulted in value: 2.1791123752695096. Current best value is 1.6563074203771297 with parameters: {'max_depth': 14.344749683916975, 'n_estimators': 106.20375195835145, 'max_features': 6, 'min_samples_split': 8, 'min_samples_leaf': 6}.
[I 2019-03-27 17:48:15,862] Finished a trial resulted in value: 3.2005348064715067. Current best value is 1.6563074203771297 with parameters: {'max_depth': 14.344749683916975, 'n_estimators': 106.20375195835145, 'max_features': 6, 'min_samples_split': 8, 'min_samples_leaf': 6}.
[I 2019-03-27 17:48:15,940] Finished a trial resulted in value: 2.5277879087657174. Current best value is 1.6563074203771297 with parameters: {'max_depth': 14.3447

[I 2019-03-27 17:50:22,924] Finished a trial resulted in value: 1.7610484316406896. Current best value is 1.5685099866936014 with parameters: {'max_depth': 10.462927771362471, 'n_estimators': 812.028696277209, 'max_features': 9, 'min_samples_split': 7, 'min_samples_leaf': 1}.
[I 2019-03-27 17:50:30,920] Finished a trial resulted in value: 2.4222210963846913. Current best value is 1.5685099866936014 with parameters: {'max_depth': 10.462927771362471, 'n_estimators': 812.028696277209, 'max_features': 9, 'min_samples_split': 7, 'min_samples_leaf': 1}.
[I 2019-03-27 17:50:31,063] Finished a trial resulted in value: 1.9124055726646882. Current best value is 1.5685099866936014 with parameters: {'max_depth': 10.462927771362471, 'n_estimators': 812.028696277209, 'max_features': 9, 'min_samples_split': 7, 'min_samples_leaf': 1}.
[I 2019-03-27 17:50:31,204] Finished a trial resulted in value: 2.405548792858847. Current best value is 1.5685099866936014 with parameters: {'max_depth': 10.46292777136

[I 2019-03-27 17:52:41,121] Finished a trial resulted in value: 2.265776879110939. Current best value is 1.5685099866936014 with parameters: {'max_depth': 10.462927771362471, 'n_estimators': 812.028696277209, 'max_features': 9, 'min_samples_split': 7, 'min_samples_leaf': 1}.
[I 2019-03-27 17:52:42,095] Finished a trial resulted in value: 1.7760699754747395. Current best value is 1.5685099866936014 with parameters: {'max_depth': 10.462927771362471, 'n_estimators': 812.028696277209, 'max_features': 9, 'min_samples_split': 7, 'min_samples_leaf': 1}.
[I 2019-03-27 17:52:42,303] Finished a trial resulted in value: 1.895282103022127. Current best value is 1.5685099866936014 with parameters: {'max_depth': 10.462927771362471, 'n_estimators': 812.028696277209, 'max_features': 9, 'min_samples_split': 7, 'min_samples_leaf': 1}.
[I 2019-03-27 17:52:42,693] Finished a trial resulted in value: 1.814250944667489. Current best value is 1.5685099866936014 with parameters: {'max_depth': 10.4629277713624

In [18]:
# Optimized

n = 10
r2 = np.zeros(n)
mse = np.zeros(n)

for i in range(n):
    RF = RandomForestRegressor(max_depth=10, n_estimators=812, max_features=9,
                              min_samples_split=7, min_samples_leaf=1)
    RF.fit(X_train, y_train)
    y_pred = RF.predict(X_test)
    r2[i] = r2_score(y_test, y_pred)
    mse[i] = mean_squared_error(y_test, y_pred)

print("Test R2: {:.4f} ± {:.4f}".format(np.mean(r2), np.std(r2)))
print("Test MSE: {:.4f} ± {:.4f}".format(np.mean(mse), np.std(mse)))

Test R2: 0.6179 ± 0.0018
Test MSE: 1.5599 ± 0.0072


## Enum2Enum
### MLP

In [19]:
# Default
n = 10
r2 = np.zeros(n)
mse = np.zeros(n)

for i in range(n):
    MLP = MLPRegressor()
    MLP.fit(X_train, y_train)
    y_pred = MLP.predict(X_test)
    r2[i] = r2_score(y_test, y_pred)
    mse[i] = mean_squared_error(y_test, y_pred)

print("Test R2: {:.4f} ± {:.4f}".format(np.mean(r2), np.std(r2)))
print("Test MSE: {:.4f} ± {:.4f}".format(np.mean(mse), np.std(mse)))



Test R2: 0.6571 ± 0.0106
Test MSE: 1.3999 ± 0.0432


In [20]:
study = optuna.create_study()
study.optimize(objective_mlp, n_trials=100)

[I 2019-03-27 17:59:52,100] Finished a trial resulted in value: 1.6810123726719572. Current best value is 1.6810123726719572 with parameters: {'n_layers': 2, 'n_units_l0': 769.4995119769485, 'n_units_l1': 64.54746270299873}.
[I 2019-03-27 18:00:21,987] Finished a trial resulted in value: 4.31013467818729. Current best value is 1.6810123726719572 with parameters: {'n_layers': 2, 'n_units_l0': 769.4995119769485, 'n_units_l1': 64.54746270299873}.
[I 2019-03-27 18:00:24,375] Finished a trial resulted in value: 1.5037115884303607. Current best value is 1.5037115884303607 with parameters: {'n_layers': 1, 'n_units_l0': 5.886594927061267}.
[I 2019-03-27 18:00:36,389] Finished a trial resulted in value: 2.139986153862565. Current best value is 1.5037115884303607 with parameters: {'n_layers': 1, 'n_units_l0': 5.886594927061267}.
[I 2019-03-27 18:00:40,354] Finished a trial resulted in value: 1.8880254472874434. Current best value is 1.5037115884303607 with parameters: {'n_layers': 1, 'n_units_l0

[I 2019-03-27 18:00:58,000] Finished a trial resulted in value: 1.4865970692859518. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:01:06,424] Finished a trial resulted in value: 1.9095794965669648. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:01:16,593] Finished a trial resulted in value: 3.4475644543894006. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:01:23,105] Finished a trial resulted in value: 1.595112632751115. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:01:44,428] Finished a trial resulted in value: 1.7639367756015867. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:01:47,359] Finished

[I 2019-03-27 18:02:09,184] Finished a trial resulted in value: 1.7040077306911374. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:02:25,003] Finished a trial resulted in value: 1.633748455614556. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:02:27,588] Finished a trial resulted in value: 1.5527923364853182. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:02:30,023] Finished a trial resulted in value: 1.7191239993170244. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:02:34,335] Finished a trial resulted in value: 1.5421270099711988. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:02:41,412] Finished

[I 2019-03-27 18:02:47,384] Finished a trial resulted in value: 2.815295601944131. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:02:53,095] Finished a trial resulted in value: 1.5419605988161451. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:02:54,987] Finished a trial resulted in value: 9.009082762934153. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:03:05,280] Finished a trial resulted in value: 1.875697371073748. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:03:07,499] Finished a trial resulted in value: 1.6266512955040555. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:03:09,608] Finished a

[I 2019-03-27 18:03:18,097] Finished a trial resulted in value: 1.5968026397118489. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:03:32,812] Finished a trial resulted in value: 1.6206072490555794. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:03:35,102] Finished a trial resulted in value: 6.235656704630863. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:03:40,304] Finished a trial resulted in value: 1.506216062942996. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:04:04,577] Finished a trial resulted in value: 1.529135252172707. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:04:06,905] Finished a

[I 2019-03-27 18:04:43,081] Finished a trial resulted in value: 1.5874903912026914. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:04:45,675] Finished a trial resulted in value: 5.058542009849892. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:04:49,131] Finished a trial resulted in value: 1.6447925827671224. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:04:51,452] Finished a trial resulted in value: 1.8580208099488544. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:04:55,123] Finished a trial resulted in value: 1.5469206273181202. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:05:01,579] Finished

[I 2019-03-27 18:05:10,154] Finished a trial resulted in value: 1.6556895934860776. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:05:21,941] Finished a trial resulted in value: 1.6032861502405018. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:05:25,736] Finished a trial resulted in value: 1.6782229670690914. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:05:32,018] Finished a trial resulted in value: 1.6416519918982622. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:05:35,033] Finished a trial resulted in value: 1.6480542878144628. Current best value is 1.4865970692859518 with parameters: {'n_layers': 1, 'n_units_l0': 54.670902155151936}.
[I 2019-03-27 18:06:03,271] Finishe

[I 2019-03-27 18:07:06,250] Finished a trial resulted in value: 2.5415821986401683. Current best value is 1.4486623446264901 with parameters: {'n_layers': 3, 'n_units_l0': 696.337032987555, 'n_units_l1': 5.9532576568375335, 'n_units_l2': 96.79537060685016}.
[I 2019-03-27 18:07:23,043] Finished a trial resulted in value: 9.157214157960327. Current best value is 1.4486623446264901 with parameters: {'n_layers': 3, 'n_units_l0': 696.337032987555, 'n_units_l1': 5.9532576568375335, 'n_units_l2': 96.79537060685016}.
[I 2019-03-27 18:07:49,721] Finished a trial resulted in value: 1.492097301650756. Current best value is 1.4486623446264901 with parameters: {'n_layers': 3, 'n_units_l0': 696.337032987555, 'n_units_l1': 5.9532576568375335, 'n_units_l2': 96.79537060685016}.
[I 2019-03-27 18:08:17,327] Finished a trial resulted in value: 1.56430079324027. Current best value is 1.4486623446264901 with parameters: {'n_layers': 3, 'n_units_l0': 696.337032987555, 'n_units_l1': 5.9532576568375335, 'n_uni

[I 2019-03-27 18:16:22,020] Finished a trial resulted in value: 1.624245654954182. Current best value is 1.44599479163038 with parameters: {'n_layers': 2, 'n_units_l0': 875.2499546465023, 'n_units_l1': 88.13995328010041}.
[I 2019-03-27 18:16:28,986] Finished a trial resulted in value: 3.6617890289726045. Current best value is 1.44599479163038 with parameters: {'n_layers': 2, 'n_units_l0': 875.2499546465023, 'n_units_l1': 88.13995328010041}.
[I 2019-03-27 18:17:09,725] Finished a trial resulted in value: 1.6225817446981483. Current best value is 1.44599479163038 with parameters: {'n_layers': 2, 'n_units_l0': 875.2499546465023, 'n_units_l1': 88.13995328010041}.
[I 2019-03-27 18:17:19,850] Finished a trial resulted in value: 2.365446605142078. Current best value is 1.44599479163038 with parameters: {'n_layers': 2, 'n_units_l0': 875.2499546465023, 'n_units_l1': 88.13995328010041}.
[I 2019-03-27 18:17:48,282] Finished a trial resulted in value: 5.292063666782852. Current best value is 1.445

[I 2019-03-27 18:20:53,870] Finished a trial resulted in value: 1.6114832633247578. Current best value is 1.4156719169443424 with parameters: {'n_layers': 2, 'n_units_l0': 629.0682900289332, 'n_units_l1': 10.853916801628936}.
[I 2019-03-27 18:21:18,237] Finished a trial resulted in value: 1.429351572755647. Current best value is 1.4156719169443424 with parameters: {'n_layers': 2, 'n_units_l0': 629.0682900289332, 'n_units_l1': 10.853916801628936}.
[I 2019-03-27 18:21:26,881] Finished a trial resulted in value: 1.7103183806846203. Current best value is 1.4156719169443424 with parameters: {'n_layers': 2, 'n_units_l0': 629.0682900289332, 'n_units_l1': 10.853916801628936}.
[I 2019-03-27 18:21:59,388] Finished a trial resulted in value: 1.4837889234036628. Current best value is 1.4156719169443424 with parameters: {'n_layers': 2, 'n_units_l0': 629.0682900289332, 'n_units_l1': 10.853916801628936}.
[I 2019-03-27 18:22:10,247] Finished a trial resulted in value: 1.6820676711285847. Current best 

[I 2019-03-27 18:23:25,157] Finished a trial resulted in value: 1.9340959149246952. Current best value is 1.4156719169443424 with parameters: {'n_layers': 2, 'n_units_l0': 629.0682900289332, 'n_units_l1': 10.853916801628936}.
[I 2019-03-27 18:23:27,448] Finished a trial resulted in value: 1.7031827405036302. Current best value is 1.4156719169443424 with parameters: {'n_layers': 2, 'n_units_l0': 629.0682900289332, 'n_units_l1': 10.853916801628936}.


In [21]:
# Optimized

n = 10
r2 = np.zeros(n)
mse = np.zeros(n)

for i in range(n):
    MLP = MLPRegressor((629, 11))
    MLP.fit(X_train, y_train)
    y_pred = MLP.predict(X_test)
    r2[i] = r2_score(y_test, y_pred)
    mse[i] = mean_squared_error(y_test, y_pred)

print("Test R2: {:.4f} ± {:.4f}".format(np.mean(r2), np.std(r2)))
print("Test MSE: {:.4f} ± {:.4f}".format(np.mean(mse), np.std(mse)))



Test R2: 0.6758 ± 0.0057
Test MSE: 1.3236 ± 0.0232


### RF

In [22]:
#Default
n = 10
r2 = np.zeros(n)
mse = np.zeros(n)

for i in range(n):
    RF = RandomForestRegressor()
    RF.fit(X_train, y_train)
    y_pred = RF.predict(X_test)
    r2[i] = r2_score(y_test, y_pred)
    mse[i] = mean_squared_error(y_test, y_pred)

print("Test R2: {:.4f} ± {:.4f}".format(np.mean(r2), np.std(r2)))
print("Test MSE: {:.4f} ± {:.4f}".format(np.mean(mse), np.std(mse)))



Test R2: 0.6387 ± 0.0106
Test MSE: 1.4750 ± 0.0434


In [23]:
study = optuna.create_study()
study.optimize(objective_rf, n_trials=100)

[I 2019-03-27 18:41:00,200] Finished a trial resulted in value: 2.3814723656587695. Current best value is 2.3814723656587695 with parameters: {'max_depth': 3.1454442590298304, 'n_estimators': 866.8221259527813, 'max_features': 7, 'min_samples_split': 5, 'min_samples_leaf': 2}.
[I 2019-03-27 18:41:01,106] Finished a trial resulted in value: 2.844269901513285. Current best value is 2.3814723656587695 with parameters: {'max_depth': 3.1454442590298304, 'n_estimators': 866.8221259527813, 'max_features': 7, 'min_samples_split': 5, 'min_samples_leaf': 2}.
[I 2019-03-27 18:41:01,263] Finished a trial resulted in value: 1.9668347824700754. Current best value is 1.9668347824700754 with parameters: {'max_depth': 31.431617978839412, 'n_estimators': 13.450056743420534, 'max_features': 5, 'min_samples_split': 6, 'min_samples_leaf': 9}.
[I 2019-03-27 18:41:01,450] Finished a trial resulted in value: 2.0100769199009845. Current best value is 1.9668347824700754 with parameters: {'max_depth': 31.4316179

[I 2019-03-27 18:41:38,820] Finished a trial resulted in value: 1.9862033794055995. Current best value is 1.6279030887882195 with parameters: {'max_depth': 40.6009114330541, 'n_estimators': 41.554331820773605, 'max_features': 8, 'min_samples_split': 10, 'min_samples_leaf': 6}.
[I 2019-03-27 18:41:39,549] Finished a trial resulted in value: 2.7235754153140266. Current best value is 1.6279030887882195 with parameters: {'max_depth': 40.6009114330541, 'n_estimators': 41.554331820773605, 'max_features': 8, 'min_samples_split': 10, 'min_samples_leaf': 6}.
[I 2019-03-27 18:41:40,104] Finished a trial resulted in value: 1.6289979122236529. Current best value is 1.6279030887882195 with parameters: {'max_depth': 40.6009114330541, 'n_estimators': 41.554331820773605, 'max_features': 8, 'min_samples_split': 10, 'min_samples_leaf': 6}.
[I 2019-03-27 18:41:40,759] Finished a trial resulted in value: 1.726200489978912. Current best value is 1.6279030887882195 with parameters: {'max_depth': 40.60091143

[I 2019-03-27 18:44:49,226] Finished a trial resulted in value: 1.590600839204951. Current best value is 1.5083468807782512 with parameters: {'max_depth': 34.840493965736485, 'n_estimators': 986.3704877190894, 'max_features': 9, 'min_samples_split': 2, 'min_samples_leaf': 1}.
[I 2019-03-27 18:44:52,967] Finished a trial resulted in value: 1.7811533586940718. Current best value is 1.5083468807782512 with parameters: {'max_depth': 34.840493965736485, 'n_estimators': 986.3704877190894, 'max_features': 9, 'min_samples_split': 2, 'min_samples_leaf': 1}.
[I 2019-03-27 18:44:57,330] Finished a trial resulted in value: 1.9348519840904526. Current best value is 1.5083468807782512 with parameters: {'max_depth': 34.840493965736485, 'n_estimators': 986.3704877190894, 'max_features': 9, 'min_samples_split': 2, 'min_samples_leaf': 1}.
[I 2019-03-27 18:44:59,508] Finished a trial resulted in value: 1.8046495452765534. Current best value is 1.5083468807782512 with parameters: {'max_depth': 34.84049396

[I 2019-03-27 18:46:23,613] Finished a trial resulted in value: 1.643555056655357. Current best value is 1.5083468807782512 with parameters: {'max_depth': 34.840493965736485, 'n_estimators': 986.3704877190894, 'max_features': 9, 'min_samples_split': 2, 'min_samples_leaf': 1}.
[I 2019-03-27 18:46:24,312] Finished a trial resulted in value: 1.6965577350794063. Current best value is 1.5083468807782512 with parameters: {'max_depth': 34.840493965736485, 'n_estimators': 986.3704877190894, 'max_features': 9, 'min_samples_split': 2, 'min_samples_leaf': 1}.
[I 2019-03-27 18:46:24,667] Finished a trial resulted in value: 1.8738607102158833. Current best value is 1.5083468807782512 with parameters: {'max_depth': 34.840493965736485, 'n_estimators': 986.3704877190894, 'max_features': 9, 'min_samples_split': 2, 'min_samples_leaf': 1}.
[I 2019-03-27 18:46:25,454] Finished a trial resulted in value: 1.7744741943436586. Current best value is 1.5083468807782512 with parameters: {'max_depth': 34.84049396

In [24]:
# Optimized

n = 10
r2 = np.zeros(n)
mse = np.zeros(n)

for i in range(n):
    RF = RandomForestRegressor(max_depth=35, n_estimators=986, max_features=9,
                              min_samples_split=2, min_samples_leaf=1)
    RF.fit(X_train, y_train)
    y_pred = RF.predict(X_test)
    r2[i] = r2_score(y_test, y_pred)
    mse[i] = mean_squared_error(y_test, y_pred)

print("Test R2: {:.4f} ± {:.4f}".format(np.mean(r2), np.std(r2)))
print("Test MSE: {:.4f} ± {:.4f}".format(np.mean(mse), np.std(mse)))

Test R2: 0.6402 ± 0.0023
Test MSE: 1.4688 ± 0.0095


## Transformer
### MLP

In [31]:
# Default
n = 10
r2 = np.zeros(n)
mse = np.zeros(n)

for i in range(n):
    MLP = MLPRegressor()
    MLP.fit(X_train, y_train)
    y_pred = MLP.predict(X_test)
    r2[i] = r2_score(y_test, y_pred)
    mse[i] = mean_squared_error(y_test, y_pred)

print("Test R2: {:.4f} ± {:.4f}".format(np.mean(r2), np.std(r2)))
print("Test MSE: {:.4f} ± {:.4f}".format(np.mean(mse), np.std(mse)))



Test R2: 0.8464 ± 0.0057
Test MSE: 0.6271 ± 0.0234


In [32]:
study = optuna.create_study()
study.optimize(objective_mlp, n_trials=100)

[I 2019-03-27 19:01:51,906] Finished a trial resulted in value: 0.649292208312422. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:02:30,938] Finished a trial resulted in value: 0.7950882520451092. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:03:48,643] Finished a trial resulted in value: 0.6977634029723572. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:03:52,328] Finished a trial resulted in value: 0.8982705886442941. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:03:57,669] Finished a trial resulted in value: 0.7064421751891687. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:04:03,723] Finished a trial r

[I 2019-03-27 19:05:07,876] Finished a trial resulted in value: 1.092241616090808. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:05:12,068] Finished a trial resulted in value: 3.9695815154590917. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:05:15,108] Finished a trial resulted in value: 8.255620232972678. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:05:27,180] Finished a trial resulted in value: 0.6931765924224271. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:05:37,473] Finished a trial resulted in value: 0.6825002101041501. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:05:39,746] Finished a trial re

[I 2019-03-27 19:06:05,659] Finished a trial resulted in value: 0.7098399952478203. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:06:08,400] Finished a trial resulted in value: 0.837603573247074. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:06:15,871] Finished a trial resulted in value: 0.7194265827282295. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:06:30,218] Finished a trial resulted in value: 0.6740020922112564. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:06:48,821] Finished a trial resulted in value: 0.6920980863931724. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:06:53,816] Finished a trial r

[I 2019-03-27 19:08:06,647] Finished a trial resulted in value: 0.6910402577279486. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:08:10,586] Finished a trial resulted in value: 0.7885374877808354. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:08:38,100] Finished a trial resulted in value: 0.7739654520521171. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:08:55,130] Finished a trial resulted in value: 0.7314985440628122. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:09:02,351] Finished a trial resulted in value: 0.7069490495995927. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:09:21,514] Finished a trial 

[I 2019-03-27 19:10:29,496] Finished a trial resulted in value: 2.4091547088671486. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:10:35,119] Finished a trial resulted in value: 0.7101560505520901. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:10:44,265] Finished a trial resulted in value: 0.7004062839832418. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:10:51,636] Finished a trial resulted in value: 0.7978353796039545. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:11:20,702] Finished a trial resulted in value: 0.70559714085742. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:11:23,427] Finished a trial re

[I 2019-03-27 19:11:30,180] Finished a trial resulted in value: 1.5821167507441753. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:11:37,667] Finished a trial resulted in value: 0.7098117133218231. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:12:02,510] Finished a trial resulted in value: 0.7091521070859973. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:12:12,101] Finished a trial resulted in value: 0.8572661064982283. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:12:30,208] Finished a trial resulted in value: 0.7132627143517078. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:12:39,798] Finished a trial 

[I 2019-03-27 19:12:46,740] Finished a trial resulted in value: 0.9939287003139476. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:12:51,020] Finished a trial resulted in value: 0.7192219016055384. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:12:57,272] Finished a trial resulted in value: 0.6710251599806183. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:13:01,343] Finished a trial resulted in value: 0.7805270410057119. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:13:05,410] Finished a trial resulted in value: 0.684985255831078. Current best value is 0.649292208312422 with parameters: {'n_layers': 1, 'n_units_l0': 88.76031418639229}.
[I 2019-03-27 19:13:15,800] Finished a trial r

[I 2019-03-27 19:13:23,268] Finished a trial resulted in value: 1.8522737101522333. Current best value is 0.6465665582810649 with parameters: {'n_layers': 1, 'n_units_l0': 60.00713107581035}.
[I 2019-03-27 19:13:28,060] Finished a trial resulted in value: 0.6245894499044138. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:13:30,928] Finished a trial resulted in value: 0.8508555345730325. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:13:35,430] Finished a trial resulted in value: 0.7483461973176186. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:13:38,776] Finished a trial resulted in value: 0.6801622433540251. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:13:47,012] Finished a t

[I 2019-03-27 19:13:53,719] Finished a trial resulted in value: 0.6570434251062331. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:14:00,688] Finished a trial resulted in value: 0.7269334865168618. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:14:06,024] Finished a trial resulted in value: 0.7157859340216768. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:14:09,472] Finished a trial resulted in value: 5.330363069582716. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:14:32,557] Finished a trial resulted in value: 0.8242315629264407. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:14:44,581] Finished a tr

[I 2019-03-27 19:15:04,834] Finished a trial resulted in value: 0.7445283100438008. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:15:10,718] Finished a trial resulted in value: 0.6632081513307071. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:15:30,692] Finished a trial resulted in value: 0.6876014557205344. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:15:34,306] Finished a trial resulted in value: 0.7979648195085745. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:15:42,126] Finished a trial resulted in value: 0.6803989305877041. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:15:44,894] Finished a t

[I 2019-03-27 19:15:58,283] Finished a trial resulted in value: 0.7232121755476626. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:16:02,562] Finished a trial resulted in value: 0.7615119548491417. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:16:35,024] Finished a trial resulted in value: 0.7351832498289685. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:16:37,601] Finished a trial resulted in value: 0.8550010563882665. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:16:41,203] Finished a trial resulted in value: 0.7018550196397549. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:16:44,749] Finished a t

[I 2019-03-27 19:17:04,042] Finished a trial resulted in value: 0.7602311092326377. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:17:09,257] Finished a trial resulted in value: 0.695896640074519. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:17:33,588] Finished a trial resulted in value: 0.6655674029319893. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:17:36,565] Finished a trial resulted in value: 0.7896297892010395. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:17:39,840] Finished a trial resulted in value: 0.7210839063208673. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:17:44,449] Finished a tr

[I 2019-03-27 19:17:51,706] Finished a trial resulted in value: 0.734368924979903. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:17:55,438] Finished a trial resulted in value: 0.7312123554783386. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:18:05,348] Finished a trial resulted in value: 0.7565209785108375. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:18:13,989] Finished a trial resulted in value: 1.8650466283404439. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:18:29,734] Finished a trial resulted in value: 0.8828097623429674. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:18:42,935] Finished a tr

[I 2019-03-27 19:19:28,348] Finished a trial resulted in value: 0.7136375326950193. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:19:42,119] Finished a trial resulted in value: 0.7146508419666252. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:20:22,046] Finished a trial resulted in value: 0.7063618603927599. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:20:42,800] Finished a trial resulted in value: 0.7743913715873775. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:20:49,176] Finished a trial resulted in value: 0.6739834348709552. Current best value is 0.6245894499044138 with parameters: {'n_layers': 1, 'n_units_l0': 58.78949075813588}.
[I 2019-03-27 19:20:54,303] Finished a t

In [33]:
# Optimized
n = 10
r2 = np.zeros(n)
mse = np.zeros(n)

for i in range(n):
    MLP = MLPRegressor(59)
    MLP.fit(X_train, y_train)
    y_pred = MLP.predict(X_test)
    r2[i] = r2_score(y_test, y_pred)
    mse[i] = mean_squared_error(y_test, y_pred)

print("Test R2: {:.4f} ± {:.4f}".format(np.mean(r2), np.std(r2)))
print("Test MSE: {:.4f} ± {:.4f}".format(np.mean(mse), np.std(mse)))



Test R2: 0.8421 ± 0.0040
Test MSE: 0.6446 ± 0.0161


### RF

In [34]:
#Default
n = 10
r2 = np.zeros(n)
mse = np.zeros(n)

for i in range(n):
    RF = RandomForestRegressor()
    RF.fit(X_train, y_train)
    y_pred = RF.predict(X_test)
    r2[i] = r2_score(y_test, y_pred)
    mse[i] = mean_squared_error(y_test, y_pred)

print("Test R2: {:.4f} ± {:.4f}".format(np.mean(r2), np.std(r2)))
print("Test MSE: {:.4f} ± {:.4f}".format(np.mean(mse), np.std(mse)))



Test R2: 0.7645 ± 0.0111
Test MSE: 0.9615 ± 0.0453


In [35]:
study = optuna.create_study()
study.optimize(objective_rf, n_trials=100)

[I 2019-03-28 11:48:58,732] Finished a trial resulted in value: 1.6698292243017374. Current best value is 1.6698292243017374 with parameters: {'max_depth': 5.6808253653308896, 'n_estimators': 27.408973177788376, 'max_features': 4, 'min_samples_split': 2, 'min_samples_leaf': 7}.
[I 2019-03-28 11:48:59,303] Finished a trial resulted in value: 1.192314717142425. Current best value is 1.192314717142425 with parameters: {'max_depth': 11.687340586483788, 'n_estimators': 31.086403951298298, 'max_features': 10, 'min_samples_split': 10, 'min_samples_leaf': 7}.
[I 2019-03-28 11:48:59,720] Finished a trial resulted in value: 1.787413267161701. Current best value is 1.192314717142425 with parameters: {'max_depth': 11.687340586483788, 'n_estimators': 31.086403951298298, 'max_features': 10, 'min_samples_split': 10, 'min_samples_leaf': 7}.
[I 2019-03-28 11:48:59,840] Finished a trial resulted in value: 1.5585647500765905. Current best value is 1.192314717142425 with parameters: {'max_depth': 11.68734

[I 2019-03-28 11:51:04,529] Finished a trial resulted in value: 1.203589037516287. Current best value is 0.9559830360833292 with parameters: {'max_depth': 81.44503944496788, 'n_estimators': 868.2335792329872, 'max_features': 5, 'min_samples_split': 7, 'min_samples_leaf': 1}.
[I 2019-03-28 11:51:05,142] Finished a trial resulted in value: 1.0751876260884976. Current best value is 0.9559830360833292 with parameters: {'max_depth': 81.44503944496788, 'n_estimators': 868.2335792329872, 'max_features': 5, 'min_samples_split': 7, 'min_samples_leaf': 1}.
[I 2019-03-28 11:51:11,359] Finished a trial resulted in value: 1.2821300974628533. Current best value is 0.9559830360833292 with parameters: {'max_depth': 81.44503944496788, 'n_estimators': 868.2335792329872, 'max_features': 5, 'min_samples_split': 7, 'min_samples_leaf': 1}.
[I 2019-03-28 11:51:11,823] Finished a trial resulted in value: 1.779600426774513. Current best value is 0.9559830360833292 with parameters: {'max_depth': 81.445039444967

[I 2019-03-28 11:52:18,833] Finished a trial resulted in value: 1.1042304144843025. Current best value is 0.9559830360833292 with parameters: {'max_depth': 81.44503944496788, 'n_estimators': 868.2335792329872, 'max_features': 5, 'min_samples_split': 7, 'min_samples_leaf': 1}.
[I 2019-03-28 11:52:35,845] Finished a trial resulted in value: 1.0649787530506916. Current best value is 0.9559830360833292 with parameters: {'max_depth': 81.44503944496788, 'n_estimators': 868.2335792329872, 'max_features': 5, 'min_samples_split': 7, 'min_samples_leaf': 1}.
[I 2019-03-28 11:52:38,433] Finished a trial resulted in value: 1.1335937195348178. Current best value is 0.9559830360833292 with parameters: {'max_depth': 81.44503944496788, 'n_estimators': 868.2335792329872, 'max_features': 5, 'min_samples_split': 7, 'min_samples_leaf': 1}.
[I 2019-03-28 11:52:41,422] Finished a trial resulted in value: 2.549202823210309. Current best value is 0.9559830360833292 with parameters: {'max_depth': 81.44503944496

[I 2019-03-28 11:53:26,596] Finished a trial resulted in value: 1.1215785196355599. Current best value is 0.9558994986586622 with parameters: {'max_depth': 35.322289568304406, 'n_estimators': 44.38473822847152, 'max_features': 8, 'min_samples_split': 6, 'min_samples_leaf': 3}.
[I 2019-03-28 11:53:32,364] Finished a trial resulted in value: 1.0901930687196015. Current best value is 0.9558994986586622 with parameters: {'max_depth': 35.322289568304406, 'n_estimators': 44.38473822847152, 'max_features': 8, 'min_samples_split': 6, 'min_samples_leaf': 3}.
[I 2019-03-28 11:53:47,850] Finished a trial resulted in value: 1.097794102847676. Current best value is 0.9558994986586622 with parameters: {'max_depth': 35.322289568304406, 'n_estimators': 44.38473822847152, 'max_features': 8, 'min_samples_split': 6, 'min_samples_leaf': 3}.
[I 2019-03-28 11:53:47,961] Finished a trial resulted in value: 1.8493076388446124. Current best value is 0.9558994986586622 with parameters: {'max_depth': 35.32228956

In [36]:
# Optimized

n = 10
r2 = np.zeros(n)
mse = np.zeros(n)

for i in range(n):
    RF = RandomForestRegressor(max_depth=35, n_estimators=44, max_features=8,
                              min_samples_split=6, min_samples_leaf=3)
    RF.fit(X_train, y_train)
    y_pred = RF.predict(X_test)
    r2[i] = r2_score(y_test, y_pred)
    mse[i] = mean_squared_error(y_test, y_pred)

print("Test R2: {:.4f} ± {:.4f}".format(np.mean(r2), np.std(r2)))
print("Test MSE: {:.4f} ± {:.4f}".format(np.mean(mse), np.std(mse)))

Test R2: 0.7724 ± 0.0053
Test MSE: 0.9292 ± 0.0218
