In [125]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from pathlib import Path
from sklearn.datasets import fetch_california_housing
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error as mse
from IPython.display import display
from sklearn.preprocessing import LabelEncoder
import optuna
from optuna.samplers import TPESampler
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm as lgbm
from catboost import CatBoostRegressor
from sklearn.preprocessing import StandardScaler

In [67]:
import warnings
warnings.filterwarnings('ignore')

In [68]:
# setting a base_dir so we don't have to type all the paths
BASE_DIR = Path("/kaggle/input/playground-series-s3e1/")

In [69]:
train_df = pd.read_csv(BASE_DIR / "train.csv")
test_df = pd.read_csv(BASE_DIR / "test.csv")

# Preprocessing

In [70]:
cols_to_use = train_df.columns[1:-1]
display(cols_to_use)
display(len(cols_to_use))

Index(['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup',
       'Latitude', 'Longitude'],
      dtype='object')

8

In [71]:
train_df.drop(columns=["id"], axis=1, inplace=True)
test_df.drop(columns=["id"], axis=1, inplace=True)
train_df.isnull().sum()

MedInc         0
HouseAge       0
AveRooms       0
AveBedrms      0
Population     0
AveOccup       0
Latitude       0
Longitude      0
MedHouseVal    0
dtype: int64

In [72]:
df = pd.concat([train_df, test_df], axis=0, ignore_index=True)

# Feature Engineering
Copying all the feature engineering from https://www.kaggle.com/code/dmitryuarov/ps-s3e1-coordinates-key-to-victory

## Encoding Trick

In [73]:
emb_size = 20
precision = 1e6 

latlon = np.expand_dims(df[['Latitude', 'Longitude']].values, axis=-1) 

# display(latlon)
# display(latlon.shape)

m = np.exp(np.log(precision) / emb_size) 

angle_freq = m ** np.arange(emb_size) 
angle_freq = angle_freq.reshape(1, 1, emb_size) 

latlon = latlon * angle_freq 
latlon[..., 0::2] = np.cos(latlon[..., 0::2]) 
latlon[..., 1::2] = np.sin(latlon[..., 1::2]) 
latlon = latlon.reshape(-1, 2 * emb_size) 

In [74]:
df['exp_latlon1'] = [lat[0] for lat in latlon]
df['exp_latlon2'] = [lat[1] for lat in latlon]

## Coords with PCA & UMAP

In [75]:
from sklearn.decomposition import PCA

coordinates = df[['Latitude', 'Longitude']].values
pca = PCA().fit(coordinates)

df['pca_lat'] = pca.transform(coordinates)[:,0]
df['pca_lon'] = pca.transform(coordinates)[:,1]

In [76]:
from umap import UMAP
umap = UMAP(n_components=2, n_neighbors=50, random_state=228).fit(coordinates)
df['umap_lat'] = umap.transform(coordinates)[:,0]
df['umap_lon'] = umap.transform(coordinates)[:,1]

## Cartesian Coords Rotation

In [77]:
df['rot_15_x'] = (np.cos(np.radians(15)) * df['Longitude']) + \
                  (np.sin(np.radians(15)) * df['Latitude'])
    
df['rot_15_y'] = (np.cos(np.radians(15)) * df['Latitude']) + \
                  (np.sin(np.radians(15)) * df['Longitude'])
    
df['rot_30_x'] = (np.cos(np.radians(30)) * df['Longitude']) + \
                  (np.sin(np.radians(30)) * df['Latitude'])
    
df['rot_30_y'] = (np.cos(np.radians(30)) * df['Latitude']) + \
                  (np.sin(np.radians(30)) * df['Longitude'])
    
df['rot_45_x'] = (np.cos(np.radians(44)) * df['Longitude']) + \
                  (np.sin(np.radians(45)) * df['Latitude'])

## Coords Location

In [78]:
!pip install reverse_geocoder

[0m

In [79]:
import reverse_geocoder as rg

coordinates = list(zip(df['Latitude'], df['Longitude']))
results = rg.search(coordinates)
df['place'] = [x['admin2'] for x in results]

places = ['Los Angeles County', 'Orange County', 'Kern County',
          'Alameda County', 'San Francisco County', 'Ventura County',
          'Santa Clara County', 'Fresno County', 'Santa Barbara County',
          'Contra Costa County', 'Yolo County', 'Monterey County',
          'Riverside County', 'Napa County']

def replace(x):
    if x in places:
        return x
    else:
        return 'Other'
    
df['place'] = df['place'].apply(lambda x: replace(x))
le = LabelEncoder()
df['place'] = le.fit_transform(df['place'])

# Distance to Cities and Coast Lines

In [80]:
from haversine import haversine

Sac = (38.576931, -121.494949)
SF = (37.780080, -122.420160)
SJ = (37.334789, -121.888138)
LA = (34.052235, -118.243683)
SD = (32.715759, -117.163818)

df['dist_Sac'] = df.apply(lambda x: haversine((x['Latitude'], x['Longitude']), Sac, unit='ft'), axis=1)
df['dist_SF'] = df.apply(lambda x: haversine((x['Latitude'], x['Longitude']), SF, unit='ft'), axis=1)
df['dist_SJ'] = df.apply(lambda x: haversine((x['Latitude'], x['Longitude']), SJ, unit='ft'), axis=1)
df['dist_LA'] = df.apply(lambda x: haversine((x['Latitude'], x['Longitude']), LA, unit='ft'), axis=1)
df['dist_SD'] = df.apply(lambda x: haversine((x['Latitude'], x['Longitude']), SD, unit='ft'), axis=1)
df['dist_nearest_city'] = df[['dist_Sac', 'dist_SF', 'dist_SJ', 
                              'dist_LA', 'dist_SD']].min(axis=1)

In [81]:
from shapely.geometry import LineString, Point

coast_points = LineString([(32.6644, -117.1613), (33.2064, -117.3831),
                           (33.7772, -118.2024), (34.4634, -120.0144),
                           (35.4273, -120.8819), (35.9284, -121.4892),
                           (36.9827, -122.0289), (37.6114, -122.4916),
                           (38.3556, -123.0603), (39.7926, -123.8217),
                           (40.7997, -124.1881), (41.7558, -124.1976)])

df['dist_to_coast'] = df.apply(lambda x: Point(x['Latitude'], x['Longitude']).distance(coast_points), axis=1)

In [82]:
# adding features from here https://www.kaggle.com/competitions/playground-series-s3e1/discussion/375859

df_ext = df.copy()
# Number of houses in block : Population / AveOccup (size of block)
df_ext["n_houses_per_block"] = df.Population // df.AveOccup

# Total income of block : MedInc * Population (total wealth of each block - could adjust to discount children)
df_ext["total_income_per_block"] = df.MedInc * df.Population

# Ratio of occupants to bedrooms : AveOccup / AveBedrms (could help identify summer houses)
df_ext["occuptans_to_bedrooms"] = df.AveOccup // df.AveBedrms

# Number of unused bedrooms : AveBedrms - AveOccup (could correspond to guest rooms)
df_ext["unused_bedrooms"] = df.AveBedrms - df.AveOccup

# Total number of rooms : AveBedrms + AveRooms (indicates size of house)
df_ext["total_rooms"] = df.AveBedrms + df.AveRooms

# Number of non-bedrooms rooms : AveRooms - AveBedrms (how many bathrooms, kitchens etc.)
df_ext["non_bedroom_rooms"] = df.AveRooms - df.AveBedrms

# Ratio of bedrooms to rooms : AveBedrms/AveRooms (could be useful)
df_ext['bedrooms_to_rooms'] = df.AveBedrms // df.AveRooms

# Ratio of occupants to rooms : AveOccup / AveRooms (could be useful)
df_ext["occupants_to_rooms"] = df.AveOccup // df.AveRooms

# Preprocessing v2 - Could be final

In [83]:
train = df.iloc[:-len(test_df),:]
test = df.iloc[-len(test_df):,:].drop('MedHouseVal', axis=1).reset_index(drop=True)

X = train.drop('MedHouseVal', axis=1)
y = train['MedHouseVal']

# Modeling

In [115]:
def cross_validate(X, y, params, model_type="xgb"):
    all_scores = []
    kf = KFold(n_splits=5, shuffle=True, random_state=1337)
    
    for fold, (idx_tr, idx_val) in enumerate(kf.split(X)):
        X_tr, y_tr = X[idx_tr], y[idx_tr]
        X_val, y_val = X[idx_val], y[idx_val]
        
        if model_type=="xgb":
            model = xgb.XGBRegressor(**params)
            model.fit(X_tr, y_tr)
        
        elif model_type=="lgbm":
            model = lgbm.LGBMRegressor(objective="regression", **params)
            model.fit(X_tr, y_tr)
        
        elif model_type=="catboost":
            model = CatBoostRegressor(**params)
            model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=0, early_stopping_rounds=100)
        
        y_preds = model.predict(X_val)
        
        rmse = mse(y_val, y_preds, squared=False)
        
        print(f"Fold: {fold} \t RMSE: {rmse}")
        
        all_scores.append(rmse)
    
    avg_rmse = np.mean(all_scores)
    print(f"AVG RMSE: {avg_rmse}")

## XGBoost

In [85]:
# Using tuned params on combined dataset
xgb_params = {'n_estimators': 456,
 'max_depth': 8,
 'learning_rate': 0.026812575512866303,
 'min_child_weight': 10,
 'gamma': 0.22149150031512366,
 'subsample': 0.6211899414778005,
 'colsample_bytree': 0.32874025570599114,
 'reg_alpha': 1.5211300201400934e-05,
 'reg_lambda': 0.0005497911242012908}

# # Usingt params tuned on compeition dataset only
# xgb_params = {'n_estimators': 491,
#  'max_depth': 82,
#  'learning_rate': 0.010392729170176484,
#  'min_child_weight': 9,
#  'gamma': 0.0015886922329058258,
#  'subsample': 0.4063110795725336,
#  'colsample_bytree': 0.37944463822606667,
#  'reg_alpha': 0.7385809936556588,
#  'reg_lambda': 3.929683527917407e-08}

In [86]:
# without the new rooms cols
cross_validate(X.values, y.values, xgb_params, model_type="xgb")

Fold: 0 	 RMSE: 0.5606461390119041
Fold: 1 	 RMSE: 0.5474673210504931



KeyboardInterrupt



In [87]:
xgb_model = xgb.XGBRegressor(**xgb_params)
xgb_model.fit(X, y)

XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,
             colsample_bylevel=1, colsample_bynode=1,
             colsample_bytree=0.32874025570599114, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None,
             gamma=0.22149150031512366, gpu_id=-1, grow_policy='depthwise',
             importance_type=None, interaction_constraints='',
             learning_rate=0.026812575512866303, max_bin=256,
             max_cat_to_onehot=4, max_delta_step=0, max_depth=8, max_leaves=0,
             min_child_weight=10, missing=nan, monotone_constraints='()',
             n_estimators=456, n_jobs=0, num_parallel_tree=1, predictor='auto',
             random_state=0, reg_alpha=1.5211300201400934e-05,
             reg_lambda=0.0005497911242012908, ...)

In [89]:
# y_pred_test = xgb_model.predict(test)
# y_pred_test

array([0.65149474, 1.0264238 , 4.08481   , ..., 1.179051  , 3.6456306 ,
       3.7058122 ], dtype=float32)

In [90]:
# test_df_2 = pd.read_csv(BASE_DIR / "test.csv")

In [92]:
# submission_df = pd.DataFrame(data={'id': test_df_2.id, 'MedHouseVal': y_pred_test})
# submission_df.MedHouseVal.clip(0, 5, inplace=True)

In [93]:
# submission_df.to_csv("submission.csv", index=False)

# LightGBM

In [94]:
lgbm_params = {'n_estimators': 10000,
                 'num_rounds': 291,
                 'learning_rate': 0.14293898453640025,
                 'num_leaves': 2780,
                 'max_depth': 8,
                 'min_data_in_leaf': 400,
                 'lambda_l1': 45,
                 'lambda_l2': 0,
                 'min_gain_to_split': 0.002584545158305085,
                 'bagging_fraction': 0.9,
                 'bagging_freq': 1,
                 'feature_fraction': 0.4}

In [95]:
cross_validate(X.values, y.values, lgbm_params, model_type="lgbm")

Fold: 0 	 RMSE: 0.5688384524000736
Fold: 1 	 RMSE: 0.557009472506133
Fold: 2 	 RMSE: 0.5592965486333817
Fold: 3 	 RMSE: 0.5707360559721112
Fold: 4 	 RMSE: 0.5581323235943161
AVG RMSE: 0.562802570621203


# CatBoost

## Tuning catboost

In [102]:
# def objective(trial):
# #     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
#     param = {
#         "loss_function": trial.suggest_categorical("loss_function", ["RMSE"]),
#         "learning_rate": trial.suggest_loguniform("learning_rate", 1e-5, 1e0),
#         "l2_leaf_reg": trial.suggest_loguniform("l2_leaf_reg", 1e-2, 1e0),
#         "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.01, 0.1),
#         "depth": trial.suggest_int("depth", 1, 10),
#         "boosting_type": trial.suggest_categorical("boosting_type", ["Ordered", "Plain"]),
#         "bootstrap_type": trial.suggest_categorical("bootstrap_type", ["Bayesian", "Bernoulli", "MVS"]),
#         "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 2, 20),
#         "one_hot_max_size": trial.suggest_int("one_hot_max_size", 2, 20),  
#     }
#     # Conditional Hyper-Parameters
#     if param["bootstrap_type"] == "Bayesian":
#         param["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0, 10)
#     elif param["bootstrap_type"] == "Bernoulli":
#         param["subsample"] = trial.suggest_float("subsample", 0.1, 1)
    
#     cv = KFold(n_splits=5, shuffle=True, random_state=1337)

#     cv_scores = np.empty(5)
#     for idx, (train_idx, test_idx) in enumerate(cv.split(X, y)):
#         X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
#         y_train, y_test = y[train_idx], y[test_idx]

#         reg = CatBoostRegressor(**param)
#         reg.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=0, early_stopping_rounds=100)
#         y_pred = reg.predict(X_test)
# #         score = r2_score(y_test, y_pred)
#         cv_scores[idx] = mse(y_test, y_pred, squared=False)
    
#     rmse = np.mean(cv_scores)
#     print(f"AVG CV RMSE: \t {rmse}")
#     return rmse

In [103]:
# study = optuna.create_study(sampler=TPESampler(), direction="minimize")
# study.optimize(objective, n_trials=100, show_progress_bar=True)

[32m[I 2023-01-09 18:11:53,172][0m A new study created in memory with name: no-name-48a46911-093c-4ad5-a6bd-7448d3beeea0[0m


  0%|          | 0/100 [00:00<?, ?it/s]

AVG CV RMSE: 	 0.605835915332434
[32m[I 2023-01-09 18:12:14,965][0m Trial 0 finished with value: 0.605835915332434 and parameters: {'loss_function': 'RMSE', 'learning_rate': 0.0448657975560104, 'l2_leaf_reg': 0.026914413043332048, 'colsample_bylevel': 0.02737854133229163, 'depth': 6, 'boosting_type': 'Ordered', 'bootstrap_type': 'Bayesian', 'min_data_in_leaf': 14, 'one_hot_max_size': 9, 'bagging_temperature': 2.4060025506713414}. Best is trial 0 with value: 0.605835915332434.[0m
AVG CV RMSE: 	 0.5815814086716692
[32m[I 2023-01-09 18:12:33,845][0m Trial 1 finished with value: 0.5815814086716692 and parameters: {'loss_function': 'RMSE', 'learning_rate': 0.15732866868391407, 'l2_leaf_reg': 0.018152148367300943, 'colsample_bylevel': 0.023646385325580964, 'depth': 4, 'boosting_type': 'Ordered', 'bootstrap_type': 'Bayesian', 'min_data_in_leaf': 18, 'one_hot_max_size': 18, 'bagging_temperature': 0.39807254503756884}. Best is trial 1 with value: 0.5815814086716692.[0m
AVG CV RMSE: 	 0.57

In [104]:
# study.best_value

0.5573980647463033

In [105]:
# study.best_params

{'loss_function': 'RMSE',
 'learning_rate': 0.06452422629792524,
 'l2_leaf_reg': 0.43216066651495005,
 'colsample_bylevel': 0.08287505606341404,
 'depth': 9,
 'boosting_type': 'Plain',
 'bootstrap_type': 'Bernoulli',
 'min_data_in_leaf': 15,
 'one_hot_max_size': 13,
 'subsample': 0.7287833251957154}

In [109]:
# catboost tuned params
catboost_params = {'loss_function': 'RMSE',
 'learning_rate': 0.06452422629792524,
 'l2_leaf_reg': 0.43216066651495005,
 'colsample_bylevel': 0.08287505606341404,
 'depth': 9,
 'boosting_type': 'Plain',
 'bootstrap_type': 'Bernoulli',
 'min_data_in_leaf': 15,
 'one_hot_max_size': 13,
 'subsample': 0.7287833251957154}

In [116]:
cross_validate(X.values, y.values, catboost_params, model_type="catboost")

Fold: 0 	 RMSE: 0.5163534034758568
Fold: 1 	 RMSE: 0.4615402239576296
Fold: 2 	 RMSE: 0.4609536635439039
Fold: 3 	 RMSE: 0.4666844369874598
Fold: 4 	 RMSE: 0.4638156297723478
AVG RMSE: 0.4738694715474396


In [117]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.10, shuffle=True, random_state=1337)

In [118]:
catboost_model = CatBoostRegressor(**catboost_params)
catboost_model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=0, early_stopping_rounds=100)

<catboost.core.CatBoostRegressor at 0x7f133b40d6d0>

In [119]:
# y_pred_test = catboost_model.predict(test)
# y_pred_test

array([0.62484081, 0.96715308, 4.06528458, ..., 1.13804771, 3.76535544,
       3.57149727])

In [120]:
# test_df_2 = pd.read_csv(BASE_DIR / "test.csv")

In [121]:
# submission_df = pd.DataFrame(data={'id': test_df_2.id, 'MedHouseVal': y_pred_test})
# submission_df.MedHouseVal.clip(0, 5, inplace=True)

In [122]:
# submission_df.to_csv("submission.csv", index=False)

# Let's give Keras a shot

In [176]:
sc = StandardScaler()
sc.fit(X)
X_sc = sc.transform(X)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X_sc, y, test_size=0.15, shuffle=True, random_state=92)
# Build the model
model = keras.models.Sequential()
model.add(layers.Dense(512, input_dim=27, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1))

# Compile the model
model.compile(loss='mean_squared_error', optimizer='adam', metrics=[keras.metrics.RootMeanSquaredError()])

model.fit(X_train, y_train, epochs=100, batch_size=256, validation_data=(X_test, y_test))

In [None]:
inputs = layers.Input(shape=X.shape[1])
x = layers.Dense(512, activation="relu")(inputs)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)

x = layers.Dense(512, activation="relu")(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)

x = layers.Dense(256, activation="relu")(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)

x = layers.Dense(256, activation="relu")(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)

x = layers.Dense(128, activation="relu")(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)

x = layers.Dense(128, activation="relu")(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)

outputs = layers.Dense(1)(x)


keras_model = keras.Model(inputs=inputs, outputs=outputs)
keras_model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.005),
                   loss=keras.losses.mean_squared_error,
                   metrics=[keras.metrics.RootMeanSquaredError()])

early_stopping = keras.callbacks.EarlyStopping(
    patience=100,
    min_delta=0.001,
    monitor="val_root_mean_squared_error",
    restore_best_weights=True,
)

history = keras_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    batch_size=512,
    epochs=300,
    callbacks=[early_stopping],
    verbose=1,
)



# y_pred = keras_model.predict(X_test)
# fold_rmse = mse(y_test, y_pred, squared=False)

Epoch 1/300


Exception ignored in: <function IteratorResourceDeleter.__del__ at 0x7f13c0ce2d40>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/tensorflow/python/data/ops/iterator_ops.py", line 546, in __del__
    handle=self._handle, deleter=self._deleter)
  File "/opt/conda/lib/python3.7/site-packages/tensorflow/python/ops/gen_dataset_ops.py", line 1264, in delete_iterator
    _ctx, "DeleteIterator", name, handle, deleter)
KeyboardInterrupt: 


## Let's fine tune Keras

In [170]:
# def objective(trial, X, y):
#     sc = StandardScaler()
#     sc.fit(X)
#     X = sc.transform(X)
    
#     # keras model
#     inputs = layers.Input(shape=X.shape[1])
    
#     # block 1
#     x = layers.Dense(
#                     units = trial.suggest_categorical('units1', [64, 128, 256, 512, 1024]),
#                     activation = "relu")(inputs)
#     x = layers.BatchNormalization()(x)
#     x = layers.Dropout(
#                     rate = round(trial.suggest_discrete_uniform("rate1", 0.0, 0.6, 0.1),1))(x)
    
#     # block 2
#     x = layers.Dense(
#                     units = trial.suggest_categorical('units2', [64, 128, 256, 512, 1024]),
#                     activation = "relu")(inputs)
#     x = layers.BatchNormalization()(x)
#     x = layers.Dropout(
#                     rate = round(trial.suggest_discrete_uniform("rate2", 0.0, 0.6, 0.1), 1))(x)
    
#     # block 3
#     x = layers.Dense(
#                     units = trial.suggest_categorical('units3', [32, 64, 128, 256, 512]),
#                     activation = "relu")(inputs)
#     x = layers.BatchNormalization()(x)
#     x = layers.Dropout(
#                     rate = round(trial.suggest_discrete_uniform("rate3", 0.0, 0.6, 0.1),1))(x)
    
#     # block 4
#     x = layers.Dense(
#                     units = trial.suggest_categorical('units4', [32, 64, 128, 256, 512]),
#                     activation = "relu")(inputs)
#     x = layers.BatchNormalization()(x)
#     x = layers.Dropout(
#                     rate = round(trial.suggest_discrete_uniform("rate4", 0.0, 0.6, 0.1),1))(x)

#     # block 5
#     x = layers.Dense(
#                     units = trial.suggest_categorical('units5', [16, 32, 64, 128, 256]),
#                     activation = "relu")(inputs)
#     x = layers.BatchNormalization()(x)
#     x = layers.Dropout(
#                     rate = round(trial.suggest_discrete_uniform("rate5", 0.0, 0.6, 0.1), 1))(x)
    
#     # block 6
#     x = layers.Dense(
#                     units = trial.suggest_categorical('units6', [16, 32, 64, 128, 256]),
#                     activation = "relu")(inputs)
#     x = layers.BatchNormalization()(x)
#     x = layers.Dropout(
#                     rate = round(trial.suggest_discrete_uniform("rate6", 0.0, 0.6, 0.1), 1))(x)
    
#     outputs = layers.Dense(1)(x)
    
#     model = keras.Model(inputs=inputs, outputs=outputs)
    
#     model.compile(optimizer=trial.suggest_categorical("optimizer", ["Adam", "RMSprop"]),
#                  loss=keras.losses.mean_squared_error,
#                  metrics=[keras.metrics.RootMeanSquaredError()])
    
#     early_stopping = keras.callbacks.EarlyStopping(
#     patience= 5,
#     min_delta=0.001,
#     monitor="val_root_mean_squared_error",
#     restore_best_weights=True)
    
#     batch_size= trial.suggest_categorical("batch_size", [128, 256, 512, 1024]),
#     epochs= trial.suggest_int("epochs", 100, 500)
    
# #     print("batch_size :", batch_size[0])
# #     print("type(batch_size): ", type(batch_size[0]))

#     # So the suggest_categorical returns a tuple instead of simple int number, hence we gotta index 0
#     # to get the value for batch_size
    
#     # cross validate
#     cv = KFold(n_splits=5, shuffle=True, random_state=1337)

#     cv_scores = np.empty(5)
#     for idx, (train_idx, test_idx) in enumerate(cv.split(X, y)):
#         X_train, X_test = X[train_idx], X[test_idx]
#         y_train, y_test = y[train_idx], y[test_idx]

#         history = model.fit(X_train, y_train,
#                         batch_size=batch_size[0],
#                         validation_data=(X_test, y_test),
#                         epochs=epochs,
#                         callbacks=[early_stopping],
#                         verbose=0)
        
#         y_pred = model.predict(X_test)
#         cv_scores[idx] = mse(y_test, y_pred, squared=False)
    
#     rmse = np.mean(cv_scores)
#     print(f"AVG CV RMSE: \t {rmse}")
#     return rmse

In [171]:
# study = optuna.create_study(sampler=TPESampler(), direction="minimize", study_name="keras_tuner")
# func = lambda trial: objective(trial, X, y)
# study.optimize(func, n_trials=50, show_progress_bar=True)

[32m[I 2023-01-09 20:54:49,794][0m A new study created in memory with name: keras_tuner[0m


  0%|          | 0/50 [00:00<?, ?it/s]

AVG CV RMSE: 	 0.6300714984349727
[32m[I 2023-01-09 20:55:44,187][0m Trial 0 finished with value: 0.6300714984349727 and parameters: {'units1': 256, 'rate1': 0.0, 'units2': 256, 'rate2': 0.5, 'units3': 512, 'rate3': 0.30000000000000004, 'units4': 64, 'rate4': 0.30000000000000004, 'units5': 32, 'rate5': 0.0, 'units6': 256, 'rate6': 0.5, 'optimizer': 'RMSprop', 'batch_size': 128, 'epochs': 133}. Best is trial 0 with value: 0.6300714984349727.[0m
AVG CV RMSE: 	 0.6211161237897523
[32m[I 2023-01-09 20:56:07,722][0m Trial 1 finished with value: 0.6211161237897523 and parameters: {'units1': 128, 'rate1': 0.30000000000000004, 'units2': 1024, 'rate2': 0.30000000000000004, 'units3': 512, 'rate3': 0.6, 'units4': 64, 'rate4': 0.4, 'units5': 256, 'rate5': 0.4, 'units6': 64, 'rate6': 0.30000000000000004, 'optimizer': 'RMSprop', 'batch_size': 512, 'epochs': 229}. Best is trial 1 with value: 0.6211161237897523.[0m
AVG CV RMSE: 	 0.636535167009038
[32m[I 2023-01-09 20:56:29,662][0m Trial 2 fin

### It was total Failure to tune