<a href="https://colab.research.google.com/github/SilverSurferClash/Regression_templates/blob/main/feb_2021_nn_Optuna_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# Colab helper module for file upload/download (imported here, not used in this cell).
from google.colab import files

In [None]:
!pip install category_encoders

In [None]:
!pip install optuna

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
# Import OS and standard-library modules
import os
import time
from pathlib import Path
import sys
import pickle
import joblib

# Import data manipulation libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Import plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns

#Splitting the data and cross-validation
from sklearn.model_selection import train_test_split, GridSearchCV, cross_validate, cross_val_score,KFold, GroupKFold, StratifiedKFold

#Metrics
#from sklearn.metrics import classification_report,accuracy_score , roc_auc_score
from sklearn.metrics import mean_squared_error
#Classifiers
import xgboost as xgb
from lightgbm import LGBMClassifier
#from catboost import CatBoostClassifier
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier

# Feature engineering
from sklearn.preprocessing import FunctionTransformer, StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.compose import ColumnTransformer, make_column_transformer

#Encoding

import category_encoders as ce

#Helper modules
from tqdm import tqdm

In [None]:
# Notebook-wide display and plotting configuration
import warnings

# Silence every warning for the remainder of the session.
warnings.filterwarnings('ignore')

# pandas display: never truncate the column list, show floats with 2 digits of precision.
pd.options.display.max_columns = None
pd.options.display.precision = 2

# Dark matplotlib theme for all figures.
plt.style.use('dark_background')

In [None]:
# Display the current working directory (before it is changed to the project folder).
os.getcwd()

In [None]:
# Switch the working directory to the project folder on the mounted Drive so that
# the relative paths used by the read_csv calls below ("train.csv", "test.csv") resolve there.
# NOTE(review): absolute, user-specific path — adjust when running outside this Drive layout.
os.chdir("/content/drive/MyDrive/Colab_Notebooks/deep_learning")

In [None]:
# Load the raw training and test tables from the current working directory.
train_import = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

#### List of different feature sets

In [None]:
# Lists of columns grouped by their role
# Name of the target column
target_label = "target"
# Every column except the target (uses target_label so the name is defined in one place)
all_feature = [col for col in train_import.columns if col != target_label]
# Numeric features: columns stored as float64
all_numeric = [col for col in all_feature if train_import[col].dtype == "float64"]
# Categorical features: columns stored as object
all_categorical = [col for col in all_feature if train_import[col].dtype == "object"]
# NOTE(review): a feature whose dtype is neither float64 nor object (e.g. an integer
# column) ends up in neither list; the ColumnTransformer defined later uses
# remainder='passthrough', so such a column would reach the model unscaled — confirm intended.

In [None]:
#Step 0: Drop non-essential columns
#Step 1: Perform imputation or encoding of missing data
#Step 2: Perform encoding for the different categorical columns
#Step 3: feature transformation of numeric features
#Step 4: feature generation by aggregation/groupby function
#Step 5: 

### Create the feature matrix and target values

In [None]:
# Shuffle the training rows once, then separate the feature matrix from the target.
# A fixed random_state makes the shuffle repeatable across Restart & Run All
# (the original call was unseeded, so the row order changed on every run).
train = train_import.sample(frac=1, random_state=42).copy()
train_X = train.loc[:, all_feature]
# Selecting with a one-element list keeps the target 2-D: shape (n_rows, 1).
train_y = train.loc[:, [target_label]].values
train_X.shape, train_y.shape

### Feature Engineering - Encoding and Scaling

In [None]:
# Numeric columns: standardise with StandardScaler.
preprocess_continuous = Pipeline([('scaler', StandardScaler())])

# Categorical columns: one-hot encode with category_encoders.
preprocess_categorical = Pipeline(
    [('encoder', ce.OneHotEncoder(cols=all_categorical))]
)

In [None]:
# Route each group of columns through its own pipeline; any column that belongs
# to neither group is passed through unchanged.
ct = ColumnTransformer(
    [
        ('continuous', preprocess_continuous, all_numeric),
        ('categorical', preprocess_categorical, all_categorical),
    ],
    remainder='passthrough',
)

In [None]:
# Fit the column transformer on the full training feature matrix and transform it.
# NOTE(review): this runs before the train/validation split made further down, so the
# fitted preprocessing has also seen the rows later used for validation — confirm acceptable.
train_X_trans = ct.fit_transform(train_X, train_y)
# Apply the already-fitted transformer to the test frame (no refitting).
test_trans = ct.transform(test)

In [None]:
# Work on independent copies so the transformed originals stay untouched.
train_data, train_targets = train_X_trans.copy(), train_y.copy()


In [None]:
# Sanity check: container types and shapes of the model inputs and targets.
type(train_data), type(train_targets), train_targets.shape, train_data.shape

### Create the neural network

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
import urllib
import warnings

import optuna

from keras.backend import clear_session
from keras.datasets import mnist
from keras.layers import Conv2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop




In [None]:
# `import urllib` on its own does not guarantee that the `urllib.request` submodule
# is loaded, so import the submodule explicitly before it is used below.
import urllib.request

# TODO(crcrpar): Remove the below three lines once everything is ok.
# Register a global custom opener to avoid HTTP Error 403: Forbidden when downloading MNIST.
opener = urllib.request.build_opener()
opener.addheaders = [("User-agent", "Mozilla/5.0")]
urllib.request.install_opener(opener)

# Training settings shared by every Optuna trial.
BATCHSIZE = 256  # batch_size passed to model.fit
EPOCHS = 10  # epochs passed to model.fit

In [None]:
    # 1 Define the train_data, train_labels, val_data and val_labels
# Hold out 20% of the transformed training rows as a fixed validation split.
x_train, x_valid, y_train, y_valid = train_test_split(
    train_data,
    train_targets,
    random_state=42,
    test_size=0.2,
)
x_train.shape

In [None]:
def objective(trial):
    """Train one candidate network and return its validation RMSE for Optuna.

    The trial samples the width of both hidden layers, the kernel regulariser of
    the first hidden layer, one dropout rate (shared by both dropout layers) and
    the RMSprop learning rate. The model is trained on the module-level
    ``x_train``/``y_train`` split for ``EPOCHS`` epochs with a batch size of
    ``BATCHSIZE`` and evaluated on ``x_valid``/``y_valid``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyper-parameters.

    Returns
    -------
    float
        Root mean squared error on the validation split.
    """
    # Clear clutter from previous Keras session graphs.
    clear_session()

    # Sample the architecture hyper-parameters. The Optuna parameter names
    # ("n_estimators_*") are kept unchanged so existing studies stay compatible,
    # even though they denote the number of units in each Dense layer.
    units_first = trial.suggest_int("n_estimators_1", 16, 512)
    regularizer = trial.suggest_categorical("kernel_regularizer", ["l1", "l2", "l1_l2"])
    # Optuna returns the same value when a parameter name is suggested twice in
    # one trial, so both dropout layers always shared this rate; sample it once.
    # suggest_float replaces the deprecated suggest_uniform (same uniform range).
    dropout_rate = trial.suggest_float("dropout", 0.0, 0.5)
    units_second = trial.suggest_int("n_estimators_2", 16, 512)

    # Two hidden layers with dropout, followed by a single linear output unit.
    model = Sequential()
    model.add(Dense(units=units_first, kernel_regularizer=regularizer, activation="relu"))
    model.add(Dropout(dropout_rate))
    model.add(Dense(units=units_second, activation="relu"))
    model.add(Dropout(dropout_rate))
    model.add(Dense(units=1))

    # We compile our model with a sampled learning rate.
    learning_rate = trial.suggest_float("learning_rate", 1e-3, 5e-1, log=True)

    model.compile(
        loss="mse",
        optimizer=RMSprop(learning_rate=learning_rate),
        metrics=[keras.metrics.RootMeanSquaredError()],
    )

    model.fit(
        x_train,
        y_train,
        validation_data=(x_valid, y_valid),
        shuffle=True,
        batch_size=BATCHSIZE,
        epochs=EPOCHS,
        verbose=0,
    )

    # evaluate() returns [loss, rmse] in the order given to compile(); the study
    # minimises the RMSE metric (not an accuracy).
    _, validation_rmse = model.evaluate(x_valid, y_valid, verbose=0)
    return validation_rmse

In [None]:
# Run the hyper-parameter search; the study minimises the value returned by objective.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50, timeout=600)

print("Number of finished trials: {}".format(len(study.trials)))

# Report the best trial and the hyper-parameters it used.
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for param_name in trial.params:
    print("    {}: {}".format(param_name, trial.params[param_name]))

In [None]:
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_contour

In [None]:
# Plot the objective value of each trial over the course of the study.
plot_optimization_history(study)

In [None]:
# Plot the estimated importance of each sampled hyper-parameter.
plot_param_importances(study)

In [None]:
# Contour plots of the objective value over pairs of hyper-parameters.
plot_contour(study)

In [None]:
f