<a href="https://colab.research.google.com/github/Pythonash/Kaggle-eng-/blob/Brain/Stochastic_parameter_finder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# This notebook is written by pythonash.

This notebook is meant to find suitable hyperparameters, such as the learning rate, the dropout rate, and so on.

This notebook will be updated until either I finally find an optimal structure or the competition ends while I am too busy with work to keep up with it.

The purpose of this notebook is to gather the loss results for as many parameter combinations as my time allows.

From the results of many parameter combinations, you can find the optimal parameters or see how to optimize your own model.

Let's Start!

# Import libraries

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
# import ubiquant
from sklearn.model_selection import KFold
import random
import time

# Wall-clock reference taken at notebook start; the search loop further down
# compares against it to enforce its running-time limit.
start = time.time()

# Load and handle dataset

- This handling procedure is introduced in my previous notebook [End to end simple and powerful DNN with LeakyReLU](https://www.kaggle.com/pythonash/end-to-end-simple-and-powerful-dnn-with-leakyrelu).

- If you need more information about dataset handling, please check my notebook.


## NB

Note that I changed the procedure a little bit.

In the previous notebook, I used "investment_id" and "Standard scaling".

But I won't use either of them in this notebook.

Furthermore, I will scale the "f_col" values with Min-Max normalization.

In [None]:
# Load the training data from the parquet file into a DataFrame and display it.
df = pd.read_parquet('../input/ubiquant-parquet/train_low_mem.parquet')
df

In [None]:
# Feature columns: every column except the identifiers and the target.
# Dropping the labels from the column Index gives the same Index as dropping
# them from the frame, without building an intermediate DataFrame.
f_col = df.columns.drop(['row_id', 'time_id', 'investment_id', 'target'])
f_col

In [None]:
# Fit the Min-Max scaler on the feature columns of the training frame.
# df[f_col] is already a DataFrame, so no extra wrapping is needed.
scaler = MinMaxScaler()
scaler.fit(df[f_col])

In [None]:
def make_dataset(df):
    """Return the Min-Max scaled feature columns of ``df`` as float16.

    The columns listed in the module-level ``f_col`` are selected, transformed
    with the already fitted module-level ``scaler``, and wrapped in a new
    DataFrame that keeps the feature column names. The result has a fresh
    default index, because the scaler output carries no index.
    """
    features = df[f_col]
    scaled_values = scaler.transform(features)
    # Cast to float16 to reduce the memory footprint of the feature matrix.
    return pd.DataFrame(scaled_values, columns=features.columns).astype('float16')

In [None]:
# df=df.astype('float16')
df_x = make_dataset(df)
df_x

In [None]:
# Keep the regression target as a single-column DataFrame named 'target'.
df_y = df['target'].to_frame()
df_y

In [None]:
# The raw frame is not used again below (df_x and df_y hold what is needed),
# so drop the reference to free memory.
del df

# Stochastic model structure

- I construct my model with randomness (i.e., the number of neurons, dropout rate, and learning rate will be assigned randomly).

- I used PReLU instead of LeakyReLU.

- For every iteration, your model gets a random number of neurons, a random dropout rate, and a random learning rate.

- By doing so, you will gather logs that show the effect of the parameters on the loss.

You can change the model structure as much as you want.

In [None]:
def pythonash_model():
    """Build and compile a fully connected network with random hyperparameters.

    Three hyperparameters are sampled on every call:

    * ``neurons``: integer width shared by all hidden layers, drawn from
      16..513 (both ends inclusive, as ``random.randint`` does).
    * ``drop_rate``: one of 0.1, 0.2, ..., 0.6.
    * ``lr_rate``: initial learning rate, uniform in [0.0005, 0.007].

    The network stacks seven Dense -> BatchNormalization -> PReLU blocks, with
    a Dropout layer after the fifth and the seventh block, followed by a
    single-unit linear output. The input width is read from the module-level
    ``df_x``. The model is compiled with MSE loss, an RMSE metric and Adam
    driven by an exponentially decaying learning-rate schedule.

    Returns:
        tuple: ``(neurons, drop_rate, lr_rate, model)`` where ``model`` is the
        compiled ``tf.keras.Model``.
    """
    # Keep this sampling order: it determines which values a seeded RNG yields.
    neurons = random.randint(16, 513)
    drop_rate = random.randint(1, 6) / 10
    lr_rate = random.uniform(0.0005, 0.007)

    n_hidden_blocks = 7
    dropout_after_blocks = {5, 7}

    inputs_ = tf.keras.Input(shape=[df_x.shape[1]])
    x = inputs_
    for block_no in range(1, n_hidden_blocks + 1):
        x = tf.keras.layers.Dense(neurons, kernel_initializer='he_normal')(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.PReLU()(x)
        if block_no in dropout_after_blocks:
            x = tf.keras.layers.Dropout(drop_rate)(x)

    outputs_ = tf.keras.layers.Dense(1)(x)
    model = tf.keras.Model(inputs=inputs_, outputs=outputs_)

    rmse = tf.keras.metrics.RootMeanSquaredError()
    learning_sch = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=lr_rate,
        decay_steps=10000,
        decay_rate=0.97,
    )
    adam = tf.keras.optimizers.Adam(learning_rate=learning_sch)

    # compile() documents `metrics` as a list, so the metric is wrapped in one.
    model.compile(loss='mse', metrics=[rmse], optimizer=adam)
    print('Current set is \n neurons: {0},\n Drop rate: {1}, \n learning_rate: {2}'.format(neurons, drop_rate, lr_rate))

    return neurons, drop_rate, lr_rate, model

# Save the simulation log

For each iteration, the logs are generated.

This code saves the logs automatically.

In [None]:
# Random hyperparameter search with 5-fold cross-validation.
#
# Every outer iteration samples one model via pythonash_model() and trains a
# fresh copy of it on each fold. The lowest validation loss of each fold is
# appended to simulation_log, and the complete log is rewritten to CSV after
# every fold so that partial results are on disk if the session stops early.
simulation_log = []
max_runtime_hours = 5  # no new iteration is started once this much time has passed
log_columns = ['num_iter', 'num_fold', 'neurons', 'drop_rate', 'lr_rate', 'min_loss']
separator = '==========================================================================================='

for num_iter in range(1, 100):
    elapsed = round(time.time() - start, 0)
    if elapsed / 3600 >= max_runtime_hours:
        print('Memory using time is over.')
        break
    print('Current running time: {} sec.'.format(elapsed))

    kfold_generator = KFold(n_splits=5, shuffle=True)
    # One checkpoint callback per iteration, shared by all folds of that iteration.
    callbacks = [tf.keras.callbacks.ModelCheckpoint('pythonash_model.h5', save_best_only=True)]
    neurons, drop_rate, lr_rate, model = pythonash_model()
    # Save the untrained model so each fold reloads the same initial weights.
    model.save('fold_model.h5')
    del model

    for num_fold, (train_index, val_index) in enumerate(kfold_generator.split(df_x, df_y), start=1):
        # `del` only drops Python references; clear_session() also releases the
        # global state Keras accumulates when many models are created in a loop.
        tf.keras.backend.clear_session()
        fold_model = tf.keras.models.load_model('fold_model.h5')

        # Split training dataset.
        train_x, train_y = df_x.iloc[train_index], df_y.iloc[train_index]
        # Split validation dataset.
        val_x, val_y = df_x.iloc[val_index], df_y.iloc[val_index]

        # Make tensor datasets. Only the training stream is shuffled and
        # truncated to full batches; validation keeps every sample so that
        # val_loss is computed on the whole fold.
        tf_train = (
            tf.data.Dataset.from_tensor_slices((train_x, train_y))
            .shuffle(2022)
            .batch(1024, drop_remainder=True)
            .prefetch(1)
        )
        tf_val = (
            tf.data.Dataset.from_tensor_slices((val_x, val_y))
            .batch(1024)
            .prefetch(1)
        )

        print('======================================Fold %d Start!=======================================' % num_fold)
        # Increase `epochs` for a real search. fit()'s own `shuffle` flag is
        # ignored for Dataset inputs, so it is not passed.
        fit_history = fold_model.fit(tf_train, callbacks=callbacks, epochs=5,
                                     validation_data=tf_val, verbose=1)
        min_loss = np.min(fit_history.history['val_loss'])
        print(separator)
        print('Model achieves %f in validation set.' % min_loss)
        print(separator)

        simulation_log.append([num_iter, num_fold, neurons, drop_rate, lr_rate, min_loss])
        log_df = pd.DataFrame(simulation_log, columns=log_columns)
        print(log_df)
        log_df.to_csv('./Parameter finder log.csv', encoding='utf-8-sig', index=False)
        print(separator)

        # Drop references to the datasets and the model to keep memory bounded.
        del tf_train, tf_val, fit_history, fold_model

    print('%d iteraion is over.' % num_iter)
    print(separator)
    