In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error,mean_absolute_error
from plotly.offline import iplot, init_notebook_mode
import cufflinks
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)
from statsmodels.tools.eval_measures import rmse
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
pd.set_option('display.max_row', 50)
pd.set_option('display.max_column', 150)

import warnings
warnings.filterwarnings('ignore')

In [2]:
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
Submission = pd.read_csv("sampleSubmission.csv")

In [3]:
train_x = train.drop(["target","id"],axis=1)
train_y = pd.DataFrame(train["target"])
test_x = test.drop("id", axis=1)

In [4]:
def create_dummies(df,column_name):
    dummies = pd.get_dummies(df[column_name],prefix=column_name)
    df = pd.concat([df,dummies],axis=1)
    return df

train_y_class = create_dummies(train_y,"target").drop("target",axis=1)

In [5]:
# Features Transformation
train_x_log = train_x.apply(lambda x: np.log(x+1))
test_x_log = test_x.apply(lambda x: np.log(x+1))
train_x_log_sqrt = train_x.apply(lambda x: np.sqrt(np.log(x+1)))
test_x_log_sqrt = test_x.apply(lambda x: np.sqrt(np.log(x+1)))

In [6]:
# To check the unique columns value no. between train and test set
compare_list = []
for columns in train_x.columns:
    a = len(train_x[columns].unique())
    b = len(test_x[columns].unique())
    if a != b:
        compare_list.append(columns)   

In [7]:
print(train_x.shape)
print(test_x.shape)
print(train_x_log.shape)
print(test_x_log.shape)
print(train_x_log_sqrt.shape)
print(test_x_log_sqrt.shape)
print(train_y_class.shape)

(61878, 93)
(144368, 93)
(61878, 93)
(144368, 93)
(61878, 93)
(144368, 93)
(61878, 9)


# Neural Network

In [8]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Activation
from keras import regularizers
from keras.layers.normalization import BatchNormalization
from keras.callbacks import EarlyStopping
import tensorflow as tf
print(keras.__version__)
print(tf.__version__)

Using TensorFlow backend.


2.2.5
1.13.1


In [None]:
# Another method to reset the model weights
# model.save_weights('model_reset_weights.h5')
# model.load_weights('model_reset_weights.h5')

# Regularization layer
# model.add(Dense(int(number_of_neurons), activation = 'relu', kernel_regularizer=regularizers.l2(0.001)))
# model.add(Dropout(0.5))

## To have reproducible result with keras

In [9]:
# Seed value
# Apparently you may use different seed values at each stage
seed_value= 0

# 1. Set `PYTHONHASHSEED` environment variable at a fixed value
import os
os.environ['PYTHONHASHSEED']=str(seed_value)

# 2. Set `python` built-in pseudo-random generator at a fixed value
import random
random.seed(seed_value)

# 3. Set `numpy` pseudo-random generator at a fixed value
import numpy as np
np.random.seed(seed_value)

# 4. Set `tensorflow` pseudo-random generator at a fixed value
import tensorflow as tf
tf.set_random_seed(seed_value)

# 5. Configure a new global `tensorflow` session
from keras import backend
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
backend.set_session(sess)

# Reset_Model trained Weights

In [10]:
def reset_weights(model):
    session = backend.get_session()
    for layer in model.layers: 
        if hasattr(layer, 'kernel_initializer'):
            layer.kernel.initializer.run(session=session)

In [None]:
reset_weights(model)

In [None]:
tf.keras.backend.clear_session()

# 1. Neural Network, train with Original Dataset (X)

In [None]:
# define model
model = Sequential()
model.add(Dense(units=256,activation = "relu",kernel_regularizer=regularizers.l2(0.36), input_dim=93))
model.add(BatchNormalization())
model.add(Dense(units=128,activation = "relu", kernel_regularizer=regularizers.l2(0.36)))
model.add(BatchNormalization())
model.add(Dense(units=64,activation = "relu", kernel_regularizer=regularizers.l2(0.36)))
model.add(BatchNormalization())
model.add(Dense(units=32,activation = "relu", kernel_regularizer=regularizers.l2(0.36)))
model.add(BatchNormalization())
model.add(Dense(units=16,activation = "relu", kernel_regularizer=regularizers.l2(0.36)))
model.add(BatchNormalization())
model.add(Dense(9, activation = "softmax"))

In [None]:
adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False, decay = 0.001)

In [None]:
model.compile(optimizer=adam, loss='categorical_crossentropy') #loss = loss function

In [None]:
# simple early stopping
# mode = min or max the monitor loss function
early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1,patience=30,baseline=None)

In [None]:
model.fit(train_x,train_y_class,validation_split=0.10,
          batch_size=(int(train_x.shape[0]*0.2)),
          epochs=5000,verbose=1, callbacks=[early_stop])

In [None]:
print(backend.eval(model.optimizer.lr)) # print model learning rate

# Visualization

In [None]:
loss_per_epoch = pd.DataFrame(model.history.history)
loss_per_epoch.iplot(y=["loss","val_loss"], title = "loss_per_epoch")

In [None]:
train_pred = pd.DataFrame(model.predict(train_x),columns = train_y_class.columns)

In [None]:
train_y_class.head()

In [None]:
#train_pred_class = (pd.DataFrame(model.predict_classes(train_x), columns = ["predicted_class"])) + 1

In [None]:
NN_submission = pd.DataFrame(model.predict(test_x))

In [None]:
Submission[['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5', 'Class_6',
       'Class_7', 'Class_8', 'Class_9']] = NN_submission

In [None]:
## Creating a Submission File to submit to Kaggle competition ##
Submission.to_csv("Neural_Network_Ori_X.csv",index=False)

# Fit Full Training Dataset for prediction

In [None]:
# define model
model = Sequential()
model.add(Dense(input_dim=105,units=12))
model.add(Dense(units=8))
model.add(Dense(units=4))
model.add(Dense(1, activation = "relu"))
model.compile(optimizer='adam', loss='mse', metrics = [rmse])

In [None]:
model.summary()

In [None]:
model.fit(train_x,train_y_class,batch_size=1,epochs=1000,verbose=1)

In [None]:
NN_submission = pd.DataFrame(model.predict(test_x))

In [None]:
Submission[['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5', 'Class_6',
       'Class_7', 'Class_8', 'Class_9']] = NN_submission

In [None]:
## Creating a Submission File to submit to Kaggle competition ##
Submission.to_csv("Neural_Network_Ori_X.csv",index=False)

# Saving and Loading Models

In [None]:
model.save('Neural_Network_Ori_X.h5')

## Load a model

In [None]:
from keras.models import load_model
Load_model = load_model('Neural_Network_Ori_X.h5')

In [None]:
Load_model.summary()

In [None]:
NN_submission = pd.DataFrame(Load_model.predict(test_x))

In [None]:
Submission[['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5', 'Class_6',
       'Class_7', 'Class_8', 'Class_9']] = NN_submission

# 2. Neural Network, train with Original Dataset log(X+1)

In [None]:
# define model
model = Sequential()
model.add(Dense(units=93,activation = "relu",kernel_regularizer=regularizers.l2(0.0016), input_dim=93))
model.add(BatchNormalization())
model.add(Dense(units=46,activation = "relu", kernel_regularizer=regularizers.l2(0.0016)))
model.add(BatchNormalization())
model.add(Dense(units=32,activation = "relu", kernel_regularizer=regularizers.l2(0.0016)))
model.add(BatchNormalization())
model.add(Dense(units=16,activation = "relu", kernel_regularizer=regularizers.l2(0.0016)))
model.add(BatchNormalization())
model.add(Dense(units=8,activation = "relu", kernel_regularizer=regularizers.l2(0.0016)))
model.add(BatchNormalization())
model.add(Dense(9, activation = "softmax"))

In [None]:
adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False, decay = 0.001)

In [None]:
model.compile(optimizer=adam, loss='categorical_crossentropy') #loss = loss function

In [None]:
# simple early stopping
# mode = min or max the monitor loss function
early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1,patience=10,baseline=None)

In [None]:
model.fit(train_x_log,train_y_class,validation_split=0.10,
          batch_size=(int(train_x.shape[0]*0.2)),
          epochs=5000,verbose=1, callbacks=[early_stop])

In [None]:
print(backend.eval(model.optimizer.lr)) # print model learning rate

# Visualization

In [None]:
loss_per_epoch = pd.DataFrame(model.history.history)
loss_per_epoch.iplot(y=["loss","val_loss"], title = "loss_per_epoch")

In [None]:
train_pred = pd.DataFrame(model.predict(train_x),columns = train_y_class.columns)

In [None]:
train_y_class.head()

In [None]:
#train_pred_class = (pd.DataFrame(model.predict_classes(train_x), columns = ["predicted_class"])) + 1

In [None]:
NN_submission = pd.DataFrame(model.predict(test_x_log))

In [None]:
Submission[['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5', 'Class_6',
       'Class_7', 'Class_8', 'Class_9']] = NN_submission

In [None]:
## Creating a Submission File to submit to Kaggle competition ##
Submission.to_csv("Neural_Network_log_X.csv",index=False)

# Fit Full Training Dataset for prediction

In [None]:
# define model
model = Sequential()
model.add(Dense(input_dim=105,units=12))
model.add(Dense(units=8))
model.add(Dense(units=4))
model.add(Dense(1, activation = "relu"))
model.compile(optimizer='adam', loss='mse', metrics = [rmse])

In [None]:
model.summary()

In [None]:
model.fit(train_x,train_y_class,batch_size=1,epochs=1000,verbose=1)

In [None]:
NN_submission = pd.DataFrame(model.predict(test_x))

In [None]:
Submission[['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5', 'Class_6',
       'Class_7', 'Class_8', 'Class_9']] = NN_submission

In [None]:
## Creating a Submission File to submit to Kaggle competition ##
Submission.to_csv("Neural_Network_Ori_X.csv",index=False)

# Saving and Loading Models

In [None]:
model.save('Neural_Network_Ori_X.h5')

## Load a model

In [None]:
from keras.models import load_model
Load_model = load_model('Neural_Network_Ori_X.h5')

In [None]:
Load_model.summary()

In [None]:
NN_submission = pd.DataFrame(Load_model.predict(test_x))

In [None]:
Submission[['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5', 'Class_6',
       'Class_7', 'Class_8', 'Class_9']] = NN_submission

In [None]:
## Creating a Submission File to submit to Kaggle competition ##
Submission.to_csv("Neural_Network_Ori_X.csv",index=False)

# 3. Neural Network, train with Original Dataset sqrt(log(X+1))

In [11]:
# define model
model = Sequential()
model.add(Dense(units=256,activation = "relu",kernel_regularizer=regularizers.l2(0.08), input_dim=93))
model.add(BatchNormalization())
model.add(Dense(units=128,activation = "relu", kernel_regularizer=regularizers.l2(0.08)))
model.add(BatchNormalization())
model.add(Dense(units=64,activation = "relu", kernel_regularizer=regularizers.l2(0.08)))
model.add(BatchNormalization())
model.add(Dense(units=32,activation = "relu", kernel_regularizer=regularizers.l2(0.08)))
model.add(BatchNormalization())
model.add(Dense(units=16,activation = "relu", kernel_regularizer=regularizers.l2(0.08)))
model.add(BatchNormalization())
model.add(Dense(9, activation = "softmax"))

Instructions for updating:
Colocations handled automatically by placer.


In [12]:
adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False, decay = 0.001)

In [13]:
model.compile(optimizer=adam, loss='categorical_crossentropy') #loss = loss function

In [14]:
# simple early stopping
# mode = min or max the monitor loss function
early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1,patience=50,baseline=None)

In [15]:
model.fit(train_x_log_sqrt,train_y_class,validation_split=0.10,
          batch_size=(int(train_x.shape[0]*0.2)),
          epochs=5000,verbose=1, callbacks=[early_stop])

Instructions for updating:
Use tf.cast instead.
Train on 55690 samples, validate on 6188 samples
Epoch 1/5000
Epoch 2/5000
Epoch 3/5000
Epoch 4/5000
Epoch 5/5000
Epoch 6/5000
Epoch 7/5000
Epoch 8/5000
Epoch 9/5000
Epoch 10/5000
Epoch 11/5000
Epoch 12/5000
Epoch 13/5000
Epoch 14/5000
Epoch 15/5000
Epoch 16/5000
Epoch 17/5000
Epoch 18/5000
Epoch 19/5000
Epoch 20/5000
Epoch 21/5000
Epoch 22/5000
Epoch 23/5000
Epoch 24/5000
Epoch 25/5000
Epoch 26/5000
Epoch 27/5000
Epoch 28/5000
Epoch 29/5000
Epoch 30/5000
Epoch 31/5000
Epoch 32/5000
Epoch 33/5000
Epoch 34/5000
Epoch 35/5000
Epoch 36/5000
Epoch 37/5000
Epoch 38/5000
Epoch 39/5000
Epoch 40/5000
Epoch 41/5000
Epoch 42/5000
Epoch 43/5000
Epoch 44/5000
Epoch 45/5000
Epoch 46/5000
Epoch 47/5000
Epoch 48/5000
Epoch 49/5000
Epoch 50/5000
Epoch 51/5000
Epoch 52/5000
Epoch 53/5000
Epoch 54/5000
Epoch 55/5000
Epoch 56/5000
Epoch 57/5000
Epoch 58/5000
Epoch 59/5000
Epoch 60/5000
Epoch 61/5000
Epoch 62/5000
Epoch 63/5000
Epoch 64/5000
Epoch 65/5000
Ep

Epoch 149/5000
Epoch 150/5000
Epoch 151/5000
Epoch 152/5000
Epoch 153/5000
Epoch 154/5000
Epoch 155/5000
Epoch 156/5000
Epoch 157/5000
Epoch 158/5000
Epoch 159/5000
Epoch 00159: early stopping


<keras.callbacks.History at 0x1c2a113ef0>

In [None]:
print(backend.eval(model.optimizer.lr)) # print model learning rate

# Visualization

In [16]:
loss_per_epoch = pd.DataFrame(model.history.history)
loss_per_epoch.iplot(y=["loss","val_loss"], title = "loss_per_epoch")

In [None]:
train_pred = pd.DataFrame(model.predict(train_x),columns = train_y_class.columns)

In [None]:
train_y_class.head()

In [None]:
#train_pred_class = (pd.DataFrame(model.predict_classes(train_x), columns = ["predicted_class"])) + 1

In [17]:
NN_submission = pd.DataFrame(model.predict(test_x_log_sqrt))

In [18]:
Submission[['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5', 'Class_6',
       'Class_7', 'Class_8', 'Class_9']] = NN_submission

In [19]:
## Creating a Submission File to submit to Kaggle competition ##
Submission.to_csv("Neural_Network_sqrt_log_X.csv",index=False)

# Fit Full Training Dataset for prediction

In [None]:
# define model
model = Sequential()
model.add(Dense(input_dim=105,units=12))
model.add(Dense(units=8))
model.add(Dense(units=4))
model.add(Dense(1, activation = "relu"))
model.compile(optimizer='adam', loss='mse', metrics = [rmse])

In [None]:
model.summary()

In [None]:
model.fit(train_x,train_y_class,batch_size=1,epochs=1000,verbose=1)

In [None]:
NN_submission = pd.DataFrame(model.predict(test_x_log_sqrt))

In [None]:
Submission[['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5', 'Class_6',
       'Class_7', 'Class_8', 'Class_9']] = NN_submission

In [None]:
## Creating a Submission File to submit to Kaggle competition ##
Submission.to_csv("Neural_Network_Ori_X.csv",index=False)

# Saving and Loading Models

In [None]:
model.save('Neural_Network_Ori_X.h5')

## Load a model

In [None]:
from keras.models import load_model
Load_model = load_model('Neural_Network_Ori_X.h5')

In [None]:
Load_model.summary()

In [None]:
NN_submission = pd.DataFrame(Load_model.predict(test_x))

In [None]:
Submission[['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5', 'Class_6',
       'Class_7', 'Class_8', 'Class_9']] = NN_submission

In [None]:
## Creating a Submission File to submit to Kaggle competition ##
Submission.to_csv("Neural_Network_Ori_X.csv",index=False)

In [None]:
## Creating a Submission File to submit to Kaggle competition ##
Submission.to_csv("Neural_Network_Ori_X.csv",index=False)

In [None]:
## Creating a Submission File to submit to Kaggle competition ##
Submission.to_csv("Neural_Network_Ori_X.csv",index=False)