In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import pandas as pd
import numpy as np

import tensorflow as tf
import optuna
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D

import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
 
train = pd.read_csv('../input/digit-recognizer/train.csv') 
test = pd.read_csv('../input/digit-recognizer/test.csv')

**The objective of this nootebok will be to create and optimize the hyperparameters of a neural network intended to distinguish digits, the goal is to reach a level of accuracy higher than 98.5%.**

In [3]:
print(train.shape)
print(test.shape)

In [4]:
# Transform data
X_train = (train.iloc[:,1:].values).astype('float32') 
y_train = train.iloc[:,0].values
X_test = test.values.astype('float32')

In [5]:
from sklearn.model_selection import train_test_split
X_train2, X_test2, y_train2, y_test2 = train_test_split(X_train, y_train, 
                                                    test_size = 0.20,
                                                    train_size = 0.80,
                                                    random_state = 0,
                                                    stratify= y_train)
print(X_train.shape)
print(y_train.shape)

In [6]:
# Reshape data
img_rows, img_cols = 28, 28
num_classes = 10

In [7]:
# We process the data and convert it into a 28 x 28 matrix.
X_train = X_train2.reshape(X_train2.shape[0], img_rows, img_cols,1)
X_test = X_test2.reshape(X_test2.shape[0], img_rows, img_cols,1)

y_train = tf.keras.utils.to_categorical(y_train2, num_classes)
print(X_train.shape)

In [8]:
# Model CNN
def create_model(num_layer, mid_units, num_filters,dropout_rate):
    
    model = Sequential()
    model.add(Conv2D(filters=num_filters[0], kernel_size=(3, 3),
                 activation="relu",
                 input_shape=(img_rows, img_cols, 1)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    for i in range(1,num_layer):
        model.add(Conv2D(filters=num_filters[i], kernel_size=(3,3), padding="same", activation="relu"))
    
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(dropout_rate[0]))
    model.add(Flatten())
    model.add(Dense(mid_units))
    model.add(Dropout(dropout_rate[1]))
    model.add(Dense(num_classes, activation='softmax'))
    
    return model

In [9]:
def objective(trial):
    # Clear session
    keras.backend.clear_session()
    
    # Number of layers
    num_layer = trial.suggest_int("num_layer", 2, 5)
    mid_units = int(trial.suggest_discrete_uniform("mid_units", 100, 300, 100))
    
    # Number of filters in each convolution layer
    num_filters = [int(trial.suggest_discrete_uniform("num_filter_"+str(i), 16, 128, 16))
                   for i in range(num_layer)]
    
    # Dropout
    dropout_rate = [0] * 2
    dropout_rate[0] = trial.suggest_uniform('dropout_rate'+str(0), 0.0, 0.5)
    dropout_rate[1] = trial.suggest_uniform('dropout_rate'+str(1), 0.0, 0.5)
    
    #Optimizer
    optimizer = trial.suggest_categorical("optimizer", ["sgd", "adam"])
    
    model = create_model(num_layer, mid_units, num_filters,dropout_rate)
    model.compile(optimizer=optimizer,
                  loss="categorical_crossentropy",
                  metrics=["acc"])
    
    history = model.fit(X_train, y_train, verbose=0, epochs=20,
                        batch_size=128, validation_split=0.1)
    
    scores = model.evaluate(X_train, y_train)
    print('accuracy={}'.format(*scores))
    
    # Find the hyperparameters that maximize the percentage of correct responses to the data for validation
    return 1 - history.history["val_acc"][-1]

In [11]:
study = optuna.create_study()
study.optimize(objective, n_trials=50)

In [12]:
# We can extract very valuable information from the optuna graphs, it will allow us to be more precise as to
# which hyperparameters we should intervene and how much.
from optuna.visualization import plot_edf
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice

In [13]:
plot_edf(study)

In [18]:
plot_parallel_coordinate(study)

In [15]:
plot_param_importances(study)

**There does not seem to be any parameter irrelevant enough to be removed for the next optimization.**


In [16]:
plot_slice(study)

* **Regarding dropout_rate we could shorten the range of possibilities from 0.2 or 0.3 and lengthen it to 0.6, this applies to dropout_rate0 and dropout_rate1.**
* **mid_units, the third parameter with the most impact on our model, the model seems to be more comfortable between 200 and 300, I don't think it is necessary to touch anything here.**
* **increasing the number of layers may be a good idea, the model prefers 5 layers, we can try a range of 4 to 7.**
* **as far as optimizers are concerned sgd is the winner**

In [19]:
study.best_params

In [20]:
def objective1(trial):
    # Clear session
    keras.backend.clear_session()
    
    # Number of layers
    num_layer = trial.suggest_int("num_layer", 4, 7)
    mid_units = int(trial.suggest_discrete_uniform("mid_units", 100, 300, 100))
    
    # Number of filters in each convolution layer
    num_filters = [int(trial.suggest_discrete_uniform("num_filter_"+str(i), 16, 128, 16))
                   for i in range(num_layer)]
    
    # Dropout
    dropout_rate = [0] * 2
    dropout_rate[0] = trial.suggest_uniform('dropout_rate'+str(0), 0.2, 0.6)
    dropout_rate[1] = trial.suggest_uniform('dropout_rate'+str(1), 0.2, 0.6)
    
    #Optimizer
    optimizer = trial.suggest_categorical("optimizer", ["sgd"])
    
    model = create_model(num_layer, mid_units, num_filters,dropout_rate)
    model.compile(optimizer=optimizer,
                  loss="categorical_crossentropy",
                  metrics=["acc"])
    
    history = model.fit(X_train, y_train, verbose=0, epochs=20,
                        batch_size=128, validation_split=0.1)
    
    scores = model.evaluate(X_train, y_train)
    print('accuracy={}'.format(*scores))
    
    # Find the hyperparameters that maximize the percentage of correct responses to the data for validation
    return 1 - history.history["val_acc"][-1]

In [21]:
study = optuna.create_study()
study.optimize(objective1, n_trials=70)

**Well, we got better accuracy than last time.**

In [22]:
study.best_params

In [23]:
num_filters = [128,64,112,112,128]
mid_units= 200.0
dropout_rate = [0.5436451132049005,0.5811878085067075]
optimizer = 'sgd'

In [24]:
model_best = Sequential()
model_best.add(Conv2D(filters=num_filters[0], kernel_size=(3, 3),activation="relu",input_shape=(img_rows, img_cols, 1)))
model_best.add(Conv2D(filters=num_filters[1], kernel_size=(3,3), padding="same", activation="relu"))
model_best.add(Conv2D(filters=num_filters[2], kernel_size=(3,3), padding="same", activation="relu"))
model_best.add(Conv2D(filters=num_filters[3], kernel_size=(3,3), padding="same", activation="relu"))
model_best.add(Conv2D(filters=num_filters[4], kernel_size=(3,3), padding="same", activation="relu"))
model_best.add(MaxPooling2D(pool_size=(2, 2)))
model_best.add(Dropout(dropout_rate[0]))
model_best.add(Flatten())
model_best.add(Dense(mid_units))
model_best.add(Dropout(dropout_rate[1]))
model_best.add(Dense(num_classes, activation='softmax'))

In [27]:
#Determine optimization method
model_best.compile(loss=keras.losses.categorical_crossentropy,
              #optimizer=keras.optimizers.Adadelta(),
              optimizer=optimizer,
              metrics=['accuracy'])

hist2 = model_best.fit(X_train, y_train,
                 batch_size=128,
                 epochs=50,
                 validation_split=0.1,
                 verbose=1)

scores_best = model_best.evaluate(X_train, y_train)
print('accuracy={}'.format(*scores_best))

**ok, I accidentally ran the model twice in a row, the first time it got its best score at 0.9962**

In [28]:
#loss
plt.plot(hist2.history['loss'])
plt.plot(hist2.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

#Accuracy
plt.figure()
plt.plot(hist2.history['accuracy'])
plt.plot(hist2.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

#  Accuracy: 0.9962 ;>)