# Importing Libraries 

In [None]:
!pip install optuna


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Conv2D, BatchNormalization, MaxPool2D, Flatten, LeakyReLU
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# List the GPUs TensorFlow can see; an empty list means no GPU was detected.
# The deprecated tf.test.is_gpu_available() call was dropped: its result was
# never shown, because only the last expression of a cell is displayed.
tf.config.list_physical_devices('GPU')



[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

# Dataset Preprocessing

## Importing Dataset

In [None]:
# Mount Google Drive at /content/drive (Colab only); the CSV files read below live there.
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Folder on the mounted Drive that holds the competition CSV files.
# Defined once so the notebook can be pointed at another location by editing one line.
DATA_DIR = "/content/drive/MyDrive/Colab Notebooks/Programs/Kaggle/Digit Recognizer"

# train.csv: column 0 is split off as the label further down, the rest are features.
data = pd.read_csv(DATA_DIR + "/train.csv")
# test.csv: used entirely as features.
data_test = pd.read_csv(DATA_DIR + "/test.csv")

## Splitting Into X and y variables

In [None]:
# Column 0 of the training frame is the target; every remaining column is a feature.
y = data.iloc[:, 0].values
X = data.iloc[:, 1:].values

In [None]:
# The test frame is used as-is: every column is treated as a feature (no label is split off).
X_test = data_test.values
X_test.shape

(28000, 784)

## Encoding Categorical Features

In [None]:
# `keras.utils.np_utils` no longer exists in recent Keras releases; the same
# helper is exposed through the TensorFlow package this notebook already uses.
from tensorflow.keras.utils import to_categorical # convert to one-hot-encoding

# One-hot encode the labels into 10 classes.
# Guarded on the array rank so that re-running this cell does not encode the
# already one-hot matrix a second time.
if y.ndim == 1:
  y = to_categorical(y, num_classes = 10)

## Image Normalization

In [None]:
# Scale the pixel values by 1/255 (assuming 8-bit intensities, this maps them to [0, 1]).
# Casting to float32 first keeps the arrays at half the size of the float64
# result a plain division would produce; Keras trains in float32 anyway.
X = X.astype("float32") / 255
X_test = X_test.astype("float32") / 255

## Reshaping

In [None]:
# Turn each flat 784-value row into a 28x28 image with a single channel.
# -1 lets NumPy infer the number of samples, so the cell keeps working when
# the number of rows changes (e.g. on a subsample) instead of raising.
X = X.reshape((-1, 28, 28, 1))
X_test = X_test.reshape((-1, 28, 28, 1))

In [None]:
# Sanity check: shape of the training images after the reshape.
X.shape

(42000, 28, 28, 1)

In [None]:
# Sanity check: shape of the labels after one-hot encoding.
y.shape

(42000, 10)

## Splitting into training and validation

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the labelled data for validation.
# random_state pins the shuffle so the split (and every score computed on it)
# is the same on each run; stratifying on the class index (argmax of the
# one-hot rows) keeps the class proportions equal in both partitions.
X_train, X_val, y_train, y_val = train_test_split(
    X, y,
    test_size = 0.1,
    random_state = 42,
    stratify = y.argmax(axis = 1))

## Data Augmentation


In [None]:
# Training-time augmentation: small random rotations, zooms and shifts.
# Arguments that merely restated the ImageDataGenerator defaults (centering,
# std-normalisation, ZCA whitening all off) were removed; the flip switches
# are kept explicit to make clear that flipping is deliberately disabled.
augmentator = ImageDataGenerator(
        horizontal_flip = False,
        vertical_flip = False,
        fill_mode = 'nearest',
        rotation_range = 10,
        zoom_range = 0.1,
        width_shift_range = 0.1,
        height_shift_range = 0.1)

# No augmentator.fit(X_train) here: fit() only computes the statistics needed
# for featurewise centering/normalisation and ZCA whitening, none of which are
# enabled, so the call did nothing except copy the training array.
train_gen = augmentator.flow(X_train, y_train, batch_size=32)

In [None]:
# Validation data must be scored exactly as provided. Applying random
# rotations/zooms/shifts to it (as the copy of the training generator did)
# makes every evaluation measure a different, randomly distorted set.
# A generator with no transformations is used purely for batching, and
# shuffle=False keeps the batches in the same order as y_val.
val_datagen = ImageDataGenerator()
val_gen = val_datagen.flow(X_val, y_val, batch_size=32, shuffle=False)

# Old Attempts

## Attempt 1: Creating Neural Network

## Building the CNN

In [None]:
# Attempt 1 network, built in a single expression.
# Previously the layers were appended with model_1.add(...) across several
# cells, so re-running any one of those cells silently stacked duplicate
# layers onto the model. Layer order and settings are unchanged.
model_1 = tf.keras.models.Sequential([
    # Convolutional block 1
    Conv2D(filters = 16, kernel_size = 3, activation = 'relu', input_shape = [28,28,1]),
    BatchNormalization(),
    Conv2D(filters = 16, kernel_size = 3, activation = 'relu'),
    BatchNormalization(),
    MaxPool2D(pool_size = 2, strides = 2, padding = 'valid'),

    # Convolutional block 2
    Conv2D(filters = 16, kernel_size = 3, activation = 'relu'),
    Conv2D(filters = 32, kernel_size = 3, activation = 'relu'),
    MaxPool2D(pool_size = 2, strides = 2, padding = 'valid'),

    # Classifier head
    Flatten(),
    BatchNormalization(),
    Dense(units = 32, activation = 'relu'),
    BatchNormalization(),
    Dropout(rate = 0.1),
    Dense(units = 64, activation = 'relu'),

    # One probability per class
    Dense(units = 10, activation = 'softmax'),
])

model_1.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer="adam", metrics=["accuracy"])

## Training the CNN

In [None]:
# Train Attempt 1 for 3 epochs directly on the training arrays (no augmentation generator).
# NOTE(review): the saved output of this cell is a NameError, so it was presumably
# last run before one of the names it uses existed — confirm with Restart & Run All.
model_1.fit(X_train, y_train, epochs = 3)

NameError: ignored

In [None]:
# evaluate() returns the loss followed by the compiled metrics, so index 1 is the accuracy.
accuracy = model_1.evaluate(X_val, y_val)
# Start a list of validation accuracies with this attempt's score.
accuracies = [accuracy[1]]

## Testing the CNN

## Attempt 2: Optuna Hyperparameter Tuning with a DCNN

## Network One

Layers:

1. Conv2D
2. Conv2D
3. MaxPool2D
4. Conv2D
5. Conv2D
6. MaxPool2D
7. Flatten
8. Dense (dense_1)
9. Dropout
10. Dense (dense_2)
11. Dense (output, 10 units)





In [None]:
import optuna
def objective_NN(trial):
  """Optuna objective: build, train and score one candidate CNN.

  The trial chooses the filter counts of the four convolutions, the size and
  activation of the two hidden dense layers, the dropout rate and the number
  of training epochs. The network is trained on (X_train, y_train) and scored
  on (X_val, y_val).

  Args:
    trial: the Optuna trial used to draw the hyperparameters.

  Returns:
    The validation accuracy of the trained candidate (the value to maximise).
  """
  # Release the graph/layer state of the previous candidate; without this every
  # trial's model stays alive and memory use grows over the whole study.
  tf.keras.backend.clear_session()

  conv_2d_1 = Conv2D(filters = trial.suggest_int("conv_2d_1_filters" ,8, 128),
                     kernel_size = 3,
                     activation ='relu',
                     input_shape = (28, 28, 1))
  conv_2d_2 = Conv2D(filters = trial.suggest_int("conv_2d_2_filters" ,8, 128),
                     kernel_size =3,
                     activation = 'relu')
  conv_2d_3 = Conv2D(filters = trial.suggest_int("conv_2d_3_filters" ,8, 128),
                     kernel_size = 3,
                     activation = 'relu')
  conv_2d_4 = Conv2D(filters = trial.suggest_int("conv_2d_4_filters" ,8, 256),
                     kernel_size = 3,
                     activation = 'relu')

  max_pooling_1 = MaxPool2D(pool_size = 2, strides = 2, padding = 'valid')
  max_pooling_2 = MaxPool2D(pool_size = 2, strides = 2, padding = 'valid')

  flatten = Flatten()

  dense_1 = Dense(units = trial.suggest_int("dense_1_units", 16, 256), activation = trial.suggest_categorical('dense_1_activation', ['relu', 'linear', 'sigmoid']) )
  dense_2 = Dense(units = trial.suggest_int("dense_2_units", 16, 256), activation = trial.suggest_categorical('dense_2_activation', ['relu', 'linear', 'sigmoid']) )

  # The upper bound is kept strictly below 1: a rate of 1.0 would drop every
  # activation, leaving the layers after the dropout with nothing to learn from.
  dropout = Dropout(rate = trial.suggest_float('dropout_rate', 0.05, 0.9))

  # Categorical cross-entropy expects the outputs to form a probability
  # distribution over the classes, which softmax guarantees and sigmoid does not.
  # The parameter is still registered (with a single choice) so that
  # trial.params keeps its 'output_activation' key for downstream cells.
  output = Dense(units = 10, activation = trial.suggest_categorical('output_activation', ['softmax']))

  model_layers = (conv_2d_1,conv_2d_2, max_pooling_1, conv_2d_3, conv_2d_4, max_pooling_2, flatten, dense_1, dropout, dense_2, output)
  model = tf.keras.models.Sequential(model_layers)
  model.compile(loss = 'categorical_crossentropy', optimizer="adam", metrics=["accuracy"])

  # verbose=0: Optuna already logs one line per trial; per-batch progress bars
  # for every trial would bury that log.
  model.fit(X_train, y_train, epochs = trial.suggest_int('epochs', 1, 10), verbose = 0)

  # evaluate() returns [loss, accuracy] for the metrics compiled above.
  accuracy = model.evaluate(X_val, y_val, verbose = 0)

  return accuracy[1]

# Seed the sampler so the sequence of suggested hyperparameters is repeatable
# from run to run (TPE is Optuna's default sampler, so the search strategy is
# unchanged; the network training itself remains stochastic).
sampler = optuna.samplers.TPESampler(seed = 42)
study = optuna.create_study(direction='maximize', sampler = sampler)
study.optimize(objective_NN, n_trials = 100)

In [None]:
# Log text kept from an earlier run (two log lines had been pasted into one):
#   Trial 21 finished with value: 0.9933333396911621 and parameters:
#     {'conv_2d_1_filters': 67, 'conv_2d_2_filters': 104, 'conv_2d_3_filters': 74,
#      'conv_2d_4_filters': 82, 'dense_1_units': 136, 'dense_1_activation': 'relu',
#      'dense_2_units': 252, 'dense_2_activation': 'sigmoid',
#      'dropout_rate': 0.7265591982474168, 'output_activation': 'sigmoid', 'epochs': 10}
#   (tail of another trial's line) ... 'sigmoid', 'dense_2_units': 96,
#      'dense_2_activation': 'sigmoid', 'dropout_rate': 0.5701146378876534,
#      'output_activation': 'softmax', 'epochs': 10}.
#      Best is trial 37 with value: 0.9930952191352844.

# Summarise the study: how many trials ran, then the best trial's score and parameters.
trial = study.best_trial
report = [
    "Number of finished trials: {}".format(len(study.trials)),
    "Best trial:",
    "  Value: {}".format(trial.value),
    "  Params: ",
]
report.extend("    {}: {}".format(key, value) for key, value in trial.params.items())
print("\n".join(report))

In [None]:
# Plot the estimated importance of each hyperparameter for the study's objective value.
optuna.visualization.plot_param_importances(study)

In [None]:
def build_model(conv_2d_1_filters, conv_2d_2_filters, conv_2d_3_filters, conv_2d_4_filters, dense_1_units, dense_1_activation, dense_2_units, dense_2_activation, dropout_rate, output_activation):
  """Assemble the uncompiled CNN of the Optuna search from explicit hyperparameters.

  Layer order: two 3x3 ReLU convolutions, 2x2 max-pool, two more 3x3 ReLU
  convolutions, 2x2 max-pool, flatten, dense_1, dropout, dense_2, and a
  10-unit output layer.

  Args:
    conv_2d_1_filters .. conv_2d_4_filters: filter counts of the four convolutions.
    dense_1_units, dense_1_activation: size and activation of the first dense layer.
    dense_2_units, dense_2_activation: size and activation of the second dense layer.
    dropout_rate: rate of the dropout placed between the two dense layers.
    output_activation: activation of the 10-unit output layer.

  Returns:
    A tf.keras Sequential model; the caller is expected to compile it.
  """
  def relu_conv(n_filters, **extra):
    # All four convolutions share the 3x3 kernel and ReLU activation.
    return Conv2D(filters = n_filters, kernel_size = 3, activation = 'relu', **extra)

  def halving_pool():
    return MaxPool2D(pool_size = 2, strides = 2, padding = 'valid')

  # Convolutions are created first, then the pools, then the head, so the
  # layers are instantiated in the same sequence as before.
  feature_convs = [
      relu_conv(conv_2d_1_filters, input_shape = (28, 28, 1)),
      relu_conv(conv_2d_2_filters),
      relu_conv(conv_2d_3_filters),
      relu_conv(conv_2d_4_filters),
  ]
  pools = [halving_pool(), halving_pool()]
  flatten = Flatten()
  hidden_1 = Dense(units = dense_1_units, activation = dense_1_activation)
  hidden_2 = Dense(units = dense_2_units, activation = dense_2_activation)
  drop = Dropout(rate = dropout_rate)
  classifier = Dense(units = 10, activation = output_activation)

  stack = (feature_convs[0], feature_convs[1], pools[0],
           feature_convs[2], feature_convs[3], pools[1],
           flatten, hidden_1, drop, hidden_2, classifier)
  return tf.keras.models.Sequential(stack)


In [None]:
# Rebuild the best network found by the study.
params = study.best_trial.params

# Pass the hyperparameters by keyword instead of copying each one into its own
# global and then into a 10-argument positional call, where two arguments
# swapped by mistake would go unnoticed. 'epochs' is a training setting rather
# than an architecture argument of build_model, so it is left out here.
architecture_params = {name: value for name, value in params.items() if name != 'epochs'}
model_2 = build_model(**architecture_params)
model_2.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer="adam", metrics=["accuracy"])


In [None]:
# Retrain the best configuration for the number of epochs chosen by the same trial.
model_2.fit(X_train, y_train, epochs = params['epochs'])

In [None]:
# Score the retrained model on the held-out validation split.
model_2.evaluate(X_val, y_val)

# Complete Restart

In [None]:
# Check the size of the training split before building the final model.
X_train.shape


(37800, 28, 28, 1)

In [None]:
# Final network, built in a single expression so that re-running the cell
# always yields the same architecture (the earlier model.add(...) cells
# appended duplicate layers when executed twice).
#
# The LeakyReLU(0.2) layers that followed the second and third pooling stages
# were removed: their input is the max-pool of ReLU outputs, which is never
# negative, and LeakyReLU leaves non-negative values unchanged, so they had no
# effect on the model's output. The commented-out LeakyReLU line is gone too.
model = tf.keras.models.Sequential([
    # Block 1: 28x28 -> 14x14
    Conv2D(filters = 32, kernel_size = 3, activation="relu", padding="same", input_shape = [28, 28, 1]),
    Conv2D(filters = 32, kernel_size = 3, activation="relu", padding="same"),
    MaxPool2D(pool_size = (2, 2), padding="same"),
    BatchNormalization(),
    Dropout(0.2),

    # Block 2: 14x14 -> 7x7
    Conv2D(filters = 64, kernel_size = 3, activation="relu", padding="same"),
    Conv2D(filters = 64, kernel_size = 3, activation="relu", padding="same"),
    MaxPool2D(pool_size = (2, 2), padding="same"),
    BatchNormalization(),

    # Block 3: 7x7 -> 4x4 ("same" padding rounds the pooled size up)
    Conv2D(filters = 128, kernel_size = 3, activation="relu", padding="same"),
    Conv2D(filters = 128, kernel_size = 3, activation="relu", padding="same"),
    MaxPool2D(pool_size = (2, 2), padding="same"),
    BatchNormalization(),
    Dropout(0.2),

    Flatten(),

    # Classifier head
    Dense(units = 512, activation="relu"),
    BatchNormalization(),

    Dense(units = 256, activation="relu"),
    Dropout(0.2),
    BatchNormalization(),

    Dense(units = 64, activation="relu"),
    BatchNormalization(),

    # One probability per class
    Dense(units = 10, activation="softmax"),
])

In [None]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_93 (Conv2D)          (None, 28, 28, 32)        320       
                                                                 
 conv2d_94 (Conv2D)          (None, 28, 28, 32)        9248      
                                                                 
 max_pooling2d_43 (MaxPoolin  (None, 14, 14, 32)       0         
 g2D)                                                            
                                                                 
 batch_normalization_62 (Bat  (None, 14, 14, 32)       128       
 chNormalization)                                                
                                                                 
 dropout_24 (Dropout)        (None, 14, 14, 32)        0         
                                                                 
 conv2d_95 (Conv2D)          (None, 14, 14, 64)      

In [None]:
# Adam with its default settings.
optimizer = tf.keras.optimizers.Adam()

# Categorical cross-entropy matches the one-hot labels and the softmax output layer.
model.compile(optimizer= optimizer, loss=tf.keras.losses.CategoricalCrossentropy(), metrics=["accuracy"])

In [None]:
# Train on the augmented batches.
# steps_per_epoch is no longer passed: it was computed for a batch size of 64
# while train_gen yields batches of 32, so each "epoch" only covered half of
# the training data. Without it Keras uses the generator's own length, i.e.
# one full pass per epoch.
# epochs=6 matches the six epochs shown in this cell's saved output.
model.fit(train_gen, epochs=6)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x7f963da650d0>

In [None]:
# Evaluate on the validation generator; returns [loss, accuracy].
model.evaluate(val_gen)



[0.05270344763994217, 0.9845238327980042]

# Creating Testing Results

In [None]:
# Predict class scores for every test image and keep the index of the
# highest score in each row as the predicted label.
predictions = model.predict(X_test).argmax(axis=1)

In [None]:
# Peek at the predicted labels.
predictions

array([2, 0, 9, ..., 3, 9, 2])

In [None]:
# Pair every prediction with its 1-based image id, keeping prediction order.
submission = [[image_id, label] for image_id, label in enumerate(predictions, start=1)]

In [None]:
# Turn the [id, label] rows into a table with the two submission columns.
submission = pd.DataFrame(submission, columns=["ImageId", "Label"])
submission.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3


In [None]:
# Write the submission file without the DataFrame index column.
submission.to_csv("output.csv", index = False)

In [None]:
# Display the submission table (the notebook display truncates the middle rows).
submission

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3
...,...,...
27995,27996,9
27996,27997,7
27997,27998,3
27998,27999,9
