In [None]:
import sys
import os
import sklearn
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# to make this notebook's output stable across runs
np.random.seed(42)
tf.random.set_seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib.pyplot as plt

# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

In [None]:
from pathlib import Path
import pandas as pd
import tarfile
import urllib.request

# Both CSVs are read with header=None, so columns are numbered 0..N-1
# rather than named from the first row.
minst_balanced_train = pd.read_csv(
    "/content/sample_data/emnist-balanced-train.csv",
    header=None,
    sep=',',
)
minst_balanced_test = pd.read_csv(
    "/content/sample_data/emnist-balanced-test.csv",
    header=None,
    sep=',',
)

In [None]:
# Column 0 is used as the label; every remaining column is used as a feature.
Y_train_full, X_train_full = minst_balanced_train.iloc[:, 0], minst_balanced_train.iloc[:, 1:]
Y_test, X_test = minst_balanced_test.iloc[:, 0], minst_balanced_test.iloc[:, 1:]

In [None]:
# Sanity check: dimensions of the train/test features and labels
tuple(split.shape for split in (X_train_full, Y_train_full, X_test, Y_test))

((112800, 784), (112800,), (18800, 784), (18800,))

In [None]:
# Share of each class label in the full training set.
# Y_train_full is used because Y_train is only created by the validation split
# in a later cell, so referencing it here fails on a fresh kernel.
unique, counts = np.unique(Y_train_full, return_counts=True)
proportions = counts / counts.sum()
dict(zip(unique, proportions))

{0: 0.02127659574468085,
 1: 0.02127659574468085,
 2: 0.02127659574468085,
 3: 0.02127659574468085,
 4: 0.02127659574468085,
 5: 0.02127659574468085,
 6: 0.02127659574468085,
 7: 0.02127659574468085,
 8: 0.02127659574468085,
 9: 0.02127659574468085,
 10: 0.02127659574468085,
 11: 0.02127659574468085,
 12: 0.02127659574468085,
 13: 0.02127659574468085,
 14: 0.02127659574468085,
 15: 0.02127659574468085,
 16: 0.02127659574468085,
 17: 0.02127659574468085,
 18: 0.02127659574468085,
 19: 0.02127659574468085,
 20: 0.02127659574468085,
 21: 0.02127659574468085,
 22: 0.02127659574468085,
 23: 0.02127659574468085,
 24: 0.02127659574468085,
 25: 0.02127659574468085,
 26: 0.02127659574468085,
 27: 0.02127659574468085,
 28: 0.02127659574468085,
 29: 0.02127659574468085,
 30: 0.02127659574468085,
 31: 0.02127659574468085,
 32: 0.02127659574468085,
 33: 0.02127659574468085,
 34: 0.02127659574468085,
 35: 0.02127659574468085,
 36: 0.02127659574468085,
 37: 0.02127659574468085,
 38: 0.021276595744680

In [None]:
# Hold out the first N_VALID training rows for validation and train on the rest.
# Features are divided by 255 (assumes 0-255 pixel intensities -- confirm against
# the dataset) so they land in the [0, 1] range.
N_VALID = 9400
X_valid, X_train = X_train_full[:N_VALID] / 255., X_train_full[N_VALID:] / 255.
Y_valid, Y_train = Y_train_full[:N_VALID], Y_train_full[N_VALID:]
# Scale the test features from the raw frame instead of from X_test itself, so
# re-running this cell does not divide by 255 a second time.
X_test = minst_balanced_test.iloc[:, 1:] / 255.

In [None]:
# Sanity check: dimensions after carving out the validation set
tuple(part.shape for part in (X_valid, X_train, Y_valid, Y_train, X_test))


((9400, 784), (103400, 784), (9400,), (103400,), (18800, 784))

In [None]:
# Install KerasTuner only when the google.colab module is loaded (i.e. the
# notebook is running on Colab). -U upgrades to the newest release, so the
# installed version is not pinned; -q keeps pip's output short.
if "google.colab" in sys.modules:
    %pip install -q -U keras_tuner

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m128.0/128.0 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m950.8/950.8 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import keras_tuner as kt

def build_model(hp):
    """Build and compile a dense softmax classifier from tuner hyperparameters.

    Args:
        hp: Hyperparameter container supplied by the tuner. Three values are
            requested from it, in this order: ``n_hidden`` (int, 0 to 5,
            default 2), ``n_neurons`` (int, 16 to 256) and ``learning_rate``
            (float, 1e-4 to 1e-2, log sampling).

    Returns:
        A compiled ``tf.keras.Sequential`` model made of ``n_hidden`` ReLU Dense
        layers with ``n_neurons`` units each, followed by a 47-unit softmax
        Dense layer. It is compiled with an SGD optimizer, sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    depth = hp.Int("n_hidden", min_value=0, max_value=5, default=2)
    width = hp.Int("n_neurons", min_value=16, max_value=256)
    step_size = hp.Float("learning_rate", min_value=1e-4, max_value=1e-2, sampling="log")
    sgd = tf.keras.optimizers.SGD(learning_rate=step_size)

    net = tf.keras.Sequential()
    # depth may be 0, in which case the network is just the softmax layer
    for units in [width] * depth:
        net.add(tf.keras.layers.Dense(units, activation="relu"))
    net.add(tf.keras.layers.Dense(47, activation="softmax"))

    net.compile(
        optimizer=sgd,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return net

Using TensorFlow backend


In [None]:
# Random search over build_model's hyperparameters: up to 6 trials, ranked by
# validation accuracy. overwrite=True starts from scratch instead of reusing
# results already stored under my_mist/my_rnd_search.
random_search_tuner = kt.RandomSearch(
    build_model,
    objective="val_accuracy",
    max_trials=6,
    seed=42,
    overwrite=True,
    directory="my_mist",
    project_name="my_rnd_search",
)
random_search_tuner.search(
    X_train,
    Y_train,
    validation_data=(X_valid, Y_valid),
    epochs=42,
)

Trial 6 Complete [00h 07m 24s]
val_accuracy: 0.5835106372833252

Best val_accuracy So Far: 0.8078723549842834
Total elapsed time: 00h 48m 22s


In [None]:
# `model` was never defined in this notebook, so build it from the best
# hyperparameters found by the search, then train it to obtain a History object
# for the learning-curve plot.
best_hyperparameters = random_search_tuner.get_best_hyperparameters(num_trials=1)[0]
model = build_model(best_hyperparameters)
history = model.fit(X_train, Y_train, epochs=42,
                    validation_data=(X_valid, Y_valid))


NameError: ignored

In [None]:
# Learning curves. The per-epoch metrics returned by fit() live in
# `history.history` (a dict of metric name -> list of values); the History
# object has no `random_search_tuner` attribute.
pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0, 1)  # values above 1 (e.g. early loss) are clipped from view
plt.xlabel("Epoch")
plt.show()

NameError: ignored

In [None]:
# Ask the tuner's oracle for its single best trial and print its summary
top_trials = random_search_tuner.oracle.get_best_trials(num_trials=1)
best_trial = top_trials[0]
best_trial.summary()

Trial 4 summary
Hyperparameters:
n_hidden: 5
n_neurons: 37
learning_rate: 0.008547485565344062
Score: 0.8078723549842834


In [None]:
# Last val_accuracy value recorded for the best trial
best_trial.metrics.get_last_value("val_accuracy")

0.8078723549842834

In [None]:
# Retrieve the tuner's top-ranked model and score it on the test set
top_models = random_search_tuner.get_best_models(num_models=1)
best_model = top_models[0]
test_loss, test_accuracy = best_model.evaluate(X_test, Y_test)



