<a href="https://colab.research.google.com/github/DaisyLaw/Machine-Learning-practices/blob/main/Digit_Recognizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --quiet tf_keras

In [None]:
import os

# Switch on the legacy-Keras flag (per the variable's name). It is set here,
# ahead of the cell that imports tensorflow.
os.environ.update({'TF_USE_LEGACY_KERAS': '1'})

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time

In [None]:
import tf_keras
import tensorflow as tf
import tensorflow_hub as hub

# Report the library versions this run is using
print("TF version:", tf.__version__)
print("Hub version:", hub.__version__)

# Check for GPU
gpu_devices = tf.config.list_physical_devices("GPU")
if gpu_devices:
    gpu_status = "available (YESS!!!!)"
else:
    gpu_status = "not available :("
print("GPU", gpu_status)

In [None]:
# Random seeds
def set_seed(seed=0):
    """Seed the random number generators used by this notebook.

    Applies the same seed to Python's built-in ``random`` module, NumPy's
    global generator and TensorFlow's global generator.

    Args:
        seed: Integer seed applied to every generator. Defaults to 0.
    """
    import random  # local import: only needed for seeding

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)


set_seed()

In [None]:
# Read the training split from disk and preview its first rows
path_train = "/content/sample_data/Digit_Recognizer/Data/train.csv"
train_df = pd.read_csv(path_train)
train_df.head()

In [None]:
# Number of rows in the training frame
train_df.shape[0]

In [None]:
# Explore label distribution

# Number of training rows per label, ordered by label value
label_counts = train_df['label'].value_counts().sort_index()

# Bar chart drawn with matplotlib directly: this notebook never imports
# seaborn, so a call through `sns` would fail with a NameError.
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(label_counts.index.astype(str), label_counts.values)
ax.set_xlabel('label')
ax.set_ylabel('count')
ax.set_title('Distribution of labels in training set')
plt.show()

In [None]:
# Drop the label column, scale features to be in [0,1], and reshape each row
# into a 28x28 single-channel image (-1 lets NumPy infer the number of images)
X = (train_df.drop(columns='label').to_numpy() / 255).reshape(-1, 28, 28, 1)

In [None]:
# Preview first few images

# 5 x 5 grid showing the first 25 training images
fig, axes = plt.subplots(5, 5, figsize=(6, 6))
for num, ax in enumerate(axes.flat):
    ax.imshow(X[num, :, :, :], interpolation="none", cmap=plt.cm.binary)
    ax.axis('off')
fig.tight_layout()
plt.show()

In [None]:
# Target labels, taken from the same frame (and in the same row order) as X
y = train_df.loc[:, "label"]
y.shape[0]

In [None]:
# Read the test split from disk and summarise its columns
path_test = "/content/sample_data/Digit_Recognizer/Data/test.csv"
test_df = pd.read_csv(filepath_or_buffer=path_test)
test_df.info()

In [None]:
# First five rows of the test frame
test_df.head(n=5)

In [None]:
# Save test data index for submission
test_index = test_df.index

# Scale to [0,1] exactly like the training features, then reshape.
# The model is trained on pixels / 255, so feeding raw pixel values at
# prediction time would put the test inputs on a different scale from
# anything the network saw during training.
test_data = (test_df.values / 255).reshape(-1, 28, 28, 1)

In [None]:
# 5 x 5 grid showing the first 25 test images
fig, axes = plt.subplots(5, 5, figsize=(6, 6))
for num, ax in enumerate(axes.flat):
    ax.imshow(test_data[num, :, :, :], interpolation="none", cmap=plt.cm.binary)
    ax.axis('off')
fig.tight_layout()
plt.show()

In [None]:
# Data Augmentation

from tensorflow.keras.preprocessing.image import ImageDataGenerator

augmentation_options = dict(
    rotation_range=18,        # 18 degrees
    zoom_range=0.2,           # 20 %
    fill_mode='constant',     # edge pixels
    cval=0,                   # fill value
)
datagen = ImageDataGenerator(**augmentation_options)

# Iterator yielding augmented versions of one training image (index 42)
aug = datagen.flow(X[42].reshape(-1, 28, 28, 1))

# 2 x 8 grid, one freshly augmented sample per panel
fig, axes = plt.subplots(2, 8, figsize=(15, 4))
fig.suptitle('Random Rotation & Random Zoom', fontsize=20, y=1.05)
axes = axes.ravel()

for ax in axes:
    aug_img = next(aug)[0]   # first (and only) image of the batch
    ax.imshow(aug_img, cmap='gray')
    ax.axis('off')

plt.show()

In [None]:
# Define model
def build_model(input_shape=(28, 28, 1), num_classes=10):
    """Build and compile the CNN digit classifier.

    Layer stack: random rotation and zoom augmentation, two
    Conv2D -> MaxPool2D -> Dropout stages, Flatten, a 256-unit dense layer
    with dropout, and a softmax output layer.

    Args:
        input_shape: Shape of a single input image as
            (height, width, channels). Defaults to (28, 28, 1).
        num_classes: Number of units in the softmax output layer.
            Defaults to 10.

    Returns:
        A ``tf_keras`` Sequential model compiled with the Adam optimizer,
        sparse categorical cross-entropy loss and sparse categorical
        accuracy as metric.
    """
    model = tf_keras.models.Sequential([

        # Explicit input layer. Sequential only reads an input shape from its
        # first layer, so declaring it on a later Conv2D has no effect and
        # leaves the model unbuilt until the first call to fit().
        tf_keras.layers.InputLayer(input_shape=input_shape),

        # Data Augmentation
        tf_keras.layers.RandomRotation(factor=0.05, fill_mode='constant'),     # 0.05 of a full turn = 18 degrees
        tf_keras.layers.RandomZoom(height_factor=(-0.2, 0.2), width_factor=(-0.2, 0.2), fill_mode='constant'),  # 20%

        # Convolutional layer 1
        tf_keras.layers.Conv2D(filters=64, kernel_size=5, strides=1, padding='same', activation='relu'),
        tf_keras.layers.MaxPool2D(pool_size=2, padding='same'),
        tf_keras.layers.Dropout(rate=0.3),

        # Convolutional layer 2
        tf_keras.layers.Conv2D(filters=128, kernel_size=3, strides=1, padding='same', activation='relu'),
        tf_keras.layers.MaxPool2D(pool_size=4, padding='same'),
        tf_keras.layers.Dropout(rate=0.3),
        tf_keras.layers.Flatten(),

        # Hidden layer 3
        tf_keras.layers.Dense(units=256, activation='relu'),
        tf_keras.layers.Dropout(rate=0.4),

        # Output layer (softmax returns a probability distribution)
        tf_keras.layers.Dense(units=num_classes, activation='softmax'),
    ])

    # Define optimizer, loss function and accuracy metric
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['sparse_categorical_accuracy'])

    return model

In [None]:
from tensorflow.keras import callbacks

# Define early stopping callback on validation loss (patience of 20 epochs,
# restoring the best weights)
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# Reduce learning rate (multiply by 0.5) when validation loss plateaus for 5 epochs
reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5)

In [None]:
#Cross validation
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score

FOLDS = 10
EPOCHS = 200
BATCH_SIZE = 500

# Accumulators (re-created every time this cell runs)
test_preds = np.zeros((1, 1))   # broadcasts against the per-fold test predictions summed below
scores = []                     # validation accuracy per fold
times = []                      # wall-clock minutes per fold
fold_histories = []             # one DataFrame of per-epoch metrics per fold

# StratifiedKFold yields positional indices, so index a plain array rather
# than relying on the Series' index labels happening to equal positions.
y_values = np.asarray(y)

cv = StratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=0)

for fold, (train_idx, val_idx) in enumerate(cv.split(X, y)):
    # Start timer
    start = time.time()

    # get training and validation sets
    X_train, X_valid = X[train_idx], X[val_idx]
    y_train, y_valid = y_values[train_idx], y_values[val_idx]

    # Release Keras global state left over from the previous fold's model
    tf_keras.backend.clear_session()

    # Build and train model
    # NOTE(review): early_stopping / reduce_lr are defined earlier in the
    # notebook but are not passed here. Enabling early stopping would make a
    # fold's history shorter than EPOCHS rows, which the learning-curve
    # slicing further down assumes.
    model = build_model()
    fold_history = model.fit(
        X_train,
        y_train,
        validation_data=(X_valid, y_valid),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        verbose=False
    )

    fold_histories.append(pd.DataFrame(fold_history.history))

    # Make predictions and measure accuracy
    y_pred = np.argmax(model.predict(X_valid), axis=1)
    score = accuracy_score(y_valid, y_pred)
    scores.append(score)

    # Store predictions (summed class probabilities across folds)
    test_preds = test_preds + model.predict(test_data)

    # Stop timer
    stop = time.time()
    times.append((stop - start)/60)

    # Print accuracy and time
    print(f'Fold {fold} - Accuracy: {score}, Time: {round((stop - start)/60,1)} mins')

# Stack the per-fold histories once: EPOCHS consecutive rows per fold
history_df = pd.concat(fold_histories, ignore_index=True)

print('')
print(f'Mean Accuracy: {np.mean(scores)}')

In [None]:
#Plot fold accuracy

fig, ax = plt.subplots(figsize=(10, 5))

# Per-fold accuracy plus a flat line at the mean
mean_score = np.mean(scores)
ax.plot(scores, label='scores')
ax.plot(np.full(len(scores), mean_score), c='black', label='mean')

# Aesthetics
ax.set_title('Fold accuracy')
ax.set_xlabel('Fold')
ax.set_ylabel('Accuracy')
ax.set_ylim([0.99, 1])
ax.legend()

In [None]:
#Plot fold time

fig, ax = plt.subplots(figsize=(10, 5))

# Per-fold duration plus a flat line at the mean
mean_time = np.mean(times)
ax.plot(times, label='times', c='orange')
ax.plot(np.full(len(times), mean_time), c='black', label='mean')

# Aesthetics
ax.set_title('Fold time')
ax.set_xlabel('Fold')
ax.set_ylabel('Time (mins)')
ax.legend()

In [None]:
#Plot learning curves

# Panel title -> history columns drawn in that panel
curve_columns = {
    "Cross-entropy": ['loss', 'val_loss'],
    "Accuracy": ['sparse_categorical_accuracy', 'val_sparse_categorical_accuracy'],
}

# Fresh 0..n-1 row labels so each fold occupies EPOCHS consecutive rows
history_rows = history_df.reset_index(drop=True)

for i in range(FOLDS):
    fold_start = EPOCHS * i

    # Rows of this fold (label slicing with .loc is inclusive at both ends).
    # The fold's first epoch is left out of the plot.
    # NOTE(review): presumably to keep the y-axis readable - confirm.
    fold_history = history_rows.loc[fold_start + 1:fold_start + EPOCHS - 1].copy()

    # Re-label rows with the 1-based epoch number inside the fold, so the
    # x-axis really shows epochs instead of the cumulative row position.
    fold_history.index = fold_history.index - fold_start + 1

    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16,4))
    fig.suptitle('Fold '+str(i+1), fontsize=20)

    for ax, (panel_title, columns) in zip(axes, curve_columns.items()):
        fold_history[columns].plot(title=panel_title, ax=ax)
        ax.set_xlabel('Epoch')


In [None]:
# Soft voting to ensemble predictions
# test_preds holds class scores summed over folds (2-D); collapse it to the
# winning class per row. The dimensionality guard makes the cell safe to
# re-run: once collapsed to 1-D, a second argmax over axis=1 would raise.
if np.ndim(test_preds) == 2:
    test_preds = np.argmax(test_preds, axis=1)

In [None]:
# Plot some model predictions
fig, axes = plt.subplots(2, 10, figsize=(15, 3.5))
fig.suptitle('Model predictions', fontsize=20, y=1.05)

# One panel per test image, titled with its predicted label
for i, ax in enumerate(axes.ravel()):
    ax.grid(False)
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    ax.set_title(f'Pred:{test_preds[i]}')
    ax.imshow(test_data[i], cmap='gray')

plt.show()

In [None]:
# Assemble the submission table: image ids are the saved test index shifted
# by one, paired with the predicted label for each row
image_ids = test_index + 1
output = pd.DataFrame({'ImageId': image_ids, 'Label': test_preds})

# Check format
output.head()

In [None]:
# Write the submission file, leaving out the DataFrame index column
submission_path = '/content/sample_data/Digit_Recognizer/submission-02.csv'
output.to_csv(submission_path, index=False)