# **Feedforward artificial neural network: image classification on the MNIST dataset**

In [1]:
# Import modules
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

### Data preparation

#### Import data

In [2]:
def load_data():
    """Download the MNIST ``mnist_784`` dataset (version 1) from OpenML.

    ``as_frame=True`` is requested explicitly so the features come back as a
    pandas DataFrame (and the target as a pandas Series) instead of depending
    on the installed scikit-learn's default for ``as_frame``. The notebook
    calls ``raw_X.info()`` right after loading, which requires a DataFrame.

    Returns:
        tuple: ``(raw_X, raw_Y)`` -- the ``'data'`` and ``'target'`` entries
        of the fetched dataset.
    """
    # data_home is left at its default (None), as in the original call.
    dataset = fetch_openml('mnist_784', version=1, as_frame=True)
    return dataset['data'], dataset['target']

# Fetch the dataset once; the following cells reuse raw_X and raw_Y.
raw_X, raw_Y = load_data()

In [3]:
# Print a structural summary of the feature table (entries, columns, dtypes, memory usage).
raw_X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70000 entries, 0 to 69999
Columns: 784 entries, pixel1 to pixel784
dtypes: float64(784)
memory usage: 418.7 MB


## Preprocessing


In [6]:
def clean_data(raw_X, raw_Y, num_classes=10):
    """Rescale the features and one-hot encode the labels.

    Args:
        raw_X: Feature table supporting ``astype`` and in-place division
            (the notebook passes a pandas DataFrame). Values are divided by
            255, which maps 8-bit pixel intensities onto ``[0, 1]``.
        raw_Y: Class labels, forwarded unchanged to
            ``keras.utils.to_categorical``.
        num_classes: Number of classes, i.e. the width of the one-hot
            encoding. Defaults to 10, the value previously hard-coded here.

    Returns:
        tuple: ``(cleaned_X, cleaned_Y)`` -- the float32, rescaled features
        and the one-hot encoded labels.
    """
    # astype yields a separate float32 object, so the in-place division below
    # rescales that copy and leaves the caller's raw_X untouched.
    cleaned_X = raw_X.astype('float32')
    cleaned_X /= 255

    cleaned_Y = keras.utils.to_categorical(raw_Y, num_classes)

    return cleaned_X, cleaned_Y

# Build the rescaled features and one-hot labels consumed by the split step.
cleaned_X, cleaned_Y = clean_data(raw_X, raw_Y)

#### Data split

- Split data into a train set (50%), validation set (20%) and a test set (30%).

In [10]:
def split_data(cleaned_X, cleaned_Y):
    """Partition the data into train (50%), validation (20%) and test (30%) sets.

    The split is done in two passes with the same fixed seed: the test set is
    carved off first, then the validation set is taken from what remains.

    Returns:
        tuple: ``(X_val, X_test, X_train, Y_val, Y_test, Y_train)`` -- note
        the order: validation, test, train.
    """
    seed = 42

    # Pass 1: hold out 30% of all samples as the test set.
    X_rest, X_test, Y_rest, Y_test = train_test_split(
        cleaned_X, cleaned_Y, test_size=0.3, random_state=seed
    )

    # Pass 2: 2/7 of the remaining 70% is 20% of the full data set.
    val_share_of_rest = 2/7
    X_train, X_val, Y_train, Y_val = train_test_split(
        X_rest, Y_rest, test_size=val_share_of_rest, random_state=seed
    )

    return X_val, X_test, X_train, Y_val, Y_test, Y_train

# Unpacking order follows split_data's return order: validation, test, train.
X_val, X_test, X_train, Y_val, Y_test, Y_train = split_data(cleaned_X, cleaned_Y)

### Model

#### Neural network structure
- For this network, we'll use 2 hidden layers
- Layer 1 should have 128 nodes, a dropout rate of 20%, and relu as its activation function
- Layer 2 should have 64 nodes, a dropout rate of 20%, and relu as its activation function
- The last layer should map back to the 10 possible MNIST classes. Use softmax as the activation

In [15]:
def build_model():
    """Assemble the feedforward classifier as a ``Sequential`` stack.

    Layout: two ReLU hidden layers (``layer1`` with 128 units taking
    784-long inputs, ``layer2`` with 64 units), each followed by 20% dropout,
    then a 10-unit softmax output layer (``layer3``).

    Returns:
        The uncompiled ``Sequential`` model.
    """
    # (name, units, extra constructor kwargs) for each hidden layer; only the
    # first layer declares the input shape.
    hidden_specs = (
        ('layer1', 128, {'input_shape': (784, )}),
        ('layer2', 64, {}),
    )

    stack = []
    for layer_name, units, extra in hidden_specs:
        stack.append(Dense(units, activation='relu', name=layer_name, **extra))
        stack.append(Dropout(0.2))
    stack.append(Dense(10, activation='softmax', name='layer3'))

    return Sequential(stack)

# Instantiate the network; it is compiled and trained in a later cell.
model = build_model()


# Model compilation

- Use categorical crossentropy as the loss function

# Model training
- Use a batch size of 128, and train for 12 epochs
- Use verbose training, include validation data


In [16]:
def compile_model(model):
    """Configure ``model`` for training and return it.

    Compiles with the ``'adam'`` optimizer, ``'categorical_crossentropy'``
    loss and ``'accuracy'`` as the tracked metric.

    NOTE: do not change the inputs or outputs of this function.
    """
    compile_options = {
        'optimizer': 'adam',
        'loss': 'categorical_crossentropy',
        'metrics': ['accuracy'],
    }
    model.compile(**compile_options)
    return model

def train_model(model, X_train, Y_train, X_val, Y_val):
    """Fit ``model`` on the training data, validating on ``(X_val, Y_val)``.

    Trains for 12 epochs with a batch size of 128 and verbose progress output.

    NOTE: do not change the inputs or outputs of this function.

    Returns:
        tuple: ``(model, history)`` where ``history`` is whatever
        ``model.fit`` returned.
    """
    fit_options = dict(
        batch_size=128,
        epochs=12,
        verbose=1,
        validation_data=(X_val, Y_val),
    )
    training_log = model.fit(X_train, Y_train, **fit_options)
    return model, training_log

# Compile first, then fit; `history` keeps the value returned by model.fit.
model = compile_model(model)
model, history = train_model(model, X_train, Y_train, X_val, Y_val)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


# Model evaluation
- Show the performance on the test set
- What is the difference between "evaluate" and "predict"?
- Identify a few images the model classifies incorrectly

In [17]:
def eval_model(model, X_test, Y_test):
    """Evaluate ``model`` on the test set and print the two leading scores.

    The first two entries returned by ``model.evaluate`` are taken as the
    test loss and the test accuracy, printed with four decimals each.

    Returns:
        tuple: ``(test_loss, test_accuracy)``.
    """
    metrics = model.evaluate(X_test, Y_test, verbose=1)
    test_loss, test_accuracy = metrics[0], metrics[1]

    for label, value in (('Test Loss:', test_loss), ('Test Accuracy:', test_accuracy)):
        print(label, '%.4f' % value)

    return test_loss, test_accuracy

# Score the trained model on the held-out test split.
test_loss, test_accuracy = eval_model(model, X_test, Y_test)

Test Loss: 0.0920
Test Accuracy: 0.9735
