In [None]:
import numpy as np
%matplotlib inline
import tensorflow as tf
from tensorflow import keras

# Let's work on classifying Fashion-MNIST

In [None]:
# Load the dataset through the helper that Keras provides for it.
# load_data() is unpacked as two (X, y) pairs: the training pair first, then the test pair.
fashion_mnist = keras.datasets.fashion_mnist
train_pair, test_pair = fashion_mnist.load_data()
X_train_full, y_train_full = train_pair
X_test, y_test = test_pair

# Inspect the shapes, one per line.
# How many data instances are there in the train set?
print(X_train_full.shape, y_train_full.shape, X_test.shape, y_test.shape, sep="\n")

In [None]:
# Visualize the first training image with matplotlib's default colormap.
import matplotlib.pyplot as plt

im1 = X_train_full[0]

# Binding im1 alone draws nothing, so render it explicitly.
plt.imshow(im1)
plt.show()


In [None]:
# Show the same image (im1) again in greyscale by passing the "Greys"
# colormap to imshow.
plt.imshow(im1, cmap="Greys")

What's the class label of the first image?

In [None]:
# Index 0 of the label array is the class label of the first image.
# label1 is reused by a later cell to look up the class name.
label1 = y_train_full[0]
print(label1)

In [None]:
# The raw label is just an integer, so map it to a readable name.
# A label is used directly as an index into this list.
class_names = [
    "T-shirt/top",  # 0
    "Trouser",      # 1
    "Pullover",     # 2
    "Dress",        # 3
    "Coat",         # 4
    "Sandal",       # 5
    "Shirt",        # 6
    "Sneaker",      # 7
    "Bag",          # 8
    "Ankle boot",   # 9
]

print(class_names[label1])

Split off a validation set of 5000 instances from the full training set.


In [None]:
# Carve a validation set of 5000 instances out of the full training data
# using scikit-learn's train_test_split.
from sklearn.model_selection import train_test_split

# An integer test_size is an absolute number of samples (not a fraction);
# random_state fixes the shuffle so the split is the same on every run.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full,
    y_train_full,
    test_size=5000,
    random_state=42,
)

print(X_train.shape, X_val.shape, sep="\n")

In [None]:
# Data visualization: show the first n_rows * n_cols training images in a
# grid, each titled with its class name.
n_rows = 2
n_cols = 8
plt.figure(figsize=(n_cols * 1.6, n_rows * 1.8))

# One flat loop over the grid; subplot positions are 1-based and filled
# row by row, so image `index` lands in subplot `index + 1`.
for index in range(n_rows * n_cols):
    plt.subplot(n_rows, n_cols, index + 1)
    plt.imshow(X_train[index], cmap="binary", interpolation="nearest")
    plt.axis("off")
    plt.title(class_names[y_train[index]])

plt.show()

# Random Forest

In [None]:
# Preparation for the random forest classifier: it is trained on flat
# feature vectors, so every image is reshaped into a one-dimensional vector.

# Length of one flattened image = the product of the two image dimensions.
vector_size = X_train.shape[1] * X_train.shape[2]

# Apply the same reshape to all three splits; the number of instances
# (first axis) is kept, the remaining axes collapse into vector_size.
X_train_vectors, X_val_vectors, X_test_vectors = (
    images.reshape((images.shape[0], vector_size))
    for images in (X_train, X_val, X_test)
)

print(X_train_vectors.shape)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Use the flattened splits built in the reshape cell: the classifier is fit
# on one feature vector per image, together with the matching labels.
train_features = X_train_vectors
train_labels = y_train

validation_features = X_val_vectors
validation_labels = y_val

# random_state is fixed so the forest is the same on every run.
clf = RandomForestClassifier(random_state=0)
clf.fit(train_features, train_labels)


# Measure the score on the data the model was fit on and on the held-out
# validation data; a large gap between the two points to overfitting.
train_score = clf.score(train_features, train_labels)
val_score = clf.score(validation_features, validation_labels)

print("Train score: ", train_score)
print("Val score: ", val_score)

In [None]:
# Pick one validation image at random, display it, and compare the model's
# prediction with the true label.
import numpy as np

image_index = np.random.randint(len(X_val_vectors))
rand_image_vec = X_val_vectors[image_index]

# The flat vector is folded back into a 28x28 grid for display only.
rand_image = rand_image_vec.reshape((28, 28))
plt.imshow(rand_image, cmap="Greys")

# predict() is given a batch of one row, so the vector gets a leading axis;
# the single entry of the result is the predicted class id.
predicted_class = clf.predict(rand_image_vec[np.newaxis, :])[0]
actual_class = y_val[image_index]

print("Predicted class: ", class_names[predicted_class])
print("Actual class: ", class_names[actual_class])

Now, let's train a logistic regression classifier.


In [None]:
from sklearn.linear_model import LogisticRegression

# Same flattened splits as for the random forest.
train_features = X_train_vectors
train_labels = y_train

validation_features = X_val_vectors
validation_labels = y_val

# Scale the pixel values by 1/255 (assumes 8-bit pixels, i.e. values in 0-255,
# so the result lies in [0, 1]). X_train_norm / X_val_norm are also the
# inputs of the Keras training cell further down.
X_train_norm = train_features / 255.0
X_val_norm = validation_features / 255.0

# Note: this rebinds `clf`, replacing the random forest from the cell above.
clf = LogisticRegression(random_state=0)
clf.fit(X_train_norm, train_labels)

# Measure train score and val score on the SAME normalized features the model
# was fit on; scoring the raw 0-255 features would feed the model inputs on
# a different scale and give misleading accuracies.
train_score = clf.score(X_train_norm, train_labels)
val_score = clf.score(X_val_norm, validation_labels)

print("Train score: ", train_score)
print("Val score: ", val_score)

# Feedforward Neural Network in Keras
Now it's time for the neural network, powered by Keras, to shine.

In [None]:
# One output unit per class; class_names holds one entry per class label.
number_of_classes = len(class_names)

model = keras.models.Sequential()

# Two ReLU hidden layers on top of the flattened 28*28 input vector ...
model.add(keras.layers.Dense(300, input_shape=[28*28], activation="relu"))
model.add(keras.layers.Dense(100, activation="relu"))
# ... followed by a softmax output layer that yields one probability per class.
model.add(keras.layers.Dense(number_of_classes, activation="softmax"))

# The "sparse" variant of the loss is used because the labels are integer
# class ids rather than one-hot vectors.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()

In [None]:
# Inspect the parameters of a single hidden layer -- here the first layer
# of the model.
h1 = model.layers[0]
print(h1)

# get_weights() is unpacked into the layer's weights and its biases;
# print the shape of each on its own line.
weights, biases = h1.get_weights()
print(weights.shape, biases.shape, sep="\n")

In [None]:
# Train on the normalized training vectors for 30 epochs, evaluating on the
# normalized validation split along the way. The returned object is kept in
# `history` so the learning curves can be plotted in the next cell.
history = model.fit(
    X_train_norm,
    y_train,
    epochs=30,
    validation_data=(X_val_norm, y_val),
)

In [None]:
import pandas as pd

# history.history is turned into a DataFrame so that every recorded
# quantity becomes one curve in the plot.
curves = pd.DataFrame(history.history)
ax = curves.plot(figsize=(8, 5))

# Grid on, y-axis clamped to [0, 1].
ax.grid(True)
ax.set_ylim(0, 1)

# Save the figure to disk before showing it.
ax.figure.savefig("keras_learning_curves_plot.png")
plt.show()