In [25]:
from tensorflow import keras
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [130]:
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()
X_valid, X_train = np.array(X_train_full[:5000]) , np.array(X_train_full[5000:] )
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

In [131]:
print(X_train.shape,y_train.shape)
print(X_test.shape,y_test.shape)

(55000, 28, 28) (55000,)
(10000, 28, 28) (10000,)


In [132]:
m, n = 60000,784
X_train = X_train.reshape(784,55000)/255.0
X_test = X_test.reshape(784,10000)


In [133]:
def one_hot(Y):
  one_hot_Y = np.zeros((Y.size, Y.max() + 1))
  one_hot_Y[np.arange(Y.size), Y] = 1
  one_hot_Y = one_hot_Y.T
  return one_hot_Y


In [134]:
def init_params():
    W1 = np.random.rand(10, 784) - 0.4
    b1 = np.random.rand(10, 1) -0.4
    W2 = np.random.rand(10, 10) -0.4
    b2 = np.random.rand(10, 1) -0.4
    return W1, b1, W2, b2
def ReLU(Z):
  return np.maximum(0,Z)
def Softmax(Z):
  return np.exp(Z)/sum(np.exp(Z))
##Softmax = np.vectorize(Softmax)
def forward( W1 , b1 , W2 , b2,X):
  Z1 = np.dot(W1,X) + b1
  A1 = ReLU(Z1)
  Z2 = np.dot(W2,A1) + b2 
  A2 = Softmax(Z2)
  return Z1, A1 , Z2 , A2
def ReLU_deriv(Z):
    return Z > 0
def backward(Z1, A1, Z2, A2, W1, W2, X, Y):
    one_hot_Y = one_hot(Y)
    dZ2 = A2 - one_hot_Y
    dW2 = 1 / m * dZ2.dot(A1.T)
    db2 = 1 / m * np.sum(dZ2)
    dZ1 = W2.T.dot(dZ2) * ReLU_deriv(Z1)
    dW1 = 1 / m * dZ1.dot(X.T)
    db1 = 1 / m * np.sum(dZ1)
    return dW1, db1, dW2, db2
def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1    
    W2 = W2 - alpha * dW2  
    b2 = b2 - alpha * db2    
    return W1, b1, W2, b2
def get_predictions(A2):
    return np.argmax(A2, axis = 0)

def get_accuracy(predictions, Y):
    return np.sum(predictions == Y) / Y.size
def gradient_descent(X, Y, alpha, iterations):
    W1, b1, W2, b2 = init_params()
    for i in range(iterations):
        Z1, A1, Z2, A2 = forward(W1, b1, W2, b2,X)
        dW1, db1, dW2, db2 = backward(Z1, A1, Z2, A2, W1, W2, X, Y)
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
    predictions = get_predictions(A2)
    print(get_accuracy(predictions, Y))
    return W1, b1, W2, b2,predictions

In [135]:
X_train.shape

(784, 55000)

In [136]:
y_train.shape

(55000,)

In [137]:
W1, b1, W2, b2,y_pred = gradient_descent(X_train, y_train, 0.10, 100)

0.10009090909090909


In [138]:
y_pred[:10]

array([4, 4, 4, 4, 4, 4, 4, 4, 4, 4])

In [70]:
y_train[:10]

array([9, 0, 0, 3, 0, 2, 7, 2, 5, 5], dtype=uint8)

In [71]:
from keras import models
from keras.layers import Dense

In [139]:
import tensorflow as tf
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()
X_valid, X_train = X_train_full[:5000] / 255.0, X_train_full[5000:] / 255.0
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    keras.layers.Dense(300,activation ="relu"),
    keras.layers.Dense(100,activation ="relu"),
    keras.layers.Dense(10,activation="softmax")
])

In [141]:
model.compile(loss = "sparse_categorical_crossentropy",
              optimizer="sgd",
              metrics=["accuracy"])

In [142]:
history = model.fit(X_train,y_train,epochs = 30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
model.evaluate(X_test, y_test)




[2.30972957611084, 0.10000000149011612]