In [1]:
import gzip
import urllib
import os
import numpy as np
os.listdir()

['.ipynb_checkpoints', 'softmax regression.ipynb']

In [2]:
def load_dataset():
    #----------------downloads necessary files if not in directory---------------------------    
    link='http://yann.lecun.com/exdb/mnist/'
    filenames=['train-images-idx3-ubyte.gz','train-labels-idx1-ubyte.gz','t10k-images-idx3-ubyte.gz','t10k-labels-idx1-ubyte.gz']
    directory='../datasets/mnist_dataset/'
    for filename in filenames:
        if filename not in os.listdir(directory):
            print('downloading '+str(filename))
            urllib.request.urlretrieve(link+filename, filename)
            
    #---------------------load datasets from file in directory-------------------------------

    def load_image(filename):
        with gzip.open(directory+filename,'rb') as f:
            buffer=f.read()
            data=np.frombuffer(buffer,dtype=np.uint8,offset=16)
            data=data.reshape(-1,28,28)  
        return data

    def load_label(filename):
        with gzip.open(directory+filename,'rb') as f:
            buffer=f.read()
            data=np.frombuffer(buffer,dtype=np.uint8,offset=8)
        return data
    
    
    x_train=load_image(filenames[0])
    y_train=load_label(filenames[1])
    x_test=load_image(filenames[2])
    y_test=load_label(filenames[3])
    return x_train,y_train,x_test,y_test


    

In [3]:
x_train,y_train,x_test,y_test=load_dataset()

#plot check
import matplotlib.pyplot as plt
plt.imshow(x_train[0],cmap='gray')
plt.show()
print('Its label: ',y_train[0])
y_test=np.eye(10)[y_test]
y_train=np.eye(10)[y_train]
#flatten
x_train=x_train.reshape((-1,np.product(x_train.shape[1:]))).T
x_test=x_test.reshape((-1,np.product(x_test.shape[1:]))).T
y_train=y_train.T
y_test=y_test.T


print('x_test_shape:',x_test.shape)
print('y_test_shape:',y_test.shape)
print('x_train_shape:',x_train.shape)
print('y_train.shape:',y_train.shape)

#normalize
x_train=x_train/float(256)
x_test=x_test/float(256)

<Figure size 640x480 with 1 Axes>

Its label:  5
x_test_shape: (784, 10000)
y_test_shape: (10, 10000)
x_train_shape: (784, 60000)
y_train.shape: (10, 60000)


In [27]:
def softmax_activation(Z):
    A=np.exp(Z)/np.sum(np.exp(Z),axis=0)
    return A

In [28]:
def initialize_parameters(input_size,output_size):
    w=np.zeros((input_size,output_size))
    b=np.zeros((output_size,1))
    parameters=(w,b)
    return parameters

In [29]:
def compute_cost(X,Y,parameters):
    m=X.shape[1]
    w,b=parameters
    Z=np.dot(w.T,X)+b
    A=softmax_activation(Z)
 
    cost=1/m*np.sum(-np.sum(Y*np.log(A),axis=0))
    return cost,A

In [30]:
def backward_propagation(parameters,A,cost,X,Y):
    m=Y.shape[1]
    dZ=A-Y
    dw=1/m*np.dot(X,dZ.T)
    db=1/m*np.sum(dZ)
    gradients=(dw,db)
    return gradients

In [31]:
def optimize(X,Y,parameters,n_iterations,learning_rate):
    w,b=parameters
    for i in range(n_iterations):
        cost,A=compute_cost(X,Y,parameters)
        gradients=backward_propagation(parameters,A,cost,X,Y)
        dw,db=gradients
        
        w=w-learning_rate*dw
        b=b-learning_rate*db
        parameters=(w,b)
        
        if(i%10==0):
            print('iteration:',i)
            print('cost:',cost)
            
        parameter=(w,b)
    return parameter
        

In [32]:
def predict(X,parameters):
    w,b=parameters
    m=X.shape[1]
    Z=np.dot(w.T,X)+b
    A=softmax_activation(Z)
    prediction=np.argmax(A,axis=0)
    
    return prediction
    

In [33]:

def model(x_train,y_train,x_test,y_test,n_iterations=1000,learning_rate=0.001):
    input_size=x_train.shape[0]
    output_size=10
    parameter=initialize_parameters(input_size,output_size)
    parameters=optimize(x_train,y_train,parameter,n_iterations,learning_rate)
    y_prediction_train=predict(x_train,parameters)
    y_prediction_test=predict(x_test,parameters)

    train_acc=np.mean(np.argmax(y_train,axis=0)==y_prediction_train)*100
    test_acc=np.mean(np.argmax(y_test,axis=0)==y_prediction_test)*100

    print('train_accuracy:',train_acc)
    print('test_accuracu:',test_acc)

In [35]:
model(x_train,y_train,x_test,y_test,n_iterations=10,learning_rate=0.001)

iteration: 0
cost: 2.3025850929940455
train_accuracy: 67.46666666666667
test_accuracu: 68.28999999999999
