In [118]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [119]:
data = pd.read_csv("mnist_train.csv", header=None)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [120]:
#data.describe()

In [121]:
data.shape

(5000, 785)

In [122]:
cols=data.shape[1]
print("columns = {}".format(cols))

columns = 785


In [123]:
y=np.array(data.iloc[:,:1])

In [124]:
print(y)

[[5]
 [0]
 [4]
 ...
 [2]
 [1]
 [2]]


In [125]:
x=np.array(data.iloc[:,1:cols])
print(x)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [126]:
x.shape, y.shape

((5000, 784), (5000, 1))

In [127]:
np.unique(y)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int64)

In [128]:
rows = x.shape[0]
params = x.shape[1]

all_theta = np.zeros((10, params + 1)) # here 10 denotes number of labels (0 to 9)

x = np.insert(x, 0, values=np.ones(rows), axis=1) 

theta = np.zeros(params + 1)

#y_0 = np.array([1 if label == 0 else 0 for label in y])
#y_0 = np.reshape(y_0, (rows, 1))
#print(y_0)

x.shape, theta.shape, all_theta.shape

((5000, 785), (785,), (10, 785))

In [129]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [130]:
def Computecost(theta, x, y, learning_rate, RegRate):
    theta = np.matrix(theta)
    x = np.matrix(x)
    y = np.matrix(y)
    
    first = np.multiply(-y, np.log(sigmoid(x * theta.T))) #(-y*log(g(z)))
    second = np.multiply((1 - y), np.log(1 - sigmoid(x * theta.T))) #((1-y)*(1-log(g(z))))
    
    reg = (RegRate / 2 * len(x)) * np.sum(np.power(theta[:,1:theta.shape[1]], 2)) #regularisation
    
    return np.sum(first - second) / (len(x)) + reg 

In [131]:
def gradientReg(theta, x, y, learning_rate, RegRate):
    
    theta = np.matrix(theta)
    x = np.matrix(x)
    y = np.matrix(y)
    
    #parameters = int(theta.shape[1])
    
    error = sigmoid(x * theta.T) - y
    grad = ((x.T * error) / len(x)).T
    grad = grad * learning_rate
    grad = grad + ((RegRate / len(x)) * theta)
    # intercept gradient is not regularized
    grad[0, 0] = np.sum(np.multiply(error, x[:,0])) / len(x)
    
    return np.array(grad).ravel()

In [132]:
#alpha = 0.01
#iters = 10000
def gradientDescent(x, y, theta, learning_rate, RegRate):
    theta = np.matrix(theta)
    x = np.matrix(x)
    y = np.matrix(y)
    iters = 10000
    params = int(theta.shape[1])
    cost = np.zeros(iters)
   # print("theta-shape= {}".format(theta.shape))
    #print("x-shape= {}".format(x.shape))
    #print("y-shape= {}".format(y.shape))
    
    for i in range(iters):
        error = (sigmoid(x*theta.T)) - y
        
        for j in range(1, params):
            grad = np.multiply(error, x[:,j])
            theta[0,j] = theta[0,j] - (((learning_rate / len(x)) * np.sum(grad)) + ((RegRate / len(x)) * theta[0,j]))
            # intercept gradient is not regularized
            theta[0, 0] = theta[0,0]- (learning_rate * np.sum(np.multiply(error, x[:,0])) / len(x))
        
        #print(i)
        
        #theta = np.array(grad).ravel()
        #cost[i] = computeCost(x, y, theta)
        cost[i]=Computecost(theta, x, y, learning_rate, RegRate)
        print("cost".format(cost[i]))
        
    return theta

In [133]:
def one_vs_all(x, y, num_labels, learning_rate, RegRate):
    params = x.shape[1]
    rows = x.shape[0]
      
    for i in range(0, num_labels):
        theta = np.zeros(params)
        y_i = np.array([1 if label == i else 0 for label in y])
        y_i = np.reshape(y_i, (rows, 1))
        #print(y_i.shape)
        print("num_labels = {}".format(i))
        
        # minimize the objective function
        all_theta[i,:] = gradientDescent(x, y_i, theta, learning_rate, RegRate)
        #fmin = minimize(fun=cost, x0=theta, args=(x, y_i, learning_rate, RegRate), method='TNC', jac=gradientReg)
        #all_theta[i,:] = fmin.x
    
    return all_theta

In [134]:
#new_theta = one_vs_all(x, y, 10, learning_rate, i)


In [135]:
#cost= cost(new_theta, x, y, learning_rate)
#print("cost = {}".format(cost))

In [136]:
#gradientReg(new_theta, x, y, learning_rate)

In [137]:
def predict_all(x, theta): 
   
    # convert to matrices
    x = np.matrix(x)
    theta = np.matrix(theta)
    
    # compute the class probability for each class on each training instance
    h = sigmoid(x * theta.T)
    
    # create array of the index with the maximum probability
    h_argmax = np.argmax(h, axis=1)
    
    # because our array was zero-indexed we need to add one for the true label prediction
   # h_argmax = h_argmax 
  
    return h_argmax

In [138]:
t_data = pd.read_csv("mnist_test.csv", header=None)
testcols=t_data.shape[1]
t_x=np.array(t_data.iloc[:,1:testcols])
trows=t_x.shape[0]
t_x = np.insert(t_x, 0, values=np.ones(trows), axis=1)
t_y=np.array(t_data.iloc[:,:1])
np.unique(t_y)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int64)

In [139]:
def frange(start, stop, step):
    i = start
    while i < stop:
        yield i
        i = i*step
        

In [140]:
Learning_rate=1
Reg_rate = 0.001
#for i in frange(0.001, 1, 10):
new_theta = one_vs_all(x, y, 10, Learning_rate, Reg_rate)
  
y_pred = predict_all(x, new_theta)
correct = [1 if a == b else 0 for (a, b) in zip(y_pred, y)]
accuracy = (sum(map(int, correct)) / float(len(correct)))
print("i={}".format(i))
print ("Training data={} %".format(accuracy * 100))
        
ty_pred = predict_all(t_x, new_theta)
tcorrect = [1 if a == b else 0 for (a, b) in zip(ty_pred, t_y)]
taccuracy = (sum(map(int, tcorrect)) / float(len(tcorrect)))
print ("Test data=""{} %".format(taccuracy * 100))

num_labels = 0


  
  
  
  import sys
  import sys


ValueError: cannot copy sequence with size 2 to array axis with dimension 785