In [238]:
import h5py
from sklearn.model_selection import train_test_split
import numpy as np

In [239]:
data = h5py.File('data/Q1/MNIST_Subset.h5','r+')
X=data['X'][:]
Y=data['Y'][:]

In [240]:
X.shape,Y.shape

((14251, 28, 28), (14251,))

In [241]:
X = X.reshape(X.shape[0],-1)

In [242]:
X=X/255

In [243]:
np.unique(Y)

array([7, 9], dtype=uint8)

In [244]:
#as we only have 2 classes
Y = np.where(Y==7,0,1)

In [245]:
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2, random_state=42)

In [246]:
X_train.shape,X_test.shape,Y_train.shape,Y_test.shape

((11400, 784), (2851, 784), (11400,), (2851,))

In [247]:
def one_hot_encoding(Y):
    n_classes = len(np.unique(Y))
    Y=Y.reshape(1,Y.shape[0])
    Y_new = np.eye(n_classes)[Y.astype('int32')]
    Y_new = Y_new.T.reshape(n_classes, Y.shape[1])
    return Y_new,n_classes
#     targets = Y.reshape(-1)
#     return np.eye(n_classes)[targets]

In [248]:
Y_train,n_classes = one_hot_encoding(Y_train)
Y_test,n_classes=one_hot_encoding(Y_test)

In [249]:
Y_train.shape,Y_test.shape

((2, 11400), (2, 2851))

In [250]:
X_train=X_train.T
X_test=X_test.T

In [251]:
X_train.shape,X_test.shape,Y_train.shape,Y_test.shape

((784, 11400), (784, 2851), (2, 11400), (2, 2851))

In [252]:
#hyperparameters
epoch = 10
learning_rate = 0.01

In [253]:
def sigmoid(z,derative = False):
    if derative:
        return z*(1-z)
    return 1.0/(1.0+np.exp(-z))

In [254]:
def softmax(z,derative = False):
    if derative:
        return 1
    return np.exp(z)/np.sum(np.exp(z),axis=0)

In [255]:
def compute_multiclass_loss(Y, Y_hat):
    L_sum = np.sum(np.multiply(Y, np.log(Y_hat)))
    m = Y.shape[1]
    return -(1/m) * L_sum

In [256]:
def init_weights(n_hidden_layers, number_of_neurons):
    weights_layers =[np.random.uniform(size=(number_of_neurons[0],X_train.shape[0]))]
    bias_layers = [np.random.uniform(size=(number_of_neurons[0],1))]
    
    for i in range(1,n_hidden_layers):
        weights_layers.append(np.random.uniform(size=(number_of_neurons[i],weights_layers[i-1].shape[0])))
        bias_layers.append(np.random.uniform(size=(number_of_neurons[i],1)))
    
    weights_layers.append(np.random.uniform(size=(n_classes,weights_layers[-1].shape[0])))
    bias_layers.append(np.random.uniform(size=(n_classes,1)))
    
    return weights_layers,bias_layers

In [257]:
weight_layers,bias_layers = init_weights(2,[100,50])

In [258]:
for i,j in zip(weight_layers,bias_layers):
    print(i.shape,j.shape)

(100, 784) (100, 1)
(50, 100) (50, 1)
(2, 50) (2, 1)


In [307]:
def train_neural_network(X_train,Y_train,epoch,learning_rate,hidden_layer_activation=sigmoid):
    global weight_layers
    global bias_layers
    
    cost=[]
    number_of_samples = X_train.shape[1]
    
    #------epoch start here----------
    
#     layer_outputs = []
#     layer_outputs_drev = []
#     delta_weights=[]
#     delta_bias=[]
#     last_layer_output = X_train

#     last_layer_output = X_train
    input_layers = [X_train]
    output_layers=[X_train]
    
    delta_weights = []
    delta_bais = []
    
    
    for weight,bais in zip(weight_layers[:-1],bais_layers[:-1]):
        input_layers.append(np.dot(weight,output_layers[-1])+bais.T)
        output_layers.append(hidden_layer_activation(input_layers[-1]))
    
    
    
    input_layers.append(np.dot(weight_layers[-1],output_layers[-1])+bias_layers[-1])
    output_layers.append(softmax(input_layers[-1]))
    
    cost.append(compute_multiclass_loss(Y_train,output_layers[-1]))
    print(cost)
        
    #------Output layer error --------
    error = output_layers[-1] - Y_train
    
    delta_weights.append((1./number_of_samples)*np.dot(error,output_layers[-2].T))
    delta_bais.append((1./number_of_samples)*np.sum(error, axis=1, keepdims=True))
                      
    #-----hidden layer stuff
    
    for i in range(len(input_layers)-1)[::-1]:
        print(i)
        error = np.dot(weight_layers[i].T,error)
#         print(error.shape,hidden_layer_activation(input_layers[i],derative=True).shape)
        delta_z = error*hidden_layer_activation(input_layers[i],derative=True)
        delta_weights.insert(0,(1./m)*np.dot(delta_z,output_layers[i-1].T))
        delta_bais.insert(0,(1./m)*np.sum(error,axis=1,keepdims=True))
    
    print(len(delta_weights))
        
        
        
#         layer_outputs.append(hidden_layer_activation(np.dot(last_layer_output,weight)+bias))
#         layer_outputs_drev.append(hidden_layer_activation(np.dot(last_layer_output,weight)+bias,derative=True))
#         last_layer_output=layer_outputs[-1]

#     layer_outputs.append(softmax(np.dot(last_layer_output,weight_layers[-1])+bias_layers[-1]))
    
#     cost.append(compute_multiclass_loss(Y_train,layer_outputs[-1]))
    
#     error = layer_outputs[-1] - Y_train
    
#     print(error.shape, layer_outputs[-2].shape)
#     delta_weights.insert(0,(1./number_of_samples)*np.dot(error.T,layer_outputs[-2]))
# #     delta_bias.insert(0,(1./number_of_samples)*np.sum(error,axis=1,keepdims=True))
    

In [308]:
train_neural_network(X_train,Y_train,epoch,learning_rate)

[2.2662437633820476]
2


IndexError: list index out of range

In [118]:
a = [1,2,3,4,5]

In [311]:
for i in range(3)[:0:-1]:
    print(i)

2
1
