In [41]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
import csv
import json
import pandas as pd

In [42]:
def normalize(X_train):
   newx_train=tf.cast(X_train,tf.float32)/255
   
   return newx_train


In [53]:
def initialize_parameters():
    hidden_units=[(25,784),(15,12),(10,15)]
    parameters=dict()
    
    layers=len(hidden_units)
    
    initializer=tf.keras.initializers.HeNormal()
    
    for i in range(1,layers+1):
        parameters["W"+str(i)]=tf.Variable(initializer(shape=(hidden_units[i-1][0],hidden_units[i-1][1]))) 
        parameters["B"+str(i)]=tf.Variable(initializer(shape=(hidden_units[i-1][0],1)))
       
    return parameters
        

In [54]:
def convert_to_one_hot(label,depth=10):
    encoded=tf.one_hot(label,depth,axis=0)
    transformed_Y=tf.reshape(encoded,[depth,])
    
    return transformed_Y

In [55]:
def forward_propagation(X,parameters):
    
    layers=len(parameters)//2
    activations=dict()
    linearOutput=dict()
    
    activations["A0"]=X
    for i in range(1,layers+1):
        linearOutput["Z"+str(i)]=tf.add(tf.linalg.matmul(parameters["W"+str(i)],activations["A"+str(i-1)]),parameters["B"+str(i)])
        if i!=layers:
            activations["A"+str(i)]=tf.keras.activations.relu(linearOutput["Z"+str(i)])
        
        
        
    return linearOutput["Z"+str(layers)]

In [56]:
def losses(linear,y_labels):
    
    cost=tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y_labels,linear))
    
    return cost


In [47]:
def model(X_train,Y_train,learning_rate=0.000001,num_epochs=150,minibatch_size=16,print_cost=True):
    costs=list()
    training_accuracies=list()
    
    parameters=initialize_parameters()
    optimizing_algo=tf.keras.optimizers.Adam(learning_rate)
    accuracy=tf.keras.metrics.CategoricalAccuracy()
    trainingSet=tf.data.Dataset.zip((X_train,Y_train))
    minibatches=trainingSet.batch(minibatch_size).prefetch(8)
    
    m=trainingSet.cardinality().numpy()
    
    W1=parameters["W1"]
    B1=parameters["B1"]
    W2=parameters["W2"]
    B2=parameters["B2"]
    W3=parameters["W3"]
    B3=parameters["B3"]
    # W4=parameters["W4"]
    # B4=parameters["B4"]
    # W5=parameters["W5"]
    # B5=parameters["B5"]
    # W6=parameters["W6"]
    # B6=parameters["B6"]
    # W7=parameters["W7"]
    # B7=parameters["B7"]
    # W8=parameters["W8"]
    # B8=parameters["B8"]
    
    for e in range(1,num_epochs+1):
        epochCost=0
        accuracy.reset_states()
        
        for (mini_X,mini_Y) in minibatches:
            with tf.GradientTape() as t:
                linearOutput=forward_propagation(tf.transpose(mini_X),parameters)
                
                mini_loss=losses(tf.transpose(linearOutput),mini_Y)
            accuracy.update_state(mini_Y,tf.transpose(linearOutput))
            
            training_parameters=[W1,B1,W2,B2,W3,B3]
            gradients=t.gradient(mini_loss,training_parameters)
            optimizing_algo.apply_gradients(zip(gradients,training_parameters))
            
            epochCost+=mini_loss
        epochCost/=m
        
        if print_cost==True and e%10==0:   
            print("Cost after epoch",e,"-",epochCost)
            print("Training accuracy - ",accuracy.result())
            
            costs.append(epochCost)
            training_accuracies.append(accuracy.result())
    return parameters,costs,training_accuracies

In [48]:

#main
train_set=pd.read_csv("train.csv")
test_set=pd.read_csv("test.csv")

In [49]:
x_train=tf.data.Dataset.from_tensor_slices(train_set.drop('label',axis=1))
y_train=tf.data.Dataset.from_tensor_slices(train_set['label'])

In [50]:
x_new_train=x_train.map(normalize)

In [51]:
y_new_train=y_train.map(convert_to_one_hot)

In [52]:
p,c,a=model(x_new_train,y_new_train)

Cost after epoch 10 - tf.Tensor(0.4998987, shape=(), dtype=float32)
Training accuracy -  tf.Tensor(0.09035714, shape=(), dtype=float32)
Cost after epoch 20 - tf.Tensor(0.4998987, shape=(), dtype=float32)
Training accuracy -  tf.Tensor(0.09035714, shape=(), dtype=float32)


KeyboardInterrupt: 

In [28]:
p=initialize_parameters()
print(p)

{'W1': <tf.Variable 'Variable:0' shape=(41, 784) dtype=float32, numpy=
array([[-0.06309426, -0.10270499,  0.27753732, ..., -0.3208927 ,
        -0.06140058, -0.03352188],
       [-0.17115432,  0.27340117, -0.09411117, ...,  0.1260702 ,
         0.20840596, -0.19342192],
       [ 0.1265841 , -0.14359882,  0.04106665, ...,  0.247392  ,
         0.0393915 ,  0.29915214],
       ...,
       [-0.18114232,  0.0620883 ,  0.34582543, ...,  0.34313187,
         0.2198454 ,  0.11413459],
       [ 0.09997883,  0.3527596 ,  0.0679897 , ..., -0.2089819 ,
         0.20007028,  0.21287994],
       [-0.08352175,  0.27139825, -0.17994718, ..., -0.049769  ,
         0.41893214,  0.06547701]], dtype=float32)>, 'B1': <tf.Variable 'Variable:0' shape=(41, 1) dtype=float32, numpy=
array([[-0.21192524],
       [-0.08288683],
       [ 0.47437885],
       [-0.05427252],
       [ 0.20576844],
       [ 0.29076937],
       [-0.29542392],
       [-0.48201284],
       [ 0.18188888],
       [-0.09681775],
       [ 0.