In [3]:
#importing the required dependencies,numpy for  linear algebra,pandas for working with data and matplotlib for visualizations.
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [4]:
data = pd.read_csv('MNIST_data/weight-height.csv') # Load the weight-height dataset (Gender, Height, Weight); despite the folder name this is not MNIST data.

In [5]:
data.head() # Preview the first five rows to get an overview of the DataFrame's columns.

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


In [6]:
# Preview the last five rows; the first rows are Male and the last rows are Female.
data.tail()

Unnamed: 0,Gender,Height,Weight
9995,Female,66.172652,136.777454
9996,Female,67.067155,170.867906
9997,Female,63.867992,128.475319
9998,Female,69.034243,163.852461
9999,Female,61.944246,113.649103


In [7]:
# (rows, columns) of the loaded frame.
data.shape

(10000, 3)

In [8]:
# Summary statistics for the numeric columns (Height, Weight).
data.describe()

Unnamed: 0,Height,Weight
count,10000.0,10000.0
mean,66.36756,161.440357
std,3.847528,32.108439
min,54.263133,64.700127
25%,63.50562,135.818051
50%,66.31807,161.212928
75%,69.174262,187.169525
max,78.998742,269.989699


In [9]:
# Number of distinct values in each column.
data.nunique()

Gender        2
Height    10000
Weight    10000
dtype: int64

In [10]:
# Column dtypes as loaded, before any conversion.
data.dtypes

Gender     object
Height    float64
Weight    float64
dtype: object

In [11]:
# Count missing values per column.
data.isnull().sum()

Gender    0
Height    0
Weight    0
dtype: int64

In [12]:
# Class balance of the label column.
data['Gender'].value_counts()

Male      5000
Female    5000
Name: Gender, dtype: int64

In [13]:
# Drop exact duplicate rows, keeping the first occurrence.
# Reassigning (instead of inplace=True) avoids mutating a frame that earlier
# cells have already displayed.
data = data.drop_duplicates(keep='first')
print(data.shape)

(10000, 3)


In [14]:
# Encode the label column numerically: Male -> 1, Female -> 0.
# The result is assigned back to the frame. Calling replace(..., inplace=True) on
# the selected column is the chained in-place pattern that pandas 2.x warns about
# and that does not update the frame under Copy-on-Write.
# The stray `data.columns` expression was dropped: it was not the last line of the
# cell, so it displayed nothing.
# NOTE: run this cell once per fresh load; mapping already-encoded values yields NaN.
data['Gender'] = data['Gender'].map({"Male": 1, "Female": 0})

In [15]:
# Confirm the Gender column now holds 1/0 instead of Male/Female.
data.head()

Unnamed: 0,Gender,Height,Weight
0,1,73.847017,241.893563
1,1,68.781904,162.310473
2,1,74.110105,212.740856
3,1,71.730978,220.04247
4,1,69.881796,206.349801


In [16]:
# Target dtype for each column. Casting Height/Weight to 'int' truncates the
# fractional part (e.g. 73.847017 becomes 73).
target_dtypes = {'Gender': 'category', 'Height': 'int', 'Weight': 'int'}
for column_name, dtype_name in target_dtypes.items():
    data[column_name] = data[column_name].astype(dtype_name)

In [17]:
# Verify the dtypes after the conversion.
data.dtypes

Gender    category
Height       int32
Weight       int32
dtype: object

In [18]:
data = np.array(data) # Convert the DataFrame to a NumPy array; from here on `data` is an ndarray, not a DataFrame.

In [19]:
m,n=data.shape # m = number of rows (samples), n = number of columns (label + features).

In [20]:
print(m,n) # m is the number of samples; n is the number of columns (1 label + 2 features), not pixels.

10000 3


In [21]:
# Inspect the array; columns are [Gender, Height, Weight].
data

array([[  1,  73, 241],
       [  1,  68, 162],
       [  1,  74, 212],
       ...,
       [  0,  63, 128],
       [  0,  69, 163],
       [  0,  61, 113]], dtype=int64)

In [None]:
# Shuffle before splitting: head()/tail() show Male rows at the top of the file and
# Female rows at the bottom, so an unshuffled slice risks being single-class.
# A seeded Generator makes the split reproducible, and permutation() returns a
# shuffled copy (rows permuted along axis 0), so re-running this cell does not
# re-shuffle `data` in place.
SPLIT_SEED = 42
DEV_SIZE = 1000
shuffled = np.random.default_rng(SPLIT_SEED).permutation(data)

# Hold-out (dev / validation) split: the first DEV_SIZE shuffled rows.
data_dev = shuffled[:DEV_SIZE].T  # Transpose so that each column is one sample.
Y_dev = data_dev[0]               # Row 0 is the label (Gender).
X_dev = data_dev[1:n]             # Remaining rows are the features (Height, Weight).

# Training split: every row after the dev rows.
data_train = shuffled[DEV_SIZE:m].T
Y_train = data_train[0]
X_train = data_train[1:n]
_, m_train = X_train.shape        # Number of training samples.

# NOTE(review): the features are fed to the network unscaled; consider standardising
# Height/Weight before training - TODO confirm the effect on convergence.

In [None]:
print(f"The Y_Train data:{Y_train}")
print(f"The X_train data:{X_train[0]}") # Only the first feature row (Height) is printed because the full array is large.
print(f"The X_train data shape:{X_train[:,0].shape}") # Shape of a single sample's column, i.e. the number of features per sample.

In [None]:
def init_params(n_features=2, n_hidden=10, n_classes=2): #1
    """Create randomly initialised parameters for the two-layer network.

    Every entry is drawn uniformly from [-0.5, 0.5).

    The output layer previously had 10 units (a leftover from a 10-class setup)
    while the labels here have 2 classes, so ``A2`` (10, m) could not be
    subtracted from the (2, m) one-hot targets during back-propagation. The
    output size now defaults to 2 and all layer sizes are configurable.

    Args:
        n_features: Number of input features per sample (Height, Weight -> 2).
        n_hidden: Number of hidden units.
        n_classes: Number of output units / label classes.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes ``(n_hidden, n_features)``,
        ``(n_hidden, 1)``, ``(n_classes, n_hidden)`` and ``(n_classes, 1)``.
    """
    W1 = np.random.rand(n_hidden, n_features) - 0.5  # hidden-layer weights
    b1 = np.random.rand(n_hidden, 1) - 0.5           # hidden-layer biases
    W2 = np.random.rand(n_classes, n_hidden) - 0.5   # output-layer weights
    b2 = np.random.rand(n_classes, 1) - 0.5          # output-layer biases
    return W1, b1, W2, b2

In [None]:
# Activation applied to the hidden layer during forward propagation.
def ReLU(Z): #3
    """Rectified linear unit: element-wise maximum of ``Z`` and 0."""
    rectified = np.maximum(Z, 0)
    return rectified

In [None]:
def forward_prop(W1, b1, W2, b2, X): #2
    """Run one forward pass through the two-layer network.

    Args:
        W1, b1: Hidden-layer weights and biases.
        W2, b2: Output-layer weights and biases.
        X: Input matrix with one sample per column.

    Returns:
        Tuple ``(Z1, A1, Z2, A2)``: hidden pre-activation, hidden ReLU
        activation, output pre-activation and output sigmoid activation.
    """
    hidden_pre = np.dot(W1, X) + b1           # linear part of the hidden layer
    hidden_act = ReLU(hidden_pre)             # ReLU non-linearity
    output_pre = np.dot(W2, hidden_act) + b2  # linear part of the output layer
    output_act = sigmoid(output_pre)          # element-wise sigmoid (not softmax)
    return hidden_pre, hidden_act, output_pre, output_act


In [None]:
# Used in back-propagation to pass gradients only through active ReLU units.
def ReLU_deriv(Z):#7
    """Derivative of ReLU: True where ``Z`` is strictly positive, else False."""
    is_active = Z > 0
    return is_active

In [None]:
# Used in back-propagation to compare network outputs against the labels.
def one_hot(Y, num_classes=None): #6
    """One-hot encode integer labels, one column per sample.

    Args:
        Y: 1-D array of integer class labels.
        num_classes: Number of classes (rows of the result). When omitted it is
            inferred as ``Y.max() + 1``, which under-counts if the highest
            class is absent from ``Y``; pass it explicitly when the label set
            is known.

    Returns:
        Float array of shape ``(num_classes, Y.size)`` with a single 1 in each
        column, at the row given by that sample's label.
    """
    Y = np.asarray(Y)
    if num_classes is None:
        num_classes = int(Y.max()) + 1
    encoded = np.zeros((num_classes, Y.size))  # start from an all-zero matrix
    encoded[Y, np.arange(Y.size)] = 1          # set row Y[i] of column i
    return encoded


In [None]:
def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y): #5
    """Compute parameter gradients for one pass of back-propagation.

    Gradients are averaged over the samples actually passed in (``Y.size``).
    Previously the notebook-level ``m`` (row count of the whole dataset) was
    used, which mis-scaled the gradients for the training subset. Bias
    gradients are summed per unit (``axis=1``) rather than collapsed into a
    single scalar shared by every unit.

    Args:
        Z1, A1: Hidden-layer pre-activation and activation from the forward pass.
        Z2, A2: Output-layer pre-activation and activation from the forward pass.
        W1, W2: Current weight matrices (only ``W2`` is read here).
        X: Input matrix with one sample per column.
        Y: 1-D array of integer labels, one per column of ``X``.

    Returns:
        Tuple ``(dW1, db1, dW2, db2)``; ``dW*`` match the shapes of ``W*`` and
        ``db*`` are column vectors with one entry per unit.
    """
    n_samples = Y.size
    one_hot_Y = one_hot(Y)

    dZ2 = A2 - one_hot_Y  # output error: prediction minus one-hot target
    dW2 = dZ2.dot(A1.T) / n_samples
    db2 = np.sum(dZ2, axis=1, keepdims=True) / n_samples

    dZ1 = W2.T.dot(dZ2) * ReLU_deriv(Z1)  # propagate through W2, gate by ReLU
    dW1 = dZ1.dot(X.T) / n_samples
    db1 = np.sum(dZ1, axis=1, keepdims=True) / n_samples

    return dW1, db1, dW2, db2

In [None]:
def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha): #Alpha would act as the learning parameter.
    """Apply one gradient-descent step to every parameter.

    Each parameter ``p`` with gradient ``g`` becomes ``p - alpha * g``, where
    ``alpha`` is the learning rate.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` of updated parameters.
    """
    params = (W1, b1, W2, b2)
    grads = (dW1, db1, dW2, db2)
    return tuple(param - alpha * grad for param, grad in zip(params, grads))

In [None]:
def get_predictions(A2):
    """Return, for each column (sample) of ``A2``, the row index of its largest value."""
    predicted_classes = np.argmax(A2, axis=0)
    return predicted_classes


In [None]:
# X=np.array([[1,2,3,4,5,6,7,8,9],[1,2,3,4,5,6,7,8,9]])
# W1, b1, W2, b2 = init_params()
# Z1 = W1.dot(X) + b1 #Z1 is the non-active set of neurons.X is the input as its the input layer.
# print(W1)
# print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
# print(X)
# print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
# print(Z1)
# print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
# A1 = ReLU(Z1) #A1 is the activation function to pass the values
# print(W2)
# print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
# print(A1)
# Z2 = W2.dot(A1) + b2 #Z2 its the second layer,which takes the dot product from A1 ie;prev output from neuron.

In [None]:
def sigmoid(Z):
    """Element-wise logistic function ``1 / (1 + exp(-Z))``, computed stably.

    The direct formula evaluates ``exp(-Z)``, which overflows for large
    negative ``Z``. Here the exponential is only ever taken of ``-|Z|``, so it
    stays in (0, 1]; the two algebraically equivalent branches are selected by
    the sign of ``Z``.

    Args:
        Z: Scalar or array of pre-activations.

    Returns:
        Values in [0, 1] with the same shape as ``Z`` (a NumPy scalar for
        scalar input).
    """
    Z = np.asarray(Z)
    decay = np.exp(-np.abs(Z))  # exp of a non-positive number: cannot overflow
    result = np.where(Z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return result[()]  # unwrap 0-d results to a scalar; arrays pass through

In [None]:
def get_accuracy(predictions, Y):
    """Print both arrays, then return the fraction of predictions equal to ``Y``."""
    print(predictions, Y)
    matches = predictions == Y
    return np.sum(matches) / Y.size

In [None]:
def gradient_descent(X, Y, alpha, iterations):
    """Train the two-layer network with full-batch gradient descent.

    Args:
        X: Input matrix with one sample per column.
        Y: 1-D array of integer labels.
        alpha: Learning rate passed to ``update_params``.
        iterations: Number of forward/backward/update steps to run.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` of parameters after the final step.
    """
    W1, b1, W2, b2 = init_params()
    print(W1)

    for step in range(iterations):
        activations = forward_prop(W1, b1, W2, b2, X)
        grads = backward_prop(*activations, W1, W2, X, Y)
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, *grads, alpha)
        if step % 10 != 0:
            continue
        # Progress report every 10th step, using the activations computed
        # before this step's parameter update.
        print("Iteration: ", step)
        print(get_accuracy(get_predictions(activations[-1]), Y))
    return W1, b1, W2, b2

In [None]:
# Train on the training split: learning rate 0.10 for 1000 iterations.
W1, b1, W2, b2 = gradient_descent(X_train, Y_train, alpha=0.10, iterations=1000)