In [1]:
# Import the libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import time

# Check if proper versions are used
print(pd.__version__)
print(np.__version__)

0.24.2
1.16.2


In [2]:
# Normalization & Scaling Functions using Numpy & Pandas

# Outlier Scaling using .quantile() Pandas methods
def scale_outlier(df, column, factor=1.5):
    """Cap the outliers of ``df[column]`` in place at its IQR fences.

    The fences are ``Q1 - factor*IQR`` and ``Q3 + factor*IQR``, where Q1/Q3 are
    the 25% / 75% quantiles of the column and ``IQR = Q3 - Q1``.  Values beyond
    a fence are replaced by that fence; values inside the fences and NaN
    entries are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column.  It is modified in place.
    column : label
        Name of the numeric column to cap.
    factor : float, default 1.5
        Multiple of the inter-quartile range used to place the fences.
        The default reproduces the previously hard-coded behaviour.

    Returns
    -------
    None
        The result is written back into ``df[column]``.
    """
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    min_bound = Q1 - factor*IQR
    max_bound = Q3 + factor*IQR
    # Clamp both tails in a single pass; NaN entries pass through unchanged.
    df[column] = np.clip(df[column].values, min_bound, max_bound)

# Min-Max Scaling using .min() and .max() Pandas methods
def min_max_scaling(df):
    """Return a copy of ``df`` with every column rescaled to the range [0, 1].

    Each column is transformed as ``(x - min) / (max - min)`` using that
    column's own minimum and maximum.  The input frame is not modified.

    A constant column (``max == min``) would divide zero by zero and turn into
    NaN; such a column is mapped to 0.0 instead (NaN entries stay NaN).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are all numeric.

    Returns
    -------
    pandas.DataFrame
        New frame with the same index and columns as ``df``.
    """
    df_norm = df.copy()
    for column in df_norm.columns:
        col_min = df_norm[column].min()
        col_range = df_norm[column].max() - col_min
        if col_range == 0:
            # Constant column: x - min is already 0 everywhere, skip the 0/0 division.
            df_norm[column] = (df_norm[column] - col_min).astype(float)
        else:
            df_norm[column] = (df_norm[column] - col_min) / col_range
    return df_norm

In [3]:
# Read the raw dataset into a DataFrame with pandas.
# The path is relative, so the CSV must sit in the notebook's working directory.
df = pd.read_csv("LBW_Dataset.csv")

In [4]:
# Data Preprocessing
# NOTE(review): this cell rebinds `df` step by step, so it cannot be re-run on its
# own output -- the first drop() below raises once the two columns are gone.
# Re-run the CSV-loading cell first.

# Drop the columns Delivery Phase(1: 90, 2: 2, NaN: 4) and Education(5: 93, NaN: 3)
# (value counts as recorded by the author: both columns are almost constant)
df = df.drop(["Delivery phase", "Education"], axis = 1)

# Merge the single Community = 2 row (count = 1) into Community = 1.
# Open question from the author: unseen data containing Community = 2 would need
# the same mapping applied before prediction.
df["Community"] = np.where(df["Community"] == 2, 1, df["Community"])

# Replace missing Weight values with the mean Weight of the row's Result class.
# NOTE(review): this imputation reads the label column (Result), so label
# information flows into a feature; it cannot be reproduced for rows whose
# Result is unknown.
mean_0 = (df.loc[df['Result'] == 0])['Weight'].mean()
mean_1 = (df.loc[df['Result'] == 1])['Weight'].mean()

df["Weight"] = np.where((df["Result"] == 0) & (df["Weight"].isna()), mean_0, df["Weight"])
df["Weight"] = np.where((df["Result"] == 1) & (df["Weight"].isna()), mean_1, df["Weight"])

# For now, fill the remaining numeric NaN values with the column mean.
# NOTE(review): the means are taken over the whole frame, i.e. before the
# train/test split performed in a later cell.
df["Age"] = df["Age"].fillna(df["Age"].mean())
df["HB"] = df["HB"].fillna(df["HB"].mean())
df["BP"] = df["BP"].fillna(df["BP"].mean())
# Weight is not filled with its global mean here; it was imputed per Result class above.

# Very basic outlier handling for Age and BP: values beyond the IQR fences are
# replaced by the fence value (scale_outlier modifies df in place).
scale_outlier(df, "Age")
scale_outlier(df, "BP")

# Relabel Residence = 2 as Residence = 0 to get a binary column (before: Residence(1,2), after: Residence(1,0))
df["Residence"] = np.where(df["Residence"] == 2, 0, df["Residence"])
# Fill missing Residence values with 1 (the mode, per the author)
df["Residence"] = df["Residence"].fillna(1)

# One-hot encode Community(1,3,4) into float columns Community_1(1,0), Community_3(1,0), Community_4(1,0)
df = pd.get_dummies(df, columns=["Community"], dtype = float)
# No separate drop of "Community" follows; the df.head() output further down no longer lists that column.

# Convert IFA to float
df["IFA"] = df["IFA"].astype(float)

# Convert Result to float and re-append it so that it is the last column
# (the train/test split cell relies on the label being last).
res = df["Result"].astype(float)
df = df.drop(["Result"], axis = 1)
df["Result"] = res

In [5]:
# Normalize every column of the dataset (the Result column included) into the range 0 to 1
# with min_max_scaling.
# NOTE(review): the min/max are computed on the full frame, before the train/test split.
df = min_max_scaling(df)

In [6]:
# Preview the first rows of the normalized frame.
df.head()

Unnamed: 0,Age,Weight,HB,IFA,BP,Residence,Community_1,Community_3,Community_4,Result
0,0.273504,0.342857,0.647059,1.0,0.17103,1.0,1.0,0.0,0.0,0.0
1,0.273504,0.171429,0.568627,1.0,0.293194,1.0,1.0,0.0,0.0,0.0
2,0.273504,0.171429,0.647059,1.0,0.904014,1.0,1.0,0.0,0.0,0.0
3,0.273504,0.171429,0.411765,1.0,0.17103,1.0,1.0,0.0,0.0,0.0
4,0.478632,0.085714,0.666667,1.0,0.362583,1.0,1.0,0.0,0.0,0.0


In [7]:
# Create the train/test split with sklearn's train_test_split.
# X takes every column except the last; y takes the last column only. The -1: slice
# keeps y two-dimensional, shape (n_rows, 1).
# test_size = 0.3 holds out 30% of the rows; random_state = 0 fixes the shuffle.
X = df.iloc[:,:-1].values
y = df.iloc[:,-1:].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

In [8]:
def sigmoid(Z):
  """Logistic function ``1 / (1 + exp(-Z))``, applied element-wise.

  Parameters
  ----------
  Z : scalar or numpy.ndarray
      Pre-activation value(s).

  Returns
  -------
  Same shape as ``Z``, with values in [0, 1].
  """
  # For very negative Z, exp(-Z) overflows to inf and 1/(inf+1) evaluates to 0.0,
  # which is the correct limit.  Only the overflow RuntimeWarning is suppressed;
  # the returned values are unchanged.
  with np.errstate(over="ignore"):
    return 1/(np.exp(-Z)+1)
def relu(Z):
  """Rectified linear unit: element-wise maximum of 0 and ``Z`` (via ``np.maximum``)."""
  return np.maximum(0,Z)

In [9]:
def init_parameters(dims, seed=None):
  """Create the initial weights and biases of a fully connected network.

  For each layer ``l`` (1-based) the weights are drawn from a standard normal
  distribution and scaled by ``sqrt(2 / dims[l-1])`` (He initialization); the
  biases start at zero.

  Parameters
  ----------
  dims : sequence of int
      Layer sizes, input layer first: ``(n_inputs, hidden_1, ..., n_outputs)``.
  seed : int or None, default None
      When given, the weights are drawn from a private
      ``np.random.RandomState(seed)`` so the result is reproducible.  When
      None, NumPy's global generator is used, exactly as before.

  Returns
  -------
  dict
      ``"W<l>"`` with shape ``(dims[l], dims[l-1])`` and ``"b<l>"`` with shape
      ``(dims[l], 1)`` for ``l = 1 .. len(dims)-1``.
  """
  rng=np.random if seed is None else np.random.RandomState(seed)
  parameters={}
  for l in range(1,len(dims)):
    fan_in=dims[l-1]
    parameters["W"+ str(l)]=rng.randn(dims[l],fan_in)*np.sqrt(2/fan_in)
    parameters["b"+ str(l)]=np.zeros(shape=(dims[l],1))
  return parameters

def forward_activation(A,W,b,activation):
  """Forward pass through one layer: linear step followed by an activation.

  Parameters
  ----------
  A : numpy.ndarray
      Activations of the previous layer (or the network input).
  W, b : numpy.ndarray
      Weights and biases of this layer; the linear step is ``W.dot(A) + b``.
  activation : {"relu", "sigmoid"}
      Non-linearity applied to the linear output.

  Returns
  -------
  A_new : numpy.ndarray
      Activated output of the layer.
  cache : tuple
      ``((A, W, b), Z)`` -- the inputs and the pre-activation ``Z``, kept for
      the backward pass.

  Raises
  ------
  ValueError
      If ``activation`` is not one of the supported names.
  """
  Z=W.dot(A)+b
  if activation=="relu":
    A_new=relu(Z)
  elif activation=="sigmoid":
    A_new=sigmoid(Z)
  else:
    # Previously an unknown name fell through and failed later on an unbound local.
    raise ValueError("Unsupported activation: %r (expected 'relu' or 'sigmoid')" % (activation,))
  cache=((A,W,b),Z)
  return A_new,cache

def forward_propogate(X,parameters):
  """Run the input through every layer of the network.

  ``parameters`` holds ``"W<l>"`` / ``"b<l>"`` pairs, so the layer count is half
  its length.  Every layer, hidden and output alike, uses the sigmoid activation.

  Returns the output of the last layer and the list of per-layer caches
  (one entry per layer, in forward order) produced by ``forward_activation``.
  """
  depth=len(parameters)//2
  history=[]

  def run_layer(inputs,index):
    # Apply layer `index` and remember its cache for the backward pass.
    key=str(index)
    outputs,record=forward_activation(inputs,parameters["W"+key],parameters["b"+key],"sigmoid")
    history.append(record)
    return outputs

  signal=X
  # Hidden layers
  for index in range(1,depth):
    signal=run_layer(signal,index)
  # Output layer
  output=run_layer(signal,depth)
  return output,history

In [10]:
def compute_cost(O,Y,parameters,lambd):
  """Mean binary cross-entropy between the predictions ``O`` and the labels ``Y``.

  Parameters
  ----------
  O : numpy.ndarray, shape (1, m)
      Predicted probabilities.
  Y : numpy.ndarray, shape (1, m)
      Ground-truth labels; must have the same shape as ``O``.
  parameters, lambd
      Accepted for interface compatibility but not used: the L2 regularization
      term is not part of the returned cost.

  Returns
  -------
  numpy.ndarray
      0-dimensional array holding the cost.
  """
  assert(O.shape == Y.shape)
  m=Y.shape[1]
  # A saturated output (exactly 0 or 1) would make log() return -inf and the
  # cost NaN/inf.  Clipping leaves every value strictly inside (0, 1) untouched.
  eps=1e-15
  O_safe=np.clip(O,eps,1-eps)
  cost=(1./m)*(-np.dot(Y,np.log(O_safe).T)-np.dot(1-Y,np.log(1-O_safe).T))
  cost=np.squeeze(cost)
  assert(cost.shape==())
  return cost

In [11]:
def update_parameters(parameters,v,grads,learning_rate,beta=0.9):
    """Apply one gradient-descent step with momentum to every layer.

    For each layer the velocity is updated as an exponentially weighted average
    of the gradients, ``v = beta*v + (1-beta)*grad``, and the parameter then
    moves by ``learning_rate * v``.

    Parameters
    ----------
    parameters : dict
        ``"W<l>"`` / ``"b<l>"`` arrays; entries are replaced in this dict.
    v : dict
        Velocities ``"dW<l>"`` / ``"db<l>"``; entries are replaced in this dict.
    grads : dict
        Gradients under the same ``"dW<l>"`` / ``"db<l>"`` keys.
    learning_rate : float
        Step size.
    beta : float, default 0.9
        Momentum coefficient.  ``beta=0`` reduces to plain gradient descent.

    Returns
    -------
    (parameters, v)
        The same two dict objects, after the update.
    """
    L = len(parameters) // 2 # number of layers in the neural network
    for l in range(1, L + 1):
        for name in ("W", "b"):
            key = name + str(l)
            # velocity first, then the parameter step that uses it
            v["d" + key] = beta * v["d" + key] + (1-beta) * grads["d" + key]
            parameters[key] = parameters[key] - learning_rate * v["d" + key]

    return parameters, v

In [12]:
# Backpropagation
def relu_diff(da,z):
  """Backward step of ReLU: pass ``da`` through where ``z > 0``, zero it elsewhere.

  ``da`` itself is not modified; a new array is returned.
  """
  blocked=z<=0
  return np.where(blocked,0,da)
def sigmoid_diff(da,z):
  """Backward step of the sigmoid: scale ``da`` by ``s*(1-s)`` with ``s = sigmoid(z)``."""
  s=sigmoid(z)
  return da*s*(1-s)

def backward_activation(da,cache,activation):
  """Backward pass through one layer (activation followed by the linear step).

  Parameters
  ----------
  da : numpy.ndarray
      Gradient of the cost with respect to this layer's activated output.
  cache : tuple
      ``((a_prev, W, b), z)`` as stored by the forward pass for this layer.
  activation : {"relu", "sigmoid"}
      Activation that was used by the layer in the forward pass.

  Returns
  -------
  da_prev : numpy.ndarray
      Gradient with respect to the previous layer's activations.
  dW, db : numpy.ndarray
      Gradients of the layer's weights and biases, averaged over the
      ``a_prev.shape[1]`` examples.  No L2 regularization term is added.

  Raises
  ------
  ValueError
      If ``activation`` is not one of the supported names.
  """
  (a_prev,W,_),z=cache
  if activation=="relu":
    dz=relu_diff(da,z)
  elif activation=="sigmoid":
    dz=sigmoid_diff(da,z)
  else:
    # Previously an unknown name fell through and failed later on an unbound local.
    raise ValueError("Unsupported activation: %r (expected 'relu' or 'sigmoid')" % (activation,))
  m=a_prev.shape[1]
  da_prev=np.dot(W.T,dz)
  dW=np.dot(dz,a_prev.T)*(1./m)
  db=np.sum(dz,axis=1,keepdims=True)*(1./m)
  return da_prev,dW,db

def back_propogate(O,Y,caches):
  """Backpropagate the cross-entropy cost through every layer.

  Parameters
  ----------
  O : numpy.ndarray
      Network output from the forward pass.
  Y : numpy.ndarray
      Labels; reshaped to ``O.shape`` before use.
  caches : list
      Per-layer caches from the forward pass, in forward order.

  Returns
  -------
  dict
      ``"dW<l>"`` and ``"db<l>"`` for ``l = 1 .. L`` plus the intermediate
      ``"dA<l>"`` for ``l = 0 .. L-1``, where ``L = len(caches)``.
      Every layer is differentiated as a sigmoid layer.
  """
  grads={}
  L=len(caches)
  Y=Y.reshape(O.shape)

  # An output saturated at exactly 0 or 1 would divide by zero below and spread
  # inf/NaN through all gradients.  Values strictly inside (0, 1) are unchanged.
  eps=1e-15
  O_safe=np.clip(O,eps,1-eps)
  # Derivative of the binary cross-entropy with respect to the output.
  dO=-(np.divide(Y,O_safe)-np.divide(1-Y,1-O_safe))

  # Output layer
  grads["dA"+str(L-1)],grads["dW"+str(L)],grads["db"+str(L)]=backward_activation(dO,caches[L-1],"sigmoid")

  # Hidden layers, last to first
  for l in reversed(range(L-1)):
    grads["dA"+str(l)],grads["dW"+str(l+1)],grads["db"+str(l+1)]=backward_activation(grads["dA"+str(l+1)],caches[l],"sigmoid")

  return grads


In [13]:
def predict(X,Y,parameters):
  """Classify every column of ``X`` and print the accuracy against ``Y``.

  A forward pass produces one probability per example; probabilities above 0.5
  become 1.0, all others 0.0.  The accuracy is the share of predictions equal
  to ``Y``, which is compared element-wise against the (1, m) prediction array
  and is therefore expected in that same shape.

  Returns the (1, m) array of 0/1 predictions, m being ``X.shape[1]``.
  """
  m=X.shape[1]
  prob,_=forward_propogate(X,parameters)
  predictions=np.zeros((1,m))
  # Threshold the first output row in one vectorized step.
  predictions[0]=prob[0]>0.5
  print("Accuracy: "  + str(np.sum((predictions == Y)/m)))
  return predictions

In [14]:
def initialize_velocity(parameters):
    """Build the zero-initialised momentum buffers for ``parameters``.

    For every layer ``l`` the result holds ``"dW<l>"`` and ``"db<l>"``, each an
    all-zero array shaped like the matching ``"W<l>"`` / ``"b<l>"`` entry.
    """
    layer_count = len(parameters) // 2
    velocity = {}
    for index in range(1, layer_count + 1):
        suffix = str(index)
        for prefix in ("W", "b"):
            velocity["d" + prefix + suffix] = np.zeros_like(parameters[prefix + suffix])
    return velocity

In [15]:
def model(X,Y,layer_dims,learning_rate=0.0075,num_iterations=1500,print_cost=False):
  """Train the network with full-batch gradient descent and momentum.

  Parameters
  ----------
  X : numpy.ndarray
      Training inputs, one example per column.
  Y : numpy.ndarray
      Training labels, shape (1, number of examples).
  layer_dims : sequence of int
      Layer sizes, input layer first.
  learning_rate : float
      Step size handed to ``update_parameters``.
  num_iterations : int
      Number of passes over the whole training set.
  print_cost : bool
      When True, the cost is printed every 5000 iterations.

  Returns
  -------
  dict
      The trained ``"W<l>"`` / ``"b<l>"`` parameters.
  """
  # Use the argument, not a same-looking global: the body used to read
  # `layers_dims`, which only resolved when the caller happened to define it.
  parameters=init_parameters(layer_dims)
  v=initialize_velocity(parameters)
  for i in range(num_iterations):
    O,caches=forward_propogate(X,parameters)
    # 0.7 is passed as lambd to compute_cost.
    cost=compute_cost(O,Y,parameters,0.7)
    grads=back_propogate(O,Y,caches)
    parameters,v=update_parameters(parameters,v,grads,learning_rate)
    if print_cost and i%5000==0:
      print("Cost after",i,"iteration:",cost)

  return parameters

In [16]:
# Network layout: one input unit per feature column (every df column except the
# trailing Result), two hidden layers of 32 and 8 units, one output unit.
layers_dims=(len(df.columns) - 1,32,8,1)
# Train on the training split. The data is transposed so that each example is a
# column, and the labels are reshaped to a single row of length len(y_train).
# NOTE(review): no random seed is set in the visible cells, so the initial weights
# (and therefore the costs printed below) presumably differ between runs.
parameters=model(X_train.T,y_train.reshape((1,len(y_train))),layers_dims,learning_rate=0.005,num_iterations=100000,print_cost=True)

Cost after 0 iteration: 0.5527827273533291
Cost after 5000 iteration: 0.5394321022580177
Cost after 10000 iteration: 0.530977806275594
Cost after 15000 iteration: 0.5189626620191367
Cost after 20000 iteration: 0.5024976158177745
Cost after 25000 iteration: 0.48079770520985654
Cost after 30000 iteration: 0.4530229978941102
Cost after 35000 iteration: 0.41918190185574905
Cost after 40000 iteration: 0.38198226227846754
Cost after 45000 iteration: 0.3462620724549353
Cost after 50000 iteration: 0.3159323347213807
Cost after 55000 iteration: 0.2920890245939047
Cost after 60000 iteration: 0.2738769523749869
Cost after 65000 iteration: 0.2599738492156455
Cost after 70000 iteration: 0.24930173542231732
Cost after 75000 iteration: 0.24108873215058021
Cost after 80000 iteration: 0.23474728135411504
Cost after 85000 iteration: 0.22980021679347237
Cost after 90000 iteration: 0.22586036605735713
Cost after 95000 iteration: 0.2226209791757344


In [17]:
# Accuracy on the training split; the array shown below is predict's return value (0/1 per example).
predict(X_train.T,y_train.reshape((1,len(y_train))),parameters)

Accuracy: 0.9402985074626866


array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0.,
        0., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 0., 1., 0., 1., 0.,
        1., 1., 1., 1., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 0., 1., 0., 1., 1.,
        1., 0., 1.]])

In [18]:
# Accuracy on the held-out test split; the array shown below is predict's return value (0/1 per example).
predict(X_test.T,y_test.reshape((1,len(y_test))),parameters)

Accuracy: 0.8620689655172412


array([[1., 1., 0., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0.,
        0., 1., 0., 0., 1., 1., 0., 1., 1., 1., 0., 1., 1.]])

In [19]:
# Display the trained weights and biases.
# NOTE(review): this prints every array in full; a per-key shape summary would keep the output short.
parameters

{'W1': array([[-7.69610223e-01,  5.50910983e-04, -4.89872916e-01,
         -1.44838322e+00,  2.10026257e-01,  1.67500000e-01,
          1.78509742e-01, -1.25782060e+00,  2.89832092e-01],
        [ 5.69498951e-01, -2.66568401e-01,  2.14071387e-01,
          3.23548730e-01, -9.46196910e-02,  3.39153566e-01,
          7.08353004e-02,  1.97206397e-01,  1.74816400e-01],
        [ 4.24806611e-01, -5.03044697e-01, -4.91106477e-01,
         -9.96669668e-02,  5.43265759e-01, -1.95661490e-01,
          1.29506484e+00,  4.01513417e-01, -1.06969626e-01],
        [-2.91850002e-01, -1.18603375e+00, -7.47234522e-01,
          5.24718277e-01, -5.23838514e-01,  8.61335147e-03,
          6.87946557e-01,  8.56351949e-01, -2.13761781e-02],
        [ 1.75279327e-01,  4.01215410e+00,  8.41826027e-02,
         -3.48382999e-01, -2.39224769e-01,  4.88864997e-02,
          3.76205672e-01, -2.84159195e-01, -5.54783633e-01],
        [ 2.07186566e-01,  4.60026569e-01,  6.33173071e-01,
         -9.15062239e-01,  5.