<a href="https://colab.research.google.com/github/Gogulaanand/Fault-Diagnosis/blob/master/Fault_Diagnosis_Adam.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import pandas as pd
import math
import io
import tensorflow as tf

In [2]:
from google.colab import files
uploaded = files.upload()

Saving projdata.csv to projdata (6).csv


In [3]:
for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))

User uploaded file "projdata.csv" with length 132145 bytes


In [0]:
df=pd.read_csv(io.StringIO(uploaded['projdata.csv'].decode("utf-8")),engine='python',sep=",")
ds=df.sample(frac=1)


In [5]:
y=np.array(df[['Fault']])
X=ds.as_matrix(columns=df.columns[1:])
X=X.T
y=y.T
print(X.shape)

(3, 3287)


In [0]:
def sig(x):
  return 1.0/(1.0+np.exp(-x))

In [0]:
def relu(x):
    return x*(x>0)

In [0]:
def leaky_relu(x):
    return x/(1+np.exp(-x))

In [9]:
nx=np.min(np.shape(X))
ny=np.min(np.shape(y))
nh=3
nl=2
layer_dims=[nx,nh,nh,ny]
print("Size of input layer: "+str(nx))
print("Size of output layer: "+str(ny))
print("Size of hidden units in each layer: "+str(nh))
print("No of hidden layers: "+str(nl))


Size of input layer: 3
Size of output layer: 1
Size of hidden units in each layer: 3
No of hidden layers: 2


In [0]:
def initialise_params(layer_dims):
    
    L=len(layer_dims)
    
    params={}
    
    for l in range(1,L,1):
        params["W"+str(l)]=np.random.randn(layer_dims[l],layer_dims[l-1])*np.sqrt(2/layer_dims[l-1])
        params["b"+str(l)]=np.zeros((layer_dims[l],1))
            
    for l in range(1,L):
        assert (params["W"+str(l)].shape == (layer_dims[l],layer_dims[l-1]))
        assert (params["W"+str(l)].shape == (layer_dims[l],layer_dims[l-1]))
        
    
    return params
    

In [0]:

params=initialise_params(layer_dims)

In [0]:
def forward_prop(X,params):
    
    W1=params["W1"]
    b1=params["b1"]
    W2=params["W2"]
    b2=params["b2"]
    W3=params["W3"]
    b3=params["b3"]
    
    Z1=np.dot(W1,X)+b1
    A1=np.tanh(Z1)
    Z2=np.dot(W2,A1)+b2
    A2=np.tanh(Z2)
    Z3=np.dot(W3,A2)+b3
    A3=sig(Z3)
    
    cache={"Z1":Z1,"A1":A1,"Z2":Z2,"A2":A2,"Z3":Z3,"A3":A3}
    
    return A3,cache

In [0]:
A3,cache=forward_prop(X,params)

In [0]:
def compute_cost(y,A3,params,lambd=0):
    
    m=np.max(y.shape)
        
    c1=np.dot(y,(np.log(A3)).T)
    c2=np.dot((1-y),(np.log(1-A3)).T)
    
    w1=np.sum(np.square(params["W1"]))
    w2=np.sum(np.square(params["W2"]))
    w3=np.sum(np.square(params["W3"]))
    
    c3=(lambd/(2*m))*(w1+w2+w3)
    
    cost = -((c1+c2)/m)+c3
    
    cost = np.squeeze(cost) 
    return (np.sum(cost))

In [15]:
cost=compute_cost(y,A3,params,lambd=0)
print(cost)

0.7896069605718027


In [0]:
def random_mini_batch(X,y,mini_batch_size=128):
  
  m=X.shape[1]
  
  
  
  permutation=list(np.random.permutation(m))      #to shuffle the x and y matrices
  shuffled_X=X[:,permutation]
  shuffled_Y=y[:,permutation].reshape((1,m))
  

  mini_batches=[]
  
  total_possible_mini_batches=math.floor(m/mini_batch_size)
  
  for k in range(total_possible_mini_batches):
    
    mini_batch_X=shuffled_X[:,(mini_batch_size*k):(mini_batch_size*(k+1))]
    mini_batch_Y=shuffled_Y[:,(mini_batch_size*k):(mini_batch_size*(k+1))]
    
    mini_batch=(mini_batch_X,mini_batch_Y)
   
    mini_batches.append(mini_batch)
    
  
  if m%mini_batch_size!=0:
    
    mini_batch_X=shuffled_X[:,total_possible_mini_batches*mini_batch_size:m]
    mini_batch_Y=shuffled_Y[:,total_possible_mini_batches*mini_batch_size:m]
   
    mini_batch=(mini_batch_X,mini_batch_Y)
    mini_batches.append(mini_batch)
  
    
  return mini_batches

In [0]:
mini_batches=random_mini_batch(X,y,mini_batch_size=128)

In [0]:
def backward_prop(y,params,cache,X,lambd=0):
    
    W1=params["W1"]
    b1=params["b1"]
    W2=params["W2"]
    b2=params["b2"]
    W3=params["W3"]
    b3=params["b3"]
    
    m=np.max(y.shape)
    
    A1=cache["A1"]
    A2=cache["A2"]
    A3=cache["A3"]
    
    
    dZ3 = A3-y
    dW3 = np.dot(dZ3,A3.T)/m+((lambd/m)*W3)
    db3 = np.sum(dZ3,axis=1,keepdims=True)/m
    dZ2 = np.dot(W3.T,dZ3)*(1-np.power(A2,2))
    dW2 = np.dot(dZ2,A2.T)/m+((lambd/m)*W2)
    db2 = np.sum(dZ2,axis=1,keepdims=True)/m
    dZ1 = np.dot(W2.T,dZ2)*(1-np.power(A1,2))
    dW1 = np.dot(dZ1,X.T)/m+((lambd/m)*W1)
    db1 = np.sum(dZ1,axis=1,keepdims=True)/m
    
    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2,
             "dW3": dW3,
             "db3": db3}
    
    return grads


In [0]:
grads=backward_prop(y,params,cache,X,lambd=0)

In [0]:
def initialise_adam_params(params):
  
  W1=params["W1"]
  b1=params["b1"]
  W2=params["W2"]
  b2=params["b2"]
  W3=params["W3"]
  b3=params["b3"]
  
  v={}
  s={}
  
  for l in range(len(params)//2):
    
    v["dW"+str(l+1)]=np.zeros(params["W"+str(l+1)].shape)
    v["db"+str(l+1)]=np.zeros(params["b"+str(l+1)].shape)
    s["dW"+str(l+1)]=np.zeros(params["W"+str(l+1)].shape)
    s["db"+str(l+1)]=np.zeros(params["b"+str(l+1)].shape)
                                                    
  return v,s
  

In [0]:
v,s=initialise_adam_params(params)

In [0]:
def update_params_with_adam(params,v,s,grads,learning_rate=0.005,beta1=0.9,beta2=0.999,epsilon=1e-7):
  
  L=len(params)//2
  v_corrected={}
  s_corrected={}
  
  for l in range(L):
    
    v["dW"+str(l+1)]=beta1*v["dW"+str(l+1)]+(1-beta1)*grads["dW"+str(l+1)]
    v["db"+str(l+1)]=beta1*v["db"+str(l+1)]+(1-beta1)*grads["db"+str(l+1)]
    
    v_corrected["dW"+str(l+1)]=v["dW"+str(l+1)]/(1-(beta1**2))
    v_corrected["db"+str(l+1)]=v["db"+str(l+1)]/(1-beta1**2)
    
    s["dW"+str(l+1)]=beta2*s["dW"+str(l+1)]+(1-beta2)*(grads["dW"+str(l+1)]**2)
    s["db"+str(l+1)]=beta2*s["db"+str(l+1)]+(1-beta2)*(grads["db"+str(l+1)]**2)
    
    s_corrected["dW"+str(l+1)]=s["dW"+str(l+1)]/(1-beta2**2)
    s_corrected["db"+str(l+1)]=s["db"+str(l+1)]/(1-beta2**2)
    
    
    params["W"+str(l+1)] = params["W"+str(l+1)]-(learning_rate*(v_corrected["dW"+str(l+1)]/np.sqrt(s_corrected["dW"+str(l+1)]+epsilon)))
    params["b"+str(l+1)] = params["b"+str(l+1)]-(learning_rate*(v_corrected["db"+str(l+1)]/np.sqrt(s_corrected["db"+str(l+1)]+epsilon)))
                                               
  return params,v,s 
  

In [0]:
params,v,s=update_params_with_adam(params,v,s,grads,learning_rate=0.005)

In [0]:
def nn_model(X,y,layer_dims,learning_rate=0.005,mini_batch_size=128,num_of_epochs=5000,beta1=0.9,beta2=0.999,epsilon=1e-8,print_cost=True):
  
  
  params=initialise_params(layer_dims)
  
  costs=[]
  
  v,s=initialise_adam_params(params)
  
  for i in range(num_of_epochs):
    
    mini_batches=random_mini_batch(X,y,mini_batch_size)
    
    for batch in mini_batches:
      
      (X_batch,Y_batch)=batch
      
      a3,caches=forward_prop(X_batch,params)
      
      cost=compute_cost(Y_batch,a3,params)

      grads=backward_prop(Y_batch,params,caches,X_batch,lambd=0)
      
      params,v,s=update_params_with_adam(params,v,s,grads,learning_rate,beta1,beta2,epsilon)
      
      
    if print_cost and i%500==0:
      print("cost after epoch %i: %f" %(i,cost))
    
    if print_cost and i%100==0:
      costs.append(cost)

  return params

In [25]:
params=nn_model(X,y,layer_dims,learning_rate=0.1,mini_batch_size=128,num_of_epochs=5000)

cost after epoch 0: 0.813141


  


cost after epoch 500: nan


  
  """


cost after epoch 1000: nan
cost after epoch 1500: nan
cost after epoch 2000: nan
cost after epoch 2500: nan
cost after epoch 3000: nan
cost after epoch 3500: nan
cost after epoch 4000: nan


KeyboardInterrupt: ignored

In [0]:
def predict(X,params):
  
  A3,cache=forward_prop(X,params)
  
  predictions=(A3>0.5)
  
  return predictions

In [0]:
predictions=predict(X,params)

print("predictions mean = "+str(np.mean(predictions)))

print("Accuracy: %d"%float((np.dot(y,predictions.T)+np.dot(1-y,1-predictions.T))/float(y.size)*100)+"%")

In [0]:
hidden_layer_sizes = [3, 4, 5, 15, 16, 17, 18, 19, 20]
for i, nh in enumerate(hidden_layer_sizes):
    layer_dims=[nx,nh,nh,ny]
    parameters = nn_model(X, y,layer_dims)
    predictions = predict(X,parameters)
    accuracy = float((np.dot(y,predictions.T) + np.dot(1-y,1-predictions.T))/float(y.size)*100)
    print ("Accuracy for {} hidden units: {} %".format(nh, accuracy))