In [1]:
import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
import math


In [2]:
x_raw = np.load('/storage/Galaxy_dataset/X.npy')
y_raw = pd.read_csv('/storage/Galaxy_dataset/training_solutions_rev1.csv',nrows=5049)
y_raw = y_raw['Class1.1']
y_raw = y_raw.values.reshape((y_raw.shape[0],1))

In [3]:
# Flattening the n-dimensional feature space to 2 dimension [(n_x,m),(1,m)]. 
x_flatten = x_raw.reshape(x_raw.shape[0],-1).T
y_flatten = y_raw.reshape(y_raw.shape[0],-1).T

# Standardizing the RGB values
x = x_flatten/255

# Train-Test set
x_train = x[:,:4500]
x_test = x[:,4500:]
y_train = y_flatten[:,:4500]
y_test = y_flatten[:,4500:]

In [4]:
for i in range(0,y_train.shape[1]):
    if y_train[0,i]>0.5:
      y_train[0,i] = 1
    else:
      y_train[0,i] = 0
    

In [5]:
for i in range(0,y_test.shape[1]):
    if y_train[0,i]>0.5:
      y_test[0,i] = 1
    else:
      y_test[0,i] = 0

In [6]:
n0= x_train.shape[0]
m= x_train.shape[1]
n1 = 20
n2 = 7
n3 = 5
n4 = 5
n5 = 1
layers_dims = (n0,n1,n2,n3,n4,n5)
keep_prob = [1,0.86,0.86,0.86,0.86,1]

In [7]:
# print(layers_dims)
# print(m)

In [8]:
def init_parameters(layers_dims):

  np.random.seed(1)
  parameters = {}
  L = len(layers_dims)
  
  for l in range(1,L):
    parameters["W"+ str(l)] = np.random.randn(layers_dims[l],layers_dims[l-1])/ np.sqrt(layers_dims[l-1])
    parameters["b"+ str(l)] = np.zeros((layers_dims[l],1))

  return parameters

In [9]:
def mini_batch_sampling (X,Y,minibatch_size,seed):
    np.random.seed(seed)
    m = x_train.shape[1]
    mini_batches = []
    
    permutation = list(np.random.permutation(m))
    shuffled_x = x_train[:,permutation]
    shuffled_y = y_train[:,permutation].reshape(1,m)
    
    total_batches = math.floor(m/minibatch_size)
    for k in range(0,total_batches):
        mini_batch_x = shuffled_x[:,k*minibatch_size:(k+1)*minibatch_size]
        mini_batch_y = shuffled_y[:,k*minibatch_size:(k+1)*minibatch_size]
        mini_batch = (mini_batch_x,mini_batch_y)
        mini_batches.append(mini_batch)
        
        
    if m % minibatch_size != 0:
        mini_batch_x = shuffled_x[:,(k+1)*minibatch_size:]
        mini_batch_y = shuffled_y[:,(k+1)*minibatch_size:]
        mini_batch = (mini_batch_x,mini_batch_y)
        mini_batches.append(mini_batch)
    
    
        
    return mini_batches 

In [10]:
def momentum_init (parameters):
    
    L = len(parameters)//2
    v = {}
    for l in range(L):
        v["dW" + str(l+1)] = np.zeros((parameters['W' + str(l+1)].shape))
        v["db" + str(l+1)] = np.zeros((parameters['b' + str(l+1)].shape))
    return  v
        

In [11]:
def adam_init(parameters):
    
    L = len(parameters) // 2
    v = {}
    s = {}
    
    for l in range(L):
        v["dW" + str(l+1)] = np.zeros((parameters["W"+str(l+1)].shape))
        v["db" + str(l+1)] = np.zeros((parameters["b"+str(l+1)].shape))
        s["dW" + str(l+1)] = np.zeros((parameters["W"+str(l+1)].shape))
        s["db" + str(l+1)] = np.zeros((parameters["b"+str(l+1)].shape))
    return v,s

In [12]:
parameters =  init_parameters(layers_dims)
# print(parameters)

In [13]:
def sigmoid(x):
  return 1 / (1 + np.exp(-x)),x
def relu(x):
    return x * (x > 0),x

In [14]:
# sigmoid(parameters["W1"])

In [15]:
def linear_forward(A, W, b):
    
  
    Z = np.dot(W,A)+b
    
    cache = (A, W, b)
  
    
    return Z, cache

In [16]:
def drop_out_matrices(layers_dims, keep_prob,minibatch_Y):
    np.random.seed(1)
    D = {}
    L = len(layers_dims)
    

    for l in range(L):
        # initialize the random values for the dropout matrix
        D["D" + str(l)] = np.random.rand(layers_dims[l], minibatch_Y.shape[1])
        # Convert it to 0/1 to shut down neurons corresponding to each element
        D["D" + str(l)] = D["D" + str(l)] < keep_prob[l]
        
    return D

In [17]:
# D = drop_out_matrices(layers_dims,m,keep_prob)

In [18]:
# print(D["D1"])

In [19]:
# Z,cache = linear_forward(x_train,parameters["W1"],parameters["b1"]) 
# print(Z,cache)

In [20]:
def linear_forward_activation(A,W,b,activation):

  if activation == "sigmoid":
    Z,linear_cache = linear_forward(A, W, b)
    A,activation_cache = sigmoid(Z)
  
  elif activation =="relu":
    Z,linear_cache = linear_forward(A,W,b)
    A,activation_cache = relu(Z) 
  cache = (linear_cache,activation_cache)

  return A,cache 

In [21]:
# A,cache = linear_forward_activation(x_train,parameters["W1"],parameters["b1"],activation="relu")
# print(A,cache)

In [22]:
def linear_activation(X,parameters,D,keep_prob):
    caches = []
    A = X
    A = np.multiply(A,D["D0"])
    A = A/keep_prob[0]
    L = len(parameters) // 2
#     print("A_pre 0",A ,"/n")
#     print("A_post 0",A ,"/n")
    
    
    for l in range(1, L):
        A_prev = A 
        A, cache = linear_forward_activation(A,parameters['W' + str(l)],
                                             parameters['b' + str(l)], activation = "relu")
#         print("A_pre",A ,l,"/n")
        A = np.multiply(A,D["D"+str(l)])
        A = A/keep_prob[l]
#         print("A_post",A ,l,"/n")
        caches.append(cache)
    
    
    AL, cache = linear_forward_activation(A, parameters['W' + str(L)],
                                          parameters['b' + str(L)], activation = "sigmoid")
    
    caches.append(cache)
   
    
  
            
    return AL, caches
  

In [23]:
# AL,caches = linear_activation(x_train,parameters,D,keep_prob)


In [24]:
# print(AL)

In [25]:
def cost_function(Y,AL):

  m  = Y.shape[1]
  cost = -np.sum(np.multiply(Y,np.log(AL))+np.multiply(1-Y,np.log(1-AL)))
  return cost

In [26]:
# cost =  cost_function(y_train,AL)
# print(cost)

In [27]:
def relu_backward(A):
  R = 1*(A>0)
  return R

def sigmoid_backward(activation_cache):
  A,o = sigmoid(activation_cache)
  S = A*(1-A)
  return S

In [28]:
 def linear_backward(dZ,cache):

  A_prev,W,b = cache
  m = A_prev.shape[1]
  
  dW = 1/m*np.dot(dZ,A_prev.T)
  db = 1/m*np.sum(dZ,axis=1,keepdims=True)
  dA_prev = np.dot(W.T,dZ)

  return dA_prev,dW,db

In [29]:
def linear_activation_backward(dA,cache,activation):

  linear_cache, activation_cache = cache
    
  if activation == "relu":
        
    dZ = dA*relu_backward(activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
        
        
  elif activation == "sigmoid":
        
    dZ = dA*sigmoid_backward(activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
      
    
  return dA_prev, dW, db

In [30]:
# asd,sdf,fgh = linear_activation_backward(dAL,current_cac,activation = "sigmoid")
# print(asd,sdf,fgh)

In [31]:
def L_model_backward(AL, Y, caches,D,keep_prob):
    
    grads = {}
    L = len(caches) # the number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape) # after this line, Y is the same shape as AL
    
  
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    
   
    current_cache = caches[L-1]
    grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL,current_cache, activation = "sigmoid")
    grads["dA" + str(L-1)] = np.multiply(grads["dA"+ str(L-1)],D["D"+str(L-1)])
    grads["dA" + str(L-1)] = grads["dA" + str(L-1)] / keep_prob[L-1]

    for l in reversed(range(L-1)):
        
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l+1)], current_cache, activation = "relu") 
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
        grads["dA" + str(l)] = np.multiply(grads["dA" + str(l)],D["D" + str(l)])
        grads["dA" + str(l)] = grads["dA" + str(l)] / keep_prob[l]
    

    return grads

In [32]:
# grads = L_model_backward(AL, y_train, caches,D,keep_prob)

In [33]:
# print(grads)

In [34]:
def update_param(parameters,parameter_grad,learning_rate):

  L = len(parameters) // 2
  for l in range(1,L):
    parameters["W"+str(l)] = parameters["W"+str(l)] - learning_rate*parameter_grad["dW"+str(l)]
    parameters["b"+str(l)] = parameters["b"+str(l)] - learning_rate*parameter_grad["db"+str(l)] 
  return parameters

In [35]:
def update_parameters_with_momentum(parameters, grads, v, beta, learning_rate):
    
    L = len(parameters) // 2 # number of layers in the neural networks
    
    # Momentum update for each parameter
    for l in range(L):
        
        ### START CODE HERE ### (approx. 4 lines)
        # compute velocities
        v["dW" + str(l+1)] = beta*v["dW" + str(l+1)] + (1-beta)*grads["dW"+str(l+1)]
        v["db" + str(l+1)] = beta*v["db" + str(l+1)] + (1-beta)*grads["db"+str(l+1)]
        # update parameters
        parameters["W" + str(l+1)] =  parameters["W" + str(l+1)] - learning_rate * v["dW" + str(l+1)]
        parameters["b" + str(l+1)] =  parameters["b" + str(l+1)] - learning_rate * v["db" + str(l+1)]
        ### END CODE HERE ###
        
    return parameters, v

In [36]:
def update_parameters_with_adam(parameters,grads,v,s,t,beta1,beta2,learning_rate,epsilon):
    
    L = len(parameters)// 2
    v_corrected = {}
    s_corrected = {}
    
    for l in range(L):
        v["dW" + str(l+1)] = beta1 * v["dW" + str(l+1)] + (1-beta1)*grads["dW"+str(l+1)]
        v["db" + str(l+1)] = beta1 * v["db" + str(l+1)] + (1-beta1)*grads["db"+str(l+1)]
        
        v_corrected["dW" + str(l+1)] = v["dW" + str(l+1)]/(1-np.power(beta1,t))
        v_corrected["db" + str(l+1)] = v["db" + str(l+1)]/(1-np.power(beta1,t))
        
        s["dW" + str(l+1)] = beta2 *s["dW" + str(l+1)] + (1-beta2)*(grads["dW"+str(l+1)])**2
        s["db" + str(l+1)] = beta2 *s["db" + str(l+1)] + (1-beta2)*(grads["db"+str(l+1)])**2
        
        s_corrected["dW" + str(l+1)] = s["dW" + str(l+1)]/(1-np.power(beta2,t))
        s_corrected["db" + str(l+1)] = s["db" + str(l+1)]/(1-np.power(beta2,t))
        
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate*(v_corrected["dW" + str(l+1)]/(np.sqrt(s_corrected["dW" + str(l+1)])+epsilon))
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate*(v_corrected["db" + str(l+1)]/(np.sqrt(s_corrected["db" + str(l+1)])+epsilon))
        
    return parameters,v,s

In [37]:
def L_layer_model(X, Y, layers_dims,optimizer ,learning_rate, mini_batch_size , beta ,beta1, beta2 ,epsilon,
                  num_epochs,keep_prob, print_cost):
   

    np.random.seed(1)
    costs = []
    seed = 10
    m = X.shape[1]
    
    
  
    parameters = init_parameters(layers_dims)
    
    if optimizer == "gd":
        pass
    elif optimizer == "momentum":
        v = momentum_init(parameters)
    elif optimizer ==  "adam":
        v,s = adam_init(parameters)
    
    
   
    for i in range(num_epochs):

        seed = 10
        seed = seed+1
        minibatches =  mini_batch_sampling (X,Y,mini_batch_size,seed)
        cost_total = 0
        t= 0
        
        for minibatch in minibatches:
            
            (minibatch_X, minibatch_Y) = minibatch
            
            D = drop_out_matrices(layers_dims,keep_prob,minibatch_Y)
            
            AL, caches = linear_activation(minibatch_X, parameters,D,keep_prob)

            
            cost_total +=  cost_function(minibatch_Y,AL)
           

            grads = L_model_backward(AL, minibatch_Y, caches,D,keep_prob)

            if optimizer=="gd":
                parameters = update_param(parameters, grads, learning_rate)
            elif optimizer == "momentum":
                parameters,v = update_parameters_with_momentum(parameters, grads, v, beta, learning_rate)
            elif optimizer == "adam":
                t = t + 1
                parameters,v,s == update_parameters_with_adam(parameters,grads,v,s,t,beta1,beta2,learning_rate,epsilon)
        
        cost_avg = cost_total / m
       
        if print_cost and i % 1 == 0:
            print ("Cost after iteration %i: %f" %(i, cost_avg))
        if print_cost and i % 1 == 0:
            costs.append(cost_avg)

    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()
    
    return parameters

In [None]:
paramet = L_layer_model(x_train, y_train, layers_dims,"gd" ,0.0007, 128 , 0.9 ,0.9, 0.999 ,1e-8,
                  2000,keep_prob, True)
print("End")

Cost after iteration 0: 0.687717
Cost after iteration 1: 0.684343
Cost after iteration 2: 0.681760
Cost after iteration 3: 0.679661
Cost after iteration 4: 0.677732
Cost after iteration 5: 0.676023
Cost after iteration 6: 0.674393
Cost after iteration 7: 0.672689
Cost after iteration 8: 0.671387
Cost after iteration 9: 0.670191
Cost after iteration 10: 0.668949
Cost after iteration 11: 0.667860
Cost after iteration 12: 0.666831
Cost after iteration 13: 0.665902
Cost after iteration 14: 0.664984
Cost after iteration 15: 0.664222
Cost after iteration 16: 0.663547
Cost after iteration 17: 0.662757
Cost after iteration 18: 0.662116
Cost after iteration 19: 0.661446
Cost after iteration 20: 0.660776
Cost after iteration 21: 0.660166
Cost after iteration 22: 0.659628
Cost after iteration 23: 0.659123
Cost after iteration 24: 0.658622
Cost after iteration 25: 0.658177
Cost after iteration 26: 0.657706
Cost after iteration 27: 0.657235
Cost after iteration 28: 0.656809
Cost after iteration 29:

In [None]:
def predict(X,y,parameters):
  m = X.shape[1]
  p = np.zeros((1,m))
  yc = np.zeros((1,m))
  n = len(parameters)// 2 

  probas,caches = linear_activation(X, parameters)

  for i in range(0,probas.shape[1]):
    if probas[0,i]>0.5:
      p[0,i] = 1
    else:
      p[0,i] = 0
    if y[0,i]>0.5:
      yc[0,i] = 1
    else:
      yc[0,i] = 0
  print("probas",str(probas))  
  print("predictions",str(p))
  print("yc", str(yc))
  print("label", str(y))
  print("Accuracy: "  + str(np.sum((p == yc)/m)))
  return p;

In [None]:
train_accuracy = predict(x_train,y_train,paramet)

In [None]:
test_accuracy = predict(x_test,y_test,paramet)

In [None]:
test_accuracy = predict(x_test,y_test,paramet)

In [None]:
np.save('/storage/Galaxy_dataset/parameter.npy',paramet)

In [None]:
np.save('/storage/Galaxy_dataset/probas.npy',probas)