In [29]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
import copy

In [81]:
#Loading in the dataset with pandas
data = pd.read_csv("banana_quality.csv")


In [82]:
#Checking out the different features
df = pd.DataFrame(data).dropna().replace({"Good": 0, "Bad": 1})
df


  df = pd.DataFrame(data).dropna().replace({"Good": 0, "Bad": 1})


Unnamed: 0,Size,Weight,Sweetness,Softness,HarvestTime,Ripeness,Acidity,Quality
0,-1.924968,0.468078,3.077832,-1.472177,0.294799,2.435570,0.271290,0
1,-2.409751,0.486870,0.346921,-2.495099,-0.892213,2.067549,0.307325,0
2,-0.357607,1.483176,1.568452,-2.645145,-0.647267,3.090643,1.427322,0
3,-0.868524,1.566201,1.889605,-1.273761,-1.006278,1.873001,0.477862,0
4,0.651825,1.319199,-0.022459,-1.209709,-1.430692,1.078345,2.812442,0
...,...,...,...,...,...,...,...,...
7995,-6.414403,0.723565,1.134953,2.952763,0.297928,-0.156946,2.398091,1
7996,0.851143,-2.217875,-2.812175,0.489249,-1.323410,-2.316883,2.113136,1
7997,1.422722,-1.907665,-2.532364,0.964976,-0.562375,-1.834765,0.697361,1
7998,-2.131904,-2.742600,-1.008029,2.126946,-0.802632,-3.580266,0.423569,1


In [83]:

#Deviding the dataset into an X-matrix and an y-matrix
X = df.drop(columns=['Quality'])
y = df['Quality']



In [104]:
#Splitting the dataset intto training and testing data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42)


X_test = X_test.T
y_test = np.reshape(y_test, (1, 6400))
y_test= np.where(y_test == "good", 1, 0)
print(y_test.shape)
print(X_test.shape)
print()


X_train = X_train.T
y_train = np.reshape(y_train, (1, 1600))
y_train= np.where(y_train == "good", 1, 0)

print(np.shape(y_train))
print(np.shape(X_train))


(1, 6400)
(7, 6400)

(1, 1600)
(7, 1600)


In [105]:
#Initializing parameters

def initialize_parameters(layer_dims):

  parameters = {}

  L = len(layer_dims)

  for l in range(1, L):

    parameters["W" + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.01
    parameters["b" + str(l)] = np.zeros((layer_dims[l], 1))


  return parameters


In [106]:
#Linear part of each neuron

def linear_part(A, W, b):

  Z = np.dot(W,A) + b
  cache = (A, W, b)



  return Z, cache

In [107]:
#Sigmoid function
def sigmoid(Z):
  return 1/(1+np.exp(-Z)), Z




In [108]:
#Relu function
def relu(Z):

  return np.maximum(0,Z), Z



In [109]:
#Activation part of each neuron
def activation_part(A_prev, W, b, activation):

  if activation == "sigmoid":
    Z, linear_cache = linear_part(A_prev, W, b)
    A, activation_cache = sigmoid(Z)



  elif activation == "relu":
    Z, linear_cache = linear_part(A_prev, W, b)
    A, activation_cache = relu(Z)

  cache = (linear_cache, activation_cache)

  return A, cache


In [110]:
#The forward propagation
def L_model_forward(X, parameters):

  caches = []
  A = X
  L = len(parameters) // 2


  for l in range(1, L):
    A_prev = A

    A, cache = activation_part(A_prev, parameters["W" + str(l)],parameters["b" + str(l)], "relu")
    caches.append(cache)


  AL, cache = activation_part(A, parameters["W" + str(l)],parameters["b" + str(l)], "sigmoid")
  caches.append(cache)


  return AL, caches

In [111]:
#Computing the cost
def compute_cost(AL, Y):

  m = np.shape(Y)[0]


  cost = (-1/m) * (np.dot(Y, np.log(AL).T) + np.dot((1-Y), np.log(1-AL).T))

  cost = np.squeeze(cost)

  return cost

In [112]:
def linear_backward(dZ, cache):


  A_prev, W, b = cache

  m = A_prev.shape[0]

  dW = (1/m) * np.dot(dZ, A_prev.T)
  db = (1/m) * np.sum(dZ, axis=1, keepdims=True)
  dA_prev = np.dot(W.T,dZ)



  return dA_prev, dW, db

In [113]:
def sigmoid_backward(dA, cache):

    Z = cache
    sigmoid_Z = 1 / (1 + np.exp(-Z))
    dZ = dA * sigmoid_Z * (1 - sigmoid_Z)

    return dZ



In [114]:
def relu_backward(dA, cache):
    """
    Implement the backward propagation for a single ReLU unit.

    Arguments:
    dA -- post-activation gradient, same shape as A

    Returns:
    dZ -- gradient of the cost with respect to Z
    """
    Z = cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0

    return dZ




In [115]:
def linear_activation_backward(dA, cache, activation):

  linear_cache, activation_cache = cache


  if activation == "relu":
    dZ = relu_backward(dA, activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)



  elif activation == "sigmoid":

    dZ = sigmoid_backward(dA, activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)



  return dA_prev, dW, db

In [116]:
# GRADED FUNCTION: L_model_backward

def L_model_backward(AL, Y, caches):

    grads = {}
    L = len(caches) # the number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape) # after this line, Y is the same shape as AL


    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    current_cache = caches[L-1] # Last Layer
    grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, "sigmoid")

    for l in reversed(range(L-1)):

        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, activation = "relu")
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp



    return grads

In [117]:
def update_parameters(params, grads, learning_rate):

    parameters = copy.deepcopy(params)
    L = len(parameters) // 2

    for l in range(L):


        parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * grads["dW" + str(l+1)]
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * grads["db" + str(l+1)]


    return parameters

In [118]:

### CONSTANTS DEFINING THE MODEL ####
n_x = 7     # num_px * num_px * 3
n_h = 7
n_y = 1
layers_dims = (n_x, n_h, n_y)
learning_rate = 0.0075

In [119]:
def two_layer_model(X, Y, layers_dims, learning_rate = 0.0075, num_iterations = 3000, print_cost=False):
    """
    Implements a two-layer neural network: LINEAR->RELU->LINEAR->SIGMOID.

    Arguments:
    X -- input data, of shape (n_x, number of examples)
    Y -- true "label" vector (containing 1 if cat, 0 if non-cat), of shape (1, number of examples)
    layers_dims -- dimensions of the layers (n_x, n_h, n_y)
    num_iterations -- number of iterations of the optimization loop
    learning_rate -- learning rate of the gradient descent update rule
    print_cost -- If set to True, this will print the cost every 100 iterations

    Returns:
    parameters -- a dictionary containing W1, W2, b1, and b2
    """

    np.random.seed(1)
    grads = {}
    costs = []                              # to keep track of the cost
    m = X.shape[1]                           # number of examples
    (n_x, n_h, n_y) = layers_dims

    # Initialize parameters dictionary, by calling one of the functions you'd previously implemented
    #(≈ 1 line of code)
    # parameters = ...
    # YOUR CODE STARTS HERE

    parameters = initialize_parameters((n_x, n_h, n_y))

    # YOUR CODE ENDS HERE

    # Get W1, b1, W2 and b2 from the dictionary parameters.
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    # Loop (gradient descent)

    for i in range(0, num_iterations):

        # Forward propagation: LINEAR -> RELU -> LINEAR -> SIGMOID. Inputs: "X, W1, b1, W2, b2". Output: "A1, cache1, A2, cache2".
        #(≈ 2 lines of code)
        # A1, cache1 = ...
        # A2, cache2 = ...
        # YOUR CODE STARTS HERE
        A1, cache1 = activation_part(X, W1, b1, "relu")
        A2, cache2 = activation_part(A1, W2, b2, "sigmoid")

        # YOUR CODE ENDS HERE

        # Compute cost
        #(≈ 1 line of code)
        # cost = ...
        # YOUR CODE STARTS HERE

        cost = compute_cost(A2, Y)

        # YOUR CODE ENDS HERE

        # Initializing backward propagation
        dA2 = - (np.divide(Y, A2) - np.divide(1 - Y, 1 - A2))

        # Backward propagation. Inputs: "dA2, cache2, cache1". Outputs: "dA1, dW2, db2; also dA0 (not used), dW1, db1".
        #(≈ 2 lines of code)
        # dA1, dW2, db2 = ...
        # dA0, dW1, db1 = ...
        # YOUR CODE STARTS HERE
        dA1, dW2, db2 = linear_activation_backward(dA2, cache2, "sigmoid")
        dA0, dW1, db1 = linear_activation_backward(dA1, cache1, "relu")

        # YOUR CODE ENDS HERE

        # Set grads['dWl'] to dW1, grads['db1'] to db1, grads['dW2'] to dW2, grads['db2'] to db2
        grads['dW1'] = dW1
        grads['db1'] = db1
        grads['dW2'] = dW2
        grads['db2'] = db2

        # Update parameters.
        #(approx. 1 line of code)
        # parameters = ...
        # YOUR CODE STARTS HERE

        parameters = update_parameters(parameters, grads, learning_rate)

        # YOUR CODE ENDS HERE

        # Retrieve W1, b1, W2, b2 from parameters
        W1 = parameters["W1"]
        b1 = parameters["b1"]
        W2 = parameters["W2"]
        b2 = parameters["b2"]

        # Print the cost every 100 iterations
        if print_cost and i % 100 == 0 or i == num_iterations - 1:
            print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
        if i % 100 == 0 or i == num_iterations:
            costs.append(cost)

    return parameters, costs, A2

def plot_costs(costs, learning_rate=0.0075):
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

In [120]:
parameters, costs, A2 = two_layer_model(X_train, y_train, layers_dims = (n_x, n_h, n_y), num_iterations = 1000, print_cost=True)

print("Cost after first iteration: " + str(costs[0]))


Cost after iteration 0: 1109.1237077352948
Cost after iteration 100: 1.6377219853135188
Cost after iteration 200: 0.6921064097560468
Cost after iteration 300: 0.43059841087398953
Cost after iteration 400: 0.3104370200537869
Cost after iteration 500: 0.24182932176205918
Cost after iteration 600: 0.1975681804630608
Cost after iteration 700: 0.1666957335720538
Cost after iteration 800: 0.14396523020736993
Cost after iteration 900: 0.12656285451466862
Cost after iteration 999: 0.11294485810140925
Cost after first iteration: 1109.1237077352948


In [121]:
AL, Caches = L_model_forward(X_test, parameters)







In [122]:
A1, cache1 = activation_part(X_test, parameters["W1"], parameters["b1"], "relu")
A2, cache2 = activation_part(A1, parameters["W2"], parameters["b2"], "sigmoid")


In [123]:
A2[0][0]
print(np.shape(y_test))
print(y_test[0])

teller = 0

for i in y_test[0]:
    if i == 0:
        teller += 1
    else:
        break

print(teller)




(1, 6400)
[0 0 0 ... 0 0 0]
6400
