In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split as tts
from sklearn.preprocessing import minmax_scale as mms

In [2]:
def load_data(path,name):
    """Read a CSV file into a pandas DataFrame.

    Parameters
    ----------
    path : directory that contains the file.
    name : file name inside `path`.

    Returns
    -------
    The DataFrame produced by `pd.read_csv` for the joined path.
    """
    return pd.read_csv(os.path.join(path, name))

In [3]:
# Location of the red-wine quality CSV.
# NOTE(review): this is an absolute, machine-specific directory; the cell only
# runs where that folder exists.
path = "C:/Users/DanielaFe7/Desktop/Maestrado/Redes/RedesNeurais"
name = "winequality-red.csv"
data = load_data(path=path, name=name)
print("Loading wine dataset ...")
# Last expression of the cell: show the first rows of the loaded frame.
data.head()

Loading wine dataset ...


Unnamed: 0.1,Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,category
0,0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,Mid
1,1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,Mid
2,2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,Mid
3,3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,Mid
4,4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,Mid


# Pre-processing Data Set

In [4]:
print("Convert Categorical Output to One Hot Vector")
def categorical_output(Y_index):
    """One-hot encode a column of categorical labels.

    Parameters
    ----------
    Y_index : object exposing `.values` (e.g. the pandas Series data["category"]).

    Returns
    -------
    Dense 2-D array produced by sklearn's `OneHotEncoder`, one row per label.
    """
    # The encoder expects a 2-D input, so turn the labels into a single column.
    labels = Y_index.values.reshape(-1, 1)
    encoder = preprocessing.OneHotEncoder()
    # fit() returns the encoder itself; toarray() densifies the sparse result.
    return encoder.fit(labels).transform(labels).toarray()

Convert Categorical Output to One Hot Vector


In [5]:
print("Preparing inputs droping Labels and droping indexs")
def preprocessing_input(X_index):
    """Scale the input features with sklearn's `minmax_scale` (imported as `mms`).

    Despite the banner printed above, nothing is dropped here: only scaling is
    applied to whatever is passed in.
    NOTE(review): presumably the caller removes the index and label columns
    beforehand - confirm against the call sites.
    """
    return mms(X_index)

Preparing inputs droping Labels and droping indexs


# NN L-Layer

In [6]:
def sigmoid(Z):
    """Numerically stable logistic sigmoid.

    Parameters
    ----------
    Z : scalar or array-like of pre-activation values.

    Returns
    -------
    (S, Z) : S is 1 / (1 + exp(-Z)) evaluated element-wise; Z is the input,
    returned untouched so that callers can keep it as the activation cache.
    """
    z = np.asarray(Z)
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(float)

    # exp(-|z|) is always in (0, 1], so it can never overflow, unlike exp(-z)
    # for large negative z.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z)          z < 0: e^z / (1 + e^z)  (same value)
    S = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    if S.ndim == 0:
        # Scalar input: hand back a NumPy scalar rather than a 0-d array.
        S = S[()]
    return S,Z

In [7]:
def derivade_sigmoid(z):
    """Derivative of the sigmoid evaluated at `z`: g(z) * (1 - g(z))."""
    # sigmoid() also returns its input as second element; it is not needed here.
    activation, _ = sigmoid(z)
    return activation * (1 - activation)

In [8]:
def initialize_parameters_deep(layer_dims):
    """Create the initial weights and biases of every layer.

    Parameters
    ----------
    layer_dims : sequence with the size of each layer, input layer included.

    Returns
    -------
    dict with keys "W1", "b1", ..., where W<l> has shape
    (layer_dims[l], layer_dims[l-1]) and b<l> has shape (layer_dims[l], 1).

    Note: the global NumPy random generator is re-seeded with 3 on every call,
    so repeated calls with the same `layer_dims` return identical values.
    """
    np.random.seed(3)
    parameters = {}

    # Walk over consecutive (previous size, current size) pairs; layer numbering starts at 1.
    for idx, (n_in, n_out) in enumerate(zip(layer_dims[:-1], layer_dims[1:]), start=1):
        # Weights: standard normal draws. Biases: uniform draws in [0, 1).
        parameters['W' + str(idx)] = np.random.normal(0, 1, (n_out, n_in))
        parameters['b' + str(idx)] = np.random.random((n_out, 1))

    return parameters

In [9]:
def initialize_velocity(parameters):
    """Build the zero-initialised momentum terms matching `parameters`.

    Parameters
    ----------
    parameters : dict with keys "W1", "b1", ..., "WL", "bL".

    Returns
    -------
    dict with keys "dW1", "db1", ..., each a zero array shaped like the
    corresponding parameter.
    """
    n_layers = len(parameters) // 2  # one W and one b entry per layer
    v = {}

    for idx in range(1, n_layers + 1):
        for prefix in ("W", "b"):
            v["d" + prefix + str(idx)] = np.zeros_like(parameters[prefix + str(idx)])

    return v

In [10]:
def accuracy(Y_pred, Y):
    """Percentage of examples whose predicted vector equals the target vector.

    Parameters
    ----------
    Y_pred : predictions, one example per column (1-D input is treated as one
             scalar prediction per element).
    Y      : targets, same layout and shape as `Y_pred`.

    Returns
    -------
    float in [0, 100]; 0.0 when there are no examples.

    Raises
    ------
    ValueError : if `Y_pred` and `Y` do not have the same shape.
    """
    Y_pred = np.atleast_2d(np.asarray(Y_pred))
    Y = np.atleast_2d(np.asarray(Y))
    if Y_pred.shape != Y.shape:
        raise ValueError(
            "Y_pred and Y must have the same shape, got %s and %s"
            % (Y_pred.shape, Y.shape)
        )

    n_examples = Y_pred.shape[1]
    if n_examples == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    # An example counts as correct only when every component of its column matches.
    hits = int(np.count_nonzero(np.all(Y_pred == Y, axis=0)))
    return (hits * 100.0) / n_examples

# Forward Propagation in L - Layers

In [11]:
def linear_forward(A, W, b):
    """Linear part of a layer's forward step: Z = W . A + b.

    Returns
    -------
    (Z, cache) where cache is the tuple (A, W, b) kept for the backward pass.
    """
    pre_activation = np.dot(W, A) + b
    return pre_activation, (A, W, b)

In [12]:
def linear_activation_forward(A_prev, W, b, activation):
    """Forward step of one layer: linear transform followed by the activation.

    Parameters
    ----------
    A_prev     : activations coming from the previous layer (or the input data).
    W, b       : weights and bias of this layer.
    activation : name of the activation; only "sigmoid" is implemented.

    Returns
    -------
    (A, cache) where A is the layer output and cache is
    (linear_cache, activation_cache) = ((A_prev, W, b), Z).

    Raises
    ------
    ValueError : if `activation` is not "sigmoid".
    """
    # Fail with a clear message instead of an unbound-variable error further down.
    if activation != "sigmoid":
        raise ValueError("Unsupported activation: %r (only 'sigmoid' is implemented)" % (activation,))

    Z, linear_cache = linear_forward(A_prev, W, b)
    A, activation_cache = sigmoid(Z)

    cache = (linear_cache, activation_cache)
    return A, cache

In [13]:
def L_model_forward(X,Y,parameters,pred=False):
    """Forward pass through every layer of the network (all layers use sigmoid).

    Parameters
    ----------
    X          : input data, laid out as (input size, number of examples).
    Y          : accepted for call-site symmetry; not used inside this function.
    parameters : dict with keys "W1", "b1", ..., "WL", "bL".
    pred       : when True, return hard one-hot predictions instead of the
                 raw activations.

    Returns
    -------
    pred=False : (AL, caches) - output activations and one cache per layer.
    pred=True  : array shaped like AL holding 1 at each column's largest
                 activation and 0 elsewhere.
    """
    n_layers = len(parameters) // 2  # one W and one b entry per layer
    caches = []
    activation = X

    # Layers 1 .. L-1
    for idx in range(1, n_layers):
        W = parameters['W' + str(idx)]
        b = parameters['b' + str(idx)]
        activation, layer_cache = linear_activation_forward(activation, W, b, "sigmoid")
        caches.append(layer_cache)

    # Output layer L
    AL, layer_cache = linear_activation_forward(
        activation,
        parameters['W' + str(n_layers)],
        parameters['b' + str(n_layers)],
        "sigmoid",
    )
    caches.append(layer_cache)

    if not pred:
        return AL, caches

    # One row per example so that argmax(1) selects the winning output unit.
    scores = AL.T
    Y_prediction = np.zeros_like(scores)
    Y_prediction[np.arange(len(scores)), scores.argmax(1)] = 1
    return Y_prediction.T

In [14]:
def compute_cost(AL, Y):
    """Cross-entropy cost between the network output and the targets.

    Parameters
    ----------
    AL : output activations, same shape as `Y`.
    Y  : target array; `Y.shape[1]` is used as the number of examples m.

    Returns
    -------
    Scalar cost: -(1/m) * sum(Y*log(AL) + (1-Y)*log(1-AL)).
    """
    m = Y.shape[1]

    # A saturated sigmoid can return exactly 0.0 or 1.0; log(0) would then turn
    # the whole cost into NaN/inf. Keep AL strictly inside (0, 1).
    eps = 1e-15
    AL_safe = np.clip(np.asarray(AL, dtype=np.float64), eps, 1 - eps)

    cost = (- 1 / m) * np.sum(Y * np.log(AL_safe) + (1 - Y) * (np.log(1 - AL_safe)))
    cost = np.squeeze(cost)      # make sure the cost is a scalar (e.g. [[17]] -> 17)
    return cost

# Backward Propagation in L - Layers

In [15]:
def linear_backward(dZ, cache):
    """Backward step of the linear part of one layer.

    Parameters
    ----------
    dZ    : gradient of the cost with respect to this layer's linear output.
    cache : tuple (A_prev, W, b) stored by the forward pass.

    Returns
    -------
    (dA_prev, dW, db) : gradients with respect to the previous activations,
    the weights and the bias. dW and db are averaged over A_prev.shape[1].
    """
    A_prev, W, _ = cache              # the bias value itself is not needed
    inv_m = 1 / A_prev.shape[1]       # averaging factor over the examples

    dA_prev = np.dot(W.T, dZ)
    dW = inv_m * np.dot(dZ, A_prev.T)
    db = inv_m * np.sum(dZ, axis=1, keepdims=True)
    return dA_prev, dW, db

In [16]:
def linear_activation_backward(dA, cache, activation):
    """Backward step of one layer (activation followed by the linear part).

    Parameters
    ----------
    dA         : gradient of the cost with respect to this layer's output.
    cache      : (linear_cache, activation_cache) stored by the forward pass,
                 i.e. ((A_prev, W, b), Z).
    activation : name of the activation; only "sigmoid" is implemented.

    Returns
    -------
    (dA_prev, dW, db) as computed by `linear_backward`.

    Raises
    ------
    ValueError : if `activation` is not "sigmoid".
    """
    # Fail with a clear message instead of an unbound-variable error further down.
    if activation != "sigmoid":
        raise ValueError("Unsupported activation: %r (only 'sigmoid' is implemented)" % (activation,))

    linear_cache, activation_cache = cache

    # Chain rule: dZ = dA * g'(Z), where activation_cache holds Z.
    dZ = dA * derivade_sigmoid(activation_cache)

    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db

In [17]:
def L_model_backward(AL, Y, caches):
    """Backward pass through every layer of the network.

    Parameters
    ----------
    AL     : output activations of the forward pass.
    Y      : targets; reshaped to `AL.shape` before use.
    caches : list with one (linear_cache, activation_cache) entry per layer,
             in forward order.

    Returns
    -------
    dict `grads` with, for every layer l in 1..L:
      grads["dW<l>"], grads["db<l>"] : gradients of layer l's weights and bias;
      grads["dA<l>"]                 : gradient with respect to the activations
                                       that feed layer l (the `dA_prev` of that
                                       layer's backward step).
    """
    grads = {}
    L = len(caches) # number of layers (input layer not included)
    Y = Y.reshape(AL.shape)

    # A saturated sigmoid can output exactly 0.0 or 1.0, which would make the
    # divisions below produce inf and, multiplied by a zero derivative, NaN
    # gradients. Keep AL strictly inside (0, 1).
    eps = 1e-15
    AL_safe = np.clip(np.asarray(AL, dtype=np.float64), eps, 1 - eps)

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = - (np.divide(Y, AL_safe) - np.divide(1 - Y, 1 - AL_safe))

    # Output layer L.
    dA_prev, dW, db = linear_activation_backward(dAL, caches[L-1], "sigmoid")
    grads["dA" + str(L)] = dA_prev
    grads["dW" + str(L)] = dW
    grads["db" + str(L)] = db

    # Remaining layers, from L-1 down to 1. Index l is zero-based, so layer
    # l+1 consumes the gradient stored under "dA" + str(l + 2).
    for l in reversed(range(L-1)):
        dA_prev, dW, db = linear_activation_backward(grads["dA" + str(l + 2)], caches[l], "sigmoid")
        grads["dA" + str(l + 1)] = dA_prev
        grads["dW" + str(l + 1)] = dW
        grads["db" + str(l + 1)] = db

    return grads

# Update Parameters

In [18]:
def update_parameters(parameters, grads, learning_rate):
    """Plain gradient-descent step: p <- p - learning_rate * dp.

    Parameters
    ----------
    parameters    : dict with keys "W1", "b1", ..., updated in place.
    grads         : dict with the matching "dW<l>" / "db<l>" gradients.
    learning_rate : step size.

    Returns
    -------
    The same `parameters` dict, now holding the updated arrays.
    """
    n_layers = len(parameters) // 2  # one W and one b entry per layer

    for idx in range(1, n_layers + 1):
        for prefix in ("W", "b"):
            key = prefix + str(idx)
            parameters[key] = parameters[key] - learning_rate*grads["d" + key]

    return parameters

# Update Parameters with Momentum

In [19]:
def update_parameters_with_momentum(parameters, grads, v, learning_rate, beta):
    """Gradient-descent step with momentum.

    For every layer the velocity is refreshed as
        v <- beta * v + (1 - beta) * grad
    and the parameter then moves along the velocity:
        p <- p - learning_rate * v

    Parameters
    ----------
    parameters    : dict with keys "W1", "b1", ..., updated in place.
    grads         : dict with the matching "dW<l>" / "db<l>" gradients.
    v             : dict with the current velocities, same keys as `grads`;
                    updated in place.
    learning_rate : step size.
    beta          : momentum coefficient.

    Returns
    -------
    (parameters, v) : the same two dicts, now holding the updated arrays.
    """
    n_layers = len(parameters) // 2  # one W and one b entry per layer

    for idx in range(1, n_layers + 1):
        pairs = (("W" + str(idx), "dW" + str(idx)), ("b" + str(idx), "db" + str(idx)))

        # First refresh both velocities of this layer ...
        for _, g_key in pairs:
            v[g_key] = beta * v[g_key] + (1 - beta) * grads[g_key]
        # ... then move both parameters along them.
        for p_key, g_key in pairs:
            parameters[p_key] = parameters[p_key] - learning_rate * v[g_key]

    return parameters, v

# L - layer model

In [38]:
m = []  # NOTE(review): not referenced by any code visible in this notebook - confirm before removing
def L_layer_model(X, Y, X_test, Y_test, layers_dims, learning_rate, num_iterations, print_cost=False, beta=0.99):
    """Train an L-layer sigmoid network with momentum and report its accuracy.

    Parameters
    ----------
    X, Y           : training inputs and one-hot targets, one example per column.
    X_test, Y_test : held-out inputs and targets, same layout.
    layers_dims    : size of every layer, input layer included.
    learning_rate  : step size of the parameter update.
    num_iterations : number of full-batch gradient steps.
    print_cost     : when True, print (and record) the cost every 500 iterations.
    beta           : momentum coefficient passed to
                     `update_parameters_with_momentum` (default 0.99, the value
                     that used to be hard-coded).

    Returns
    -------
    dict with the trained parameters. Train and test accuracy are printed,
    not returned.
    """
    # Note: initialize_parameters_deep() re-seeds the generator itself right after this.
    np.random.seed(1)
    costs = []   # costs recorded every 500 iterations (only used by the optional plot below)

    # Parameters initialization.
    parameters = initialize_parameters_deep(layers_dims)

    # Velocity initialization.
    v = initialize_velocity(parameters)

    # Loop (gradient descent)
    for i in range(0, num_iterations):

        # Forward propagation: [LINEAR -> SIGMOID]*(L-1) -> LINEAR -> SIGMOID.
        AL, caches = L_model_forward(X,Y,parameters,pred=False)

        # Compute cost.
        cost = compute_cost(AL, Y)

        # Backward propagation.
        grads = L_model_backward(AL, Y, caches)

        # Plain gradient-descent update. This must stay commented out while
        # the momentum update below is in use.
        #parameters = update_parameters(parameters, grads, learning_rate)

        # Update parameters with Momentum.
        parameters, v = update_parameters_with_momentum(parameters, grads, v, learning_rate, beta = beta)

        # Print and record the cost every 500 iterations.
        if print_cost and i % 500 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))
            costs.append(cost)

    # Train prediction
    Y_prediction =  L_model_forward(X,Y,parameters,pred=True)
    print("------------------")
    print("Train Accuracy: ",accuracy(Y_prediction, Y))

    # Test prediction
    print("------------------")
    Y_prediction =  L_model_forward(X_test,Y_test,parameters,pred=True)
    print("Test Accuracy: ",accuracy(Y_prediction, Y_test))

    # Optional: plot the recorded costs.
    #plt.plot(np.squeeze(costs))
    #plt.ylabel('cost')
    #plt.xlabel('iterations (per tens)')
    #plt.title("Learning rate =" + str(learning_rate))
    #plt.show()

    return parameters

# Cross Validation : StratifiedKFold

In [46]:
from sklearn.model_selection import StratifiedKFold
# StratifiedKFold is applicable here because this is a classification problem.
Y_index = data["category"]
X_index = data.drop(["Unnamed: 0", "category"], axis = 1)
skf = StratifiedKFold(n_splits=10,random_state=2,shuffle=True)
skf.get_n_splits(X_index, Y_index)  # the return value is not used
print(skf)

# Pre-process the whole data set once; the index arrays produced by
# skf.split() are later used to pick rows out of these arrays.
# NOTE(review): the min-max scaling is computed over all rows, including the
# ones that end up in each test fold.
XX = mms(X_index)
YY = categorical_output(Y_index)
print(XX.shape)
print(YY.shape)


StratifiedKFold(n_splits=10, random_state=2, shuffle=True)
(1599, 11)
(1599, 3)


In [47]:
# Layer sizes, input layer first.
layers_dims = [11, 10, 8, 5, 3]
fold = 1
for train_index, test_index in skf.split(X_index, Y_index):
    print("Fold: ",fold)
    fold += 1

    # Select this fold's rows from the pre-processed arrays.
    X_train = XX[train_index]
    X_test = XX[test_index]
    Y_train = YY[train_index]
    Y_test = YY[test_index]

    # The model works with one example per column, hence the transposes.
    parameters = L_layer_model(
        X_train.T,
        Y_train.T,
        X_test.T,
        Y_test.T,
        layers_dims,
        learning_rate = 0.99,
        num_iterations = 20000,
        print_cost = True,
    )
    print("------------------")

Fold:  1
(1438, 11)
(1438, 3)
(161, 11)
(161, 3)
Cost after iteration 0: 7.510584
Cost after iteration 500: 1.027898
Cost after iteration 1000: 1.024486
Cost after iteration 1500: 1.024092
Cost after iteration 2000: 1.020367
Cost after iteration 2500: 0.844679
Cost after iteration 3000: 0.816127
Cost after iteration 3500: 0.811492
Cost after iteration 4000: 0.807857
Cost after iteration 4500: 0.803279
Cost after iteration 5000: 0.797331
Cost after iteration 5500: 0.791358
Cost after iteration 6000: 0.785431
Cost after iteration 6500: 0.778346
Cost after iteration 7000: 0.770158
Cost after iteration 7500: 0.762617
Cost after iteration 8000: 0.756520
Cost after iteration 8500: 0.750627
Cost after iteration 9000: 0.744451
Cost after iteration 9500: 0.737658
Cost after iteration 10000: 0.729785
Cost after iteration 10500: 0.723739
Cost after iteration 11000: 0.718992
Cost after iteration 11500: 0.714935
Cost after iteration 12000: 0.711305
Cost after iteration 12500: 0.707932
Cost after it

Cost after iteration 500: 1.029067
Cost after iteration 1000: 1.027232
Cost after iteration 1500: 1.026546
Cost after iteration 2000: 0.996469
Cost after iteration 2500: 0.828330
Cost after iteration 3000: 0.813851
Cost after iteration 3500: 0.809417
Cost after iteration 4000: 0.805713
Cost after iteration 4500: 0.801534
Cost after iteration 5000: 0.796528
Cost after iteration 5500: 0.790526
Cost after iteration 6000: 0.782941
Cost after iteration 6500: 0.773827
Cost after iteration 7000: 0.765990
Cost after iteration 7500: 0.760212
Cost after iteration 8000: 0.754941
Cost after iteration 8500: 0.749557
Cost after iteration 9000: 0.744231
Cost after iteration 9500: 0.739094
Cost after iteration 10000: 0.733931
Cost after iteration 10500: 0.728363
Cost after iteration 11000: 0.721493
Cost after iteration 11500: 0.715212
Cost after iteration 12000: 0.710212
Cost after iteration 12500: 0.706060
Cost after iteration 13000: 0.702592
Cost after iteration 13500: 0.699507
Cost after iteration 

In [67]:
# TODO: scaling over all the data is still missing :)