In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split as tts
from sklearn.preprocessing import minmax_scale as mms
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

In [2]:
def load_data(path,name):
    """Read a CSV file into a pandas DataFrame.

    Args:
        path: Directory that contains the file.
        name: File name inside ``path``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with its default options.
    """
    return pd.read_csv(os.path.join(path, name))

In [3]:
# Directory that holds the dataset. The original author's location is kept as
# the default so existing runs are unaffected; set the MUSIC_DATA_DIR
# environment variable to run the notebook elsewhere without editing this cell.
path = os.environ.get("MUSIC_DATA_DIR", "C:/Users/DanielaFe7/Desktop/Maestrado/Redes/RedesNeurais")
name = "default_features_1059_tracks.txt"
data = load_data(path,name)
# Printed after the file has been read, together with the (rows, columns) shape.
print("Loading music dataset ...",data.shape)
data.head()

Loading music dataset ... (1059, 70)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,60,61,62,63,64,65,66,67,68,69
0,7.161286,7.835325,2.911583,0.984049,-1.499546,-2.094097,0.576,-1.205671,1.849122,-0.425598,...,-1.504263,0.351267,-1.018726,-0.174878,-1.089543,-0.66884,-0.914772,-0.83625,-15.75,-47.95
1,0.225763,-0.094169,-0.603646,0.497745,0.874036,0.29028,-0.077659,-0.887385,0.432062,-0.093963,...,-0.495712,-0.465077,-0.157861,-0.157189,0.380951,1.088478,-0.123595,1.391141,14.91,-23.51
2,-0.692525,-0.517801,-0.788035,1.214351,-0.907214,0.880213,0.406899,-0.694895,-0.901869,-1.701574,...,-0.637167,0.14726,0.217914,2.718442,0.972919,2.081069,1.375763,1.063847,12.65,-8.0
3,-0.735562,-0.684055,2.058215,0.716328,-0.011393,0.805396,1.497982,0.114752,0.692847,0.052377,...,-0.178325,-0.065059,-0.724247,-1.020687,-0.75138,-0.385005,-0.012326,-0.392197,9.03,38.74
4,0.570272,0.273157,-0.279214,0.083456,1.049331,-0.869295,-0.265858,-0.401676,-0.872639,1.147483,...,-0.919463,-0.667912,-0.820172,-0.190488,0.306974,0.119658,0.271838,1.289783,34.03,-6.85


# Pre-processing Data Set

In [4]:
# NOTE: this message is emitted when the cell itself runs (i.e. when the
# function is defined), not when scalling() is called.
print("Scalling the data ...")
def scalling(data):
    """Rescale ``data`` with ``mms`` (sklearn's ``minmax_scale``, default arguments).

    Args:
        data: Array-like / DataFrame to rescale.

    Returns:
        Whatever ``mms(data)`` returns.
    """
    return mms(data)

Scalling the data ...


# NN L-Layer

In [6]:
def sigmoid(Z):
    """Logistic activation.

    Args:
        Z: Pre-activation values (scalar or NumPy array).

    Returns:
        A tuple ``(S, Z)``: ``S = 1 / (1 + exp(-Z))`` computed element-wise,
        and the untouched input ``Z`` so callers can keep it as a cache for
        the backward pass.
    """
    exp_neg = np.exp(-Z)
    activation = 1 / (1 + exp_neg)
    return activation, Z

In [7]:
def derivade_sigmoid(z):
    """Derivative of the logistic function evaluated at ``z``.

    Args:
        z: Pre-activation values (scalar or NumPy array).

    Returns:
        ``s * (1 - s)`` where ``s`` is the first value returned by ``sigmoid(z)``.
    """
    activation, _ = sigmoid(z)
    return activation * (1 - activation)

In [8]:
def initialize_parameters_deep(layer_dims):
    """Create the initial weights and biases of a fully connected network.

    The global NumPy RNG is reseeded with 3 on every call, so the same
    ``layer_dims`` always yields the same parameters.

    Args:
        layer_dims: Layer sizes, input layer first.

    Returns:
        Dict holding, for each layer ``l = 1 .. len(layer_dims) - 1``:
        ``"W<l>"`` of shape ``(layer_dims[l], layer_dims[l-1])`` drawn from
        N(0, 1) and ``"b<l>"`` of shape ``(layer_dims[l], 1)`` drawn from
        ``np.random.random`` (uniform on [0, 1)).
    """
    np.random.seed(3)
    parameters = {}

    # Pair every layer size with the size of the layer feeding into it.
    fan_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for idx, (n_in, n_out) in enumerate(fan_pairs, start=1):
        # Draw order (weights, then biases, layer by layer) fixes the values.
        parameters["W{}".format(idx)] = np.random.normal(0, 1, (n_out, n_in))
        parameters["b{}".format(idx)] = np.random.random((n_out, 1))

    return parameters

In [9]:
def initialize_velocity(parameters):
    """Build zero-filled momentum buffers matching ``parameters``.

    Args:
        parameters: Dict with keys ``"W1", "b1", ..., "WL", "bL"``.

    Returns:
        Dict with keys ``"dW<l>"`` and ``"db<l>"`` for ``l = 1 .. L``
        (``L = len(parameters) // 2``), each an all-zero array shaped like
        the corresponding parameter.
    """
    n_layers = len(parameters) // 2
    v = {}

    for idx in range(1, n_layers + 1):
        for kind in ("W", "b"):
            key = kind + str(idx)
            v["d" + key] = np.zeros_like(parameters[key])

    return v

In [10]:
def RMSE(Y_pred, Y):
    """Root-mean-squared error, computed separately for each output variable.

    The previous implementation returned the mean squared error (the square
    root was missing) even though the value is named and reported as RMSE.

    Args:
        Y_pred: Predictions with examples along the last axis (columns), as
            passed by ``L_layer_model``.
        Y: Targets with the same layout as ``Y_pred``.

    Returns:
        For 2-D inputs, a 1-D array with one RMSE per row (output variable)
        of the inputs; for 1-D inputs, a scalar.
    """
    # Transpose so examples run along axis 0, then average over the examples.
    squared_error = (Y.T - Y_pred.T) ** 2
    return np.sqrt(np.mean(squared_error, axis=0))

# Forward Propagation in L - Layers

In [11]:
def linear_forward(A, W, b):
    """Affine part of a layer's forward pass.

    Args:
        A: Activations coming into the layer.
        W: Weight matrix of the layer.
        b: Bias of the layer (broadcast against ``W @ A``).

    Returns:
        ``(Z, cache)`` where ``Z = W . A + b`` and ``cache = (A, W, b)`` is
        kept for the backward pass.
    """
    pre_activation = np.add(np.dot(W, A), b)
    return pre_activation, (A, W, b)

In [12]:
def linear_activation_forward(A_prev, W, b, activation):
    """Forward pass of one LINEAR -> ACTIVATION layer.

    Args:
        A_prev: Activations from the previous layer (or the input data).
        W: Weight matrix of this layer.
        b: Bias of this layer.
        activation: Name of the activation; only ``"sigmoid"`` is supported.

    Returns:
        ``(A, cache)`` where ``A`` is the layer output and
        ``cache = (linear_cache, activation_cache)`` with
        ``linear_cache = (A_prev, W, b)`` and ``activation_cache = Z``.

    Raises:
        ValueError: If ``activation`` is not ``"sigmoid"``. Previously this
            case fell through to an UnboundLocalError on ``linear_cache``.
    """
    # Validate first so an unsupported name fails with a clear message
    # before any computation is done.
    if activation != "sigmoid":
        raise ValueError("Unsupported activation: %r (only 'sigmoid' is implemented)" % (activation,))

    Z, linear_cache = linear_forward(A_prev, W, b)
    A, activation_cache = sigmoid(Z)

    cache = (linear_cache, activation_cache)
    return A, cache

In [13]:
def L_model_forward(X,Y,parameters,pred=False):
    """Forward pass through all layers; every layer uses a sigmoid activation.

    Args:
        X: Input data; the code comment on the original describes it as
            (input size, number of examples).
        Y: Accepted for signature compatibility; it is never read here.
        parameters: Dict with ``"W<l>"`` / ``"b<l>"`` for ``l = 1 .. L``.
        pred: When truthy, return only the output activations.

    Returns:
        ``AL`` if ``pred`` is truthy, otherwise ``(AL, caches)`` where
        ``caches`` holds one cache per layer, in layer order.
    """
    n_layers = len(parameters) // 2   # layers excluding the input layer
    caches = []
    activation = X

    # Layers 1 .. L-1.
    for idx in range(1, n_layers):
        activation, layer_cache = linear_activation_forward(
            activation,
            parameters['W' + str(idx)],
            parameters['b' + str(idx)],
            "sigmoid",
        )
        caches.append(layer_cache)

    # Output layer L.
    last = str(n_layers)
    AL, layer_cache = linear_activation_forward(
        activation, parameters['W' + last], parameters['b' + last], "sigmoid"
    )
    caches.append(layer_cache)

    # Prediction mode returns the activations only.
    return AL if pred else (AL, caches)

In [14]:
def compute_cost(AL, Y):
    """Binary cross-entropy cost, summed over all entries and divided by ``m``.

    Args:
        AL: Network outputs, values expected in [0, 1].
        Y: Targets, same shape as ``AL``; ``Y.shape[1]`` is used as ``m``.

    Returns:
        The cost after ``np.squeeze`` (a scalar value).
    """
    m = Y.shape[1]

    # A saturated sigmoid can return exactly 0.0 or 1.0, which makes log()
    # yield -inf and the products below NaN. Keep predictions strictly inside
    # (0, 1); values that are not saturated are unaffected.
    eps = 1e-15
    AL_safe = np.clip(AL, eps, 1 - eps)

    cost = (- 1 / m) * np.sum(Y * np.log(AL_safe) + (1 - Y) * np.log(1 - AL_safe))
    cost = np.squeeze(cost)      # e.g. turns [[17]] into 17
    return cost

# Backward Propagation in L - Layers

In [15]:
def linear_backward(dZ, cache):
    """Backward pass through the affine part of one layer.

    Args:
        dZ: Gradient of the cost with respect to the layer's linear output.
        cache: ``(A_prev, W, b)`` as stored by the forward pass.

    Returns:
        ``(dA_prev, dW, db)``: gradients with respect to the incoming
        activations, the weights and the bias. ``dW`` and ``db`` are scaled
        by ``1 / m`` with ``m = A_prev.shape[1]``.
    """
    prev_activation, weights, _ = cache
    scale = 1 / prev_activation.shape[1]

    grad_W = scale * np.dot(dZ, prev_activation.T)
    grad_b = scale * np.sum(dZ, axis=1, keepdims=True)
    grad_A_prev = np.dot(weights.T, dZ)
    return grad_A_prev, grad_W, grad_b

In [16]:
def linear_activation_backward(dA, cache, activation):
    """Backward pass of one LINEAR -> ACTIVATION layer.

    Args:
        dA: Gradient of the cost with respect to this layer's output.
        cache: ``(linear_cache, activation_cache)`` from the forward pass,
            where ``activation_cache`` is the pre-activation ``Z``.
        activation: Name of the activation; only ``"sigmoid"`` is supported.

    Returns:
        ``(dA_prev, dW, db)`` as produced by ``linear_backward``.

    Raises:
        ValueError: If ``activation`` is not ``"sigmoid"``. Previously this
            case fell through to an UnboundLocalError on ``dZ``.
    """
    # Validate first so an unsupported name fails with a clear message.
    if activation != "sigmoid":
        raise ValueError("Unsupported activation: %r (only 'sigmoid' is implemented)" % (activation,))

    linear_cache, activation_cache = cache

    # Chain rule: dZ = dA * g'(Z), with Z stored in activation_cache.
    dZ = dA * derivade_sigmoid(activation_cache)

    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db

In [17]:
def L_model_backward(AL, Y, caches):
    """Backpropagate the cross-entropy cost through all sigmoid layers.

    Args:
        AL: Output activations of the forward pass.
        Y: Targets; reshaped to ``AL.shape`` before use.
        caches: Per-layer caches from the forward pass, in layer order.

    Returns:
        Dict ``grads`` with, for each layer ``l = 1 .. L``:
        ``grads["dW<l>"]`` and ``grads["db<l>"]`` (gradients of that layer's
        parameters) and ``grads["dA<l>"]``, which stores the ``dA_prev``
        returned for layer ``l``, i.e. the gradient with respect to the
        activations feeding into layer ``l``.
    """
    grads = {}
    L = len(caches)  # number of layers, input layer not included
    Y = Y.reshape(AL.shape)

    # Derivative of the cost with respect to AL. A saturated sigmoid can
    # return exactly 0.0 or 1.0, which would cause division by zero and NaN
    # gradients; clip the predictions into (0, 1) first. Non-saturated
    # values are unaffected.
    eps = 1e-15
    AL_safe = np.clip(AL, eps, 1 - eps)
    dAL = -(np.divide(Y, AL_safe) - np.divide(1 - Y, 1 - AL_safe))

    # Output layer L.
    current_cache = caches[L-1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(
        dAL, current_cache, "sigmoid")

    # Layers L-1 down to 1: each consumes the dA stored by the layer above it.
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev, dW, db = linear_activation_backward(grads["dA" + str(l + 2)], current_cache, "sigmoid")
        grads["dA" + str(l + 1)] = dA_prev
        grads["dW" + str(l + 1)] = dW
        grads["db" + str(l + 1)] = db

    return grads

# Update Parameters

In [18]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one plain gradient-descent step.

    Args:
        parameters: Dict with ``"W<l>"`` / ``"b<l>"`` for ``l = 1 .. L``.
        grads: Dict with the matching ``"dW<l>"`` / ``"db<l>"`` entries.
        learning_rate: Step size.

    Returns:
        The same ``parameters`` dict, whose entries have been replaced by
        ``value - learning_rate * gradient``.
    """
    n_layers = len(parameters) // 2   # layers excluding the input layer

    for idx in range(1, n_layers + 1):
        for kind in ("W", "b"):
            key = kind + str(idx)
            # Rebind the entry to a new array; the old array is left untouched.
            parameters[key] = parameters[key] - learning_rate * grads["d" + key]

    return parameters

# Update Parameters with Momentum

In [19]:
def update_parameters_with_momentum(parameters, grads, v, learning_rate, beta):
    """Apply one gradient-descent step with momentum.

    For every parameter ``p`` with gradient ``g`` and velocity ``v_p``:
    ``v_p = beta * v_p + (1 - beta) * g`` followed by
    ``p = p - learning_rate * v_p``.

    Args:
        parameters: Dict with ``"W<l>"`` / ``"b<l>"`` for ``l = 1 .. L``.
        grads: Dict with the matching ``"dW<l>"`` / ``"db<l>"`` entries.
        v: Velocity dict with the same keys as ``grads``.
        learning_rate: Step size.
        beta: Momentum coefficient.

    Returns:
        ``(parameters, v)``: the same two dicts, with their entries replaced
        by the updated values.
    """
    n_layers = len(parameters) // 2   # layers excluding the input layer

    for idx in range(1, n_layers + 1):
        for kind in ("W", "b"):
            p_key = kind + str(idx)
            g_key = "d" + p_key
            # Exponentially weighted average of the gradients.
            v[g_key] = beta * v[g_key] + (1 - beta) * grads[g_key]
            # Step along the smoothed gradient.
            parameters[p_key] = parameters[p_key] - learning_rate * v[g_key]

    return parameters, v

# L - layer model

In [31]:
m = []  # NOTE(review): not referenced by any code visible in this notebook; confirm before removing
def L_layer_model(X, Y, X_test, Y_test, layers_dims, learning_rate, num_iterations, print_cost=False,
                  beta=0.99, print_every=500):
    """Train an all-sigmoid L-layer network with momentum and report train/test RMSE.

    Args:
        X: Training inputs, passed straight to ``L_model_forward``.
        Y: Training targets.
        X_test: Held-out inputs, used only for the final test report.
        Y_test: Held-out targets, used only for the final test report.
        layers_dims: Layer sizes, input layer first.
        learning_rate: Step size of the momentum update.
        num_iterations: Number of gradient-descent iterations.
        print_cost: When truthy, print and record the cost every
            ``print_every`` iterations.
        beta: Momentum coefficient (previously hard-coded to 0.99).
        print_every: Logging interval in iterations (previously hard-coded
            to 500).

    Returns:
        The trained ``parameters`` dict.
    """
    # Kept for compatibility; initialize_parameters_deep reseeds the RNG itself.
    np.random.seed(1)
    costs = []  # cost sampled every `print_every` iterations when print_cost is set

    # Parameter and velocity initialization.
    parameters = initialize_parameters_deep(layers_dims)
    v = initialize_velocity(parameters)

    # Gradient-descent loop.
    for i in range(0, num_iterations):

        # Forward propagation: [LINEAR -> SIGMOID] for every layer.
        AL, caches = L_model_forward(X,Y,parameters,pred=False)

        # Compute cost.
        cost = compute_cost(AL, Y)

        # Backward propagation.
        grads = L_model_backward(AL, Y, caches)

        # Update parameters with momentum. To train with plain gradient
        # descent instead, replace the line below with:
        #     parameters = update_parameters(parameters, grads, learning_rate)
        parameters, v = update_parameters_with_momentum(parameters, grads, v, learning_rate, beta = beta)

        # Print and record the cost every `print_every` iterations.
        if print_cost and i % print_every == 0:
            print ("Cost after iteration %i: %f" %(i, cost))
            costs.append(cost)

    # Train prediction.
    Y_prediction =  L_model_forward(X,Y,parameters,pred=True)
    print("------------------")
    print("Train RMSE: ",RMSE(Y_prediction, Y))

    # Test prediction.
    print("------------------")
    Y_prediction =  L_model_forward(X_test,Y_test,parameters,pred=True)
    print("Test RMSE: ",RMSE(Y_prediction, Y_test))

    # `costs` can be plotted for a learning curve, e.g. plt.plot(np.squeeze(costs)).

    return parameters

# Cross Validation: KFold

In [32]:
# StratifiedKFold cannot be used here: this is a regression problem, so there
# are no class labels to stratify on. As a consequence the whole dataset is
# scaled once, before the folds are taken.
# X_index / Y_index are only used for their shapes and to generate fold indices.
Y_index = data[["68","69"]]
X_index = data.drop(["68", "69"], axis=1)
print(X_index.shape)
print(Y_index.shape)
kf = KFold(n_splits=10,random_state=2,shuffle=True)
kf.get_n_splits(X_index)
print(kf)

# The fold indices are applied to the pre-processed (scaled) arrays below:
# the last two columns are the targets, everything before them the features.
scaling_data = scalling(data)
XX = np.delete(scaling_data, np.s_[-2:], axis=1)
YY = scaling_data[:,-2:]
print(XX.shape)
print(YY.shape)


(1059, 68)
(1059, 2)
KFold(n_splits=10, random_state=2, shuffle=True)
(1059, 68)
(1059, 2)


In [33]:
# Train and evaluate one network per fold; L_layer_model prints the costs and
# the train/test RMSE of each fold.
fold = 1
layers_dims = [68, 10, 8, 5, 2]
for train_index, test_index in kf.split(X_index):
    print("Fold: ",fold)

    # Select this fold's rows from the scaled feature / target arrays.
    X_train = XX[train_index]
    X_test = XX[test_index]
    Y_train = YY[train_index]
    Y_test = YY[test_index]

    # The model expects examples along the columns, hence the transposes.
    parameters = L_layer_model(
        X_train.T, Y_train.T, X_test.T, Y_test.T, layers_dims,
        learning_rate = 0.99, num_iterations = 20000, print_cost = True,
    )
    print("------------------")
    fold += 1

Fold:  1
Cost after iteration 0: 1.467737
Cost after iteration 500: 1.304763
Cost after iteration 1000: 1.295018
Cost after iteration 1500: 1.287046
Cost after iteration 2000: 1.282399
Cost after iteration 2500: 1.278571
Cost after iteration 3000: 1.272632
Cost after iteration 3500: 1.265089
Cost after iteration 4000: 1.259544
Cost after iteration 4500: 1.255194
Cost after iteration 5000: 1.251434
Cost after iteration 5500: 1.248067
Cost after iteration 6000: 1.244989
Cost after iteration 6500: 1.242165
Cost after iteration 7000: 1.239588
Cost after iteration 7500: 1.237228
Cost after iteration 8000: 1.235029
Cost after iteration 8500: 1.232940
Cost after iteration 9000: 1.230916
Cost after iteration 9500: 1.228917
Cost after iteration 10000: 1.226919
Cost after iteration 10500: 1.224917
Cost after iteration 11000: 1.222926
Cost after iteration 11500: 1.220970
Cost after iteration 12000: 1.219057
Cost after iteration 12500: 1.217184
Cost after iteration 13000: 1.215339
Cost after itera

Cost after iteration 3000: 1.278800
Cost after iteration 3500: 1.270877
Cost after iteration 4000: 1.265263
Cost after iteration 4500: 1.261281
Cost after iteration 5000: 1.258040
Cost after iteration 5500: 1.255247
Cost after iteration 6000: 1.252772
Cost after iteration 6500: 1.250533
Cost after iteration 7000: 1.248448
Cost after iteration 7500: 1.246428
Cost after iteration 8000: 1.244406
Cost after iteration 8500: 1.242341
Cost after iteration 9000: 1.240239
Cost after iteration 9500: 1.238136
Cost after iteration 10000: 1.236058
Cost after iteration 10500: 1.234012
Cost after iteration 11000: 1.231999
Cost after iteration 11500: 1.230028
Cost after iteration 12000: 1.228111
Cost after iteration 12500: 1.226255
Cost after iteration 13000: 1.224462
Cost after iteration 13500: 1.222731
Cost after iteration 14000: 1.221066
Cost after iteration 14500: 1.219477
Cost after iteration 15000: 1.217973
Cost after iteration 15500: 1.216555
Cost after iteration 16000: 1.215215
Cost after iter