# Parte 2: Desarrollo de la Red Neuronal

In [271]:
from tkinter import X, Y
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

all_data = pd.read_csv("pasajeros_actualizado.csv")

# Extraer columnas de entrada, escalarlas dividiendo por 255
all_inputs = all_data.iloc[:, 0:16].values
all_outputs = all_data.iloc[:, -1].values
scaler = StandardScaler()
all_inputs = scaler.fit_transform(all_inputs)

# Dividir los conjuntos de datos de entrenamiento y prueba
X_train, X_test, Y_train, Y_test = train_test_split(all_inputs, all_outputs, test_size=1 / 3)
n = X_train.shape[0]
print(n)

# Funciones de activación
relu = lambda x: np.maximum(x, 0)
logistic = lambda x: 1 / (1 + np.exp(-x))

# Construir red neuronal con pesos y sesgos inicializados aleatoriamente
np.random.seed(79)
w_hidden = np.random.rand(8, 16)        # (8x16)
w_hidden2 = np.random.rand(4, 8)        # (4x8)
w_output = np.random.rand(1, 4)         # (1x4)

b_hidden = np.random.rand(8, 1)         # (8x1)
b_hidden2 = np.random.rand(4, 1)        # (4x1)
b_output = np.random.rand(1, 1)         # (1x1)

# Ejecutar entradas a través de la red neuronal para obtener salidas predichas
def forward_prop(X):
    Z1 = w_hidden @ X + b_hidden        # (8x16) @ (16x1) + (8x1) = (8x1)
    A1 = relu(Z1)
    Z2 = w_hidden2 @ A1 + b_hidden2     # (4x8) @ (8x1) + (4x1) = (4x1)
    A2 = relu(Z2)
    Z3 = w_output @ A2 + b_output       # (1x4) @ (4x1) + (1x1) = (1x1)
    A3 = logistic(Z3)
    return Z1, A1, Z2, A2, Z3, A3

# Calculo de precisión
test_predictions = forward_prop(X_test.transpose())[4]  # me interesa solo la capa de salida, A2
test_comparisons = np.equal((test_predictions >= .5).flatten().astype(int), Y_test)
accuracy = sum(test_comparisons.astype(int) / X_test.shape[0])

print(accuracy)

# Tasa de aprendizaje
L = 0.01

# Derivadas de las funciones de activación
d_relu = lambda x: x > 0
d_logistic = lambda x: np.exp(-x) / (1 + np.exp(-x)) ** 2

# Función de backward propagation
def backward_prop(Z1, A1, Z2, A2, Z3, A3, X, Y):
    dC_dA3 =   2 * A3 - 2 * Y       # (1x1)  == (1x1) A3
    dA3_dZ3 =  d_logistic(Z3)       # (1x1)
    dZ3_dA2 =  w_output             # (1x4)
    dZ3_dW3 =  A2                   # (4x1)
    dZ3_dB3 =  1                    # (1x1)
    
    dA2_dZ2 =  d_relu(Z2)           # (4x1)
    dZ2_dA1 =  w_hidden2            # (4x8)
    dZ2_dW2 =  A1                   # (8x1)
    dZ2_dB2 =  1                    # (1x1)
    
    dA1_dZ1 =  d_relu(Z1)           # (8x1)
    dZ1_dW1 =  X                    # (16x1)
    dZ1_dB1 =  1                    # (1x1)
    
    dC_dW3 = dC_dA3 @ dA3_dZ3 @ dZ3_dW3.T       # (1x1) @ (1x1) @ (1x4) = (1x4)   == (1x4)  W3 
    dC_dB3 = dC_dA3 @ dA3_dZ3 * dZ3_dB3         # (1x1) @ (1x1) * (1x1) = (1x1)   == (1x1)  B3
    dC_dA2 = (dC_dA3 @ dA3_dZ3 @ dZ3_dA2).T     # (1x1) @ (1x1) @ (1x4) = (4x1)   == (4x1)  A2
    
    dC_dW2 = dC_dA2.T @ (dA2_dZ2 * dZ2_dW2.T)   # (1x4) @ ((4x1) * (1x8)) = (1x8) == (4x8)  W2
    dC_dB2 = dC_dA2 @ dA2_dZ2 * dZ2_dB2         # (1x4) @ (4x1) * (1x1) = (1x1)   == (4x1)  B2
    dC_dA1 = dC_dA2.T @ dA2_dZ2.T @ dZ2_dA1     # (4x1) @ (1x4) @ (4x8) = (4x8)   == (8x1)  A1
    
    dC_dW1 = dC_dA1 @ dA1_dZ1 @ dZ1_dW1.T       # (4x8) @ (8X1) @ (1X16) = (4x16) == (8x16) W1
    dC_dB1 = dA1_dZ1.T @ dC_dA1.T * dZ1_dB1     # (1x8) @ (8x4) * (1x1) = (1x4)   == (8x1)  B1
    
    return dC_dW1, dC_dB1, dC_dW2, dC_dB2, dC_dW3, dC_dB3


for i in range(150_000):
    # seleccionar aleatoriamente uno de los datos de entrenamiento
    idx = np.random.choice(n, 1, replace=False)
    X_sample = X_train[idx].transpose()
    Y_sample = Y_train[idx]

    # pasar datos seleccionados aleatoriamente a través de la red neuronal
    Z1, A1, Z2, A2, Z3, A3 = forward_prop(X_sample)

    # distribuir error a través de la retropropagación
    # y devolver pendientes para pesos y sesgos
    dW1, dB1, dW2, dB2, dW3, dB3 = backward_prop(Z1, A1, Z2, A2, Z3, A3, X_sample, Y_sample)

    # actualizar pesos y sesgos
    w_hidden -= L * dW1
    b_hidden -= L * dB1
    w_hidden2 -= L * dW2
    b_hidden2 -= L * dB2
    w_output -= L * dW3
    b_output -= L * dB3
    
print("Shapes:")
print("w_hidden:", w_hidden.shape)
print("dW1:", dW1.shape)
print("w_hidden2:", w_hidden2.shape)
print("dW2:", dW2.shape)
print("w_output:", w_output.shape)
print("dW1:", dW3.shape)

# Calculo de precisión
test_predictions = forward_prop(X_test.transpose())[4]  # me interesa solo la capa de salida, A2
test_comparisons = np.equal((test_predictions >= .5).flatten().astype(int), Y_test)
accuracy = sum(test_comparisons.astype(int) / X_test.shape[0])

print(accuracy)


17317
0.688878623397638


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 1 is different from 4)

In [177]:
'''from sympy import *

W1, W2, W3, B1, B2, B3, A1, A2, A3, Z1, Z2, Z3, X, Y = \
    symbols('W1 W2 W3 B1 B2 B3 A1 A2 A3 Z1 Z2 Z3 X Y')

# Derivada de la función costo respecto A3
C = (A3 - Y)**2
dC_dA3 = diff(C, A3)
print("dC_dA3 = ", dC_dA3) # 2*A3 - 2*Y

# Derivada de A3 respecto de Z3
logistic = lambda x: 1 / (1 + exp(-x))
_A3 = logistic(Z3)
dA3_dZ3 = diff(_A3, Z3)
print("dA3_dZ3 = ", dA3_dZ3) # exp(-Z3)/(1 + exp(-Z3))**2

# Derivada de Z3 respecto a A2
_Z3 = A2*W3 + B3
dZ3_dA2 = diff(_Z3, A2)
print("dZ3_dA2 = ", dZ3_dA2) # W3

# Derivada de Z3 respecto a W3
dZ3_dW3 = diff(_Z3, W3)
print("dZ3_dW3 = ", dZ3_dW3) # A2

# Derivada de Z3 respecto a B3
dZ3_dB3 = diff(_Z3, B3)
print("dZ3_dB3 = ", dZ3_dB3) # 1

# Derivada de A2 respecto de Z2
_A2 = logistic(Z2)
dA2_dZ2 = diff(_A2, Z2)
print("dA2_dZ2 = ", dA2_dZ2) # exp(-Z2)/(1 + exp(-Z2))**2

# Derivada de Z2 respecto a A1
_Z2 = A1*W2 + B2
dZ2_dA1 = diff(_Z2, A1)
print("dZ2_dA1 = ", dZ2_dA1) # W2

# Derivada de Z2 respecto a W2
dZ2_dW2 = diff(_Z2, W2)
print("dZ2_dW2 = ", dZ2_dW2) # A1

# Derivada de Z2 respecto a B2
dZ2_dB2 = diff(_Z2, B2)
print("dZ2_dB2 = ", dZ2_dB2) # 1

# Derivada de A1 respecto de Z1
relu = lambda x: Max(x, 0)
_A1 = relu(Z1)

d_relu = lambda x: x > 0 # Pendiente es 1 para los positivos, 0 para los negativos
dA1_dZ1 = d_relu(Z1)
print("dA1_dZ1 = ", dA1_dZ1) # Z1 > 0

# Derivada de Z1 respecto a W1
_Z1 = X*W1 + B1
dZ1_dW1 = diff(_Z1, W1)
print("dZ1_dW1 = ", dZ1_dW1) # X

# Derivada de Z1 respecto a B1
dZ1_dB1 = diff(_Z1, B1)
print("dZ1_dB1 = ", dZ1_dB1) # 1
'''

'from sympy import *\n\nW1, W2, W3, B1, B2, B3, A1, A2, A3, Z1, Z2, Z3, X, Y =     symbols(\'W1 W2 W3 B1 B2 B3 A1 A2 A3 Z1 Z2 Z3 X Y\')\n\n# Derivada de la función costo respecto A3\nC = (A3 - Y)**2\ndC_dA3 = diff(C, A3)\nprint("dC_dA3 = ", dC_dA3) # 2*A3 - 2*Y\n\n# Derivada de A3 respecto de Z3\nlogistic = lambda x: 1 / (1 + exp(-x))\n_A3 = logistic(Z3)\ndA3_dZ3 = diff(_A3, Z3)\nprint("dA3_dZ3 = ", dA3_dZ3) # exp(-Z3)/(1 + exp(-Z3))**2\n\n# Derivada de Z3 respecto a A2\n_Z3 = A2*W3 + B3\ndZ3_dA2 = diff(_Z3, A2)\nprint("dZ3_dA2 = ", dZ3_dA2) # W3\n\n# Derivada de Z3 respecto a W3\ndZ3_dW3 = diff(_Z3, W3)\nprint("dZ3_dW3 = ", dZ3_dW3) # A2\n\n# Derivada de Z3 respecto a B3\ndZ3_dB3 = diff(_Z3, B3)\nprint("dZ3_dB3 = ", dZ3_dB3) # 1\n\n# Derivada de A2 respecto de Z2\n_A2 = logistic(Z2)\ndA2_dZ2 = diff(_A2, Z2)\nprint("dA2_dZ2 = ", dA2_dZ2) # exp(-Z2)/(1 + exp(-Z2))**2\n\n# Derivada de Z2 respecto a A1\n_Z2 = A1*W2 + B2\ndZ2_dA1 = diff(_Z2, A1)\nprint("dZ2_dA1 = ", dZ2_dA1) # W2\n\n# Deri

In [255]:
'''# Tasa de aprendizaje
L = 0.01

# Derivadas de las funciones de activación
d_relu = lambda x: x > 0
d_logistic = lambda x: np.exp(-x) / (1 + np.exp(-x)) ** 2

# Función de backward propagation
def backward_prop(Z1, A1, Z2, A2, Z3, A3, X, Y):
    dC_dA3 =   2 * A3 - 2 * Y       # (1x1)
    dA3_dZ3 =  d_logistic(Z3)       # (1x1)
    dZ3_dA2 =  w_output             # (1x4)
    dZ3_dW3 =  A2                   # (4x1)
    dZ3_dB3 =  1                    # (1x1)
    
    dA2_dZ2 =  d_relu(Z2)           # (4x1)
    dZ2_dA1 =  w_hidden2            # (4x8)
    dZ2_dW2 =  A1                   # (8x1)
    dZ2_dB2 =  1                    # (1x1)
    
    dA1_dZ1 =  d_relu(Z1)           # (8x1)
    dZ1_dW1 =  X                    # (16x1)
    dZ1_dB1 =  1                    # (1x1)
    
    dC_dW3 = dC_dA3 @ dA3_dZ3 @ dZ3_dW3.T               # (1x1) @ (1x1) @ (1x4) = (1x4)
    dC_dB3 = dC_dA3 @ dA3_dZ3 * dZ3_dB3                 # (1x1) @ (1x1) * (1x1) = (1x1)
    dC_dA2 = dC_dA3 @ dA3_dZ3 @ dZ3_dA2                 # (1x1) @ (1x1) @ (1x4) = (1x4)
    
    dC_dW2 = dZ2_dW2 @ (dA2_dZ2 * dC_dA2)               # (8x1) @ (1x4) = (8x4)
    dC_dB2 = dC_dA2 @ dA2_dZ2.T * dZ2_dB2               # (1x1) @ (1x4) * (1x1) = (1x4)
    dC_dA1 = dZ2_dA1.T @ (dA2_dZ2 * dC_dA2)             # (8x4) @ ((4x1) * (1x4)) = (8x4) @ (4x1) = (8x1)
    
    dC_dW1 = (dZ1_dW1 @ (dA1_dZ1 * dC_dA1).T)           # (16x1) @ ((8x1) * (8x1)).T = ((16x1) @ (1x8)) = (8x16)
    dC_dB1 = dZ1_dB1 * (dA1_dZ1.T * dC_dA1)             # (1x1) * ((1x8) * (8x1)) = (1x1) * (8x1)
    

    return dC_dW1, dC_dB1, dC_dW2, dC_dB2, dC_dW3, dC_dB3


for i in range(150_000):
    # seleccionar aleatoriamente uno de los datos de entrenamiento
    idx = np.random.choice(n, 1, replace=False)
    X_sample = X_train[idx].transpose()
    Y_sample = Y_train[idx].reshape(1, -1)

    # pasar datos seleccionados aleatoriamente a través de la red neuronal
    Z1, A1, Z2, A2, Z3, A3 = forward_prop(X_sample)

    # distribuir error a través de la retropropagación
    # y devolver pendientes para pesos y sesgos
    dW1, dB1, dW2, dB2, dW3, dB3 = backward_prop(Z1, A1, Z2, A2, Z3, A3, X_sample, Y_sample)
    
    # actualizar pesos y sesgos
    w_hidden -= L * dW1.T
    b_hidden -= L * dB1
    w_hidden2 -= L * dW2
    b_hidden2 -= L * dB2
    w_output -= L * dW3
    b_output -= L * dB3
    
print("Shapes:")
print("w_hidden:", w_hidden.shape)
print("dW1:", dW1.shape)
print("w_hidden2:", w_hidden2.shape)
print("dW2:", dW2.shape)
print("w_output:", w_output.shape)
print("dW1:", dW3.shape)

# Calculo de precisión
test_predictions = forward_prop(X_test.transpose())[4]  # me interesa solo la capa de salida, A2
test_comparisons = np.equal((test_predictions >= .5).flatten().astype(int), Y_test)
accuracy = sum(test_comparisons.astype(int) / X_test.shape[0])

print(accuracy)
'''

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 4 is different from 1)