In [1]:
import numpy as np
def relu(x):
  return np.maximum(0, x)

def reluPrime(x):
  return x > 0

In [2]:
def linear(x):
    return x

def sigmoid(x):
  return 1 / (1 + np.exp(-x))

def softmax(x):
    return np.exp(x) / np.exp(x).sum(axis=-1,keepdims=True)

In [3]:
# Mean Square Error -> usada para regresión (con activación lineal)
def mse(y, y_hat):
#     print(y.shape, y_hat.shape)
    return np.mean((y_hat - y.reshape(y_hat.shape))**2)

# Binary Cross Entropy -> usada para clasificación binaria (con sigmoid)
def bce(y, y_hat):
    return - np.mean(y.reshape(y_hat.shape)*np.log(y_hat) - (1 - y.reshape(y_hat.shape))*np.log(1 - y_hat))

# Cross Entropy (aplica softmax + cross entropy de manera estable) -> usada para clasificación multiclase
def crossentropy(y, y_hat):
#     print(y)
#     print(len(y_hat))
#     print(np.arange(len(y_hat)))
    logits = y_hat[np.arange(len(y_hat)),y]
    entropy = - logits + np.log(np.sum(np.exp(y_hat),axis=-1))
    return entropy.mean()

In [4]:
def grad_mse(y, y_hat):
    return y_hat - y.reshape(y_hat.shape)

def grad_bce(y, y_hat):
    return y_hat - y.reshape(y_hat.shape)

def grad_crossentropy(y, y_hat):
    answers = np.zeros_like(y_hat)
    answers[np.arange(len(y_hat)),y] = 1    
    return (- answers + softmax(y_hat)) / y_hat.shape[0]

In [5]:
# clase base MLP 

class MLP():
  def __init__(self, D_in, H, D_out, loss, grad_loss, activation):
    # pesos de la capa 1
    self.w1, self.b1 = np.random.normal(loc=0.0,
                                  scale=np.sqrt(2/(D_in+H)),
                                  size=(D_in, H)), np.zeros(H)
    # pesos de la capa 2
    self.w2, self.b2 = np.random.normal(loc=0.0,
                                  scale=np.sqrt(2/(H+D_out)),
                                  size=(H, D_out)), np.zeros(D_out)
    self.ws = []
    # función de pérdida y derivada
    self.loss = loss
    self.grad_loss = grad_loss
    # función de activación
    self.activation = activation

  def __call__(self, x):
    # salida de la capa 1
    self.h_pre = np.dot(x, self.w1) + self.b1
    self.h = relu(self.h_pre)
    # salida del MLP
    y_hat = np.dot(self.h, self.w2) + self.b2 
    return self.activation(y_hat)
    
  def fit(self, X, Y, epochs = 100, lr = 0.001, batch_size=None, verbose=True, log_each=1):
    batch_size = len(X) if batch_size == None else batch_size
    batches = len(X) // batch_size
#     print(batches)
    l = []
    for e in range(1,epochs+1):     
        # Mini-Batch Gradient Descent
        _l = []
        for b in range(batches):
            # batch de datos
            x = X[b*batch_size:(b+1)*batch_size]
            y = Y[b*batch_size:(b+1)*batch_size]
#             print(x)
            # salida del perceptrón
            y_pred = self(x)
#             print(y_pred.shape, y.shape)
            # función de pérdida
            loss = self.loss(y, y_pred)
            _l.append(loss)        
            # Backprop 
            dldy = self.grad_loss(y, y_pred) 
            grad_w2 = np.dot(self.h.T, dldy)
            grad_b2 = dldy.mean(axis=0)
            dldh = np.dot(dldy, self.w2.T)*reluPrime(self.h_pre)      
            grad_w1 = np.dot(x.T, dldh)
            grad_b1 = dldh.mean(axis=0)
            # Update (GD)
            self.w1 = self.w1 - lr * grad_w1
            self.b1 = self.b1 - lr * grad_b1
            self.w2 = self.w2 - lr * grad_w2
            self.b2 = self.b2 - lr * grad_b2
        l.append(np.mean(_l))
        # guardamos pesos intermedios para visualización
        self.ws.append((
            self.w1.copy(),
            self.b1.copy(),
            self.w2.copy(),
            self.b2.copy()
        ))
        if verbose and not e % log_each:
            print(f'Epoch: {e}/{epochs}, Loss: {np.mean(l):.5f}')

  def predict(self, ws, x):
    w1, b1, w2, b2 = ws
    h = relu(np.dot(x, w1) + b1)
    y_hat = np.dot(h, w2) + b2
    return self.activation(y_hat)

In [6]:
# MLP para regresión
class MLPRegression(MLP):
    def __init__(self, D_in, H, D_out):
        super().__init__(D_in, H, D_out, mse, grad_mse, linear)

# MLP para clasificación binaria
class MLPBinaryClassification(MLP):
    def __init__(self, D_in, H, D_out):
        super().__init__(D_in, H, D_out, bce, grad_bce, sigmoid)

# MLP para clasificación multiclase
class MLPClassification(MLP):
    def __init__(self, D_in, H, D_out):
        super().__init__(D_in, H, D_out, crossentropy, grad_crossentropy, linear)

In [7]:

import matplotlib.pyplot as plt

data = np.genfromtxt('winequality-red.csv',delimiter=',')

trainPorcen = int(len(data)/100*90)
testPorcen = int(len(data)/100*10)
print(trainPorcen, testPorcen)

X_test = data[trainPorcen:, :11]
y_test = data[trainPorcen:, 11]

X_train = data[:trainPorcen, :11]
y_train = data[:trainPorcen, 11]

# print(X_test,X_train)

# X = data[:, :11]
# y = data[:, 11]
y_train = np.array([int(e) for e in y_train])
y_train = np.squeeze(y_train)


y_test = np.array([int(e) for e in y_test])
y_test = np.squeeze(y_test)

# print(y_train)

# print(X)
# print(y_trian)
# print(X[y==3, 4])
X_train.shape, y_train.shape
X_mean, X_std = X_train.mean(axis=0), X_train.std(axis=0)
X_norm = (X_train - X_mean) / X_std
# print( X_norm)


1439 159


In [8]:
model = MLPClassification(D_in=11, H=10, D_out=11)
epochs, lr = 10, 0.2
model.fit(X_norm, y_train, epochs, lr, log_each=10)

Epoch: 10/10, Loss: 2.27858


In [9]:
last = model.ws.pop()
# print(last)
all_theta = model.predict(last,X_test)
print(all_theta)

[[-27.20080125 -10.2916819  -22.44421062 ...  -5.11936246 -11.28769504
   -2.41987071]
 [-12.25452676  -0.64423429  -6.73750781 ...  -4.2027472   -4.66031318
   -2.08059335]
 [-28.25358402 -12.43344126 -24.2666966  ...  -2.52708203 -10.30317958
   -1.58224745]
 ...
 [-16.309514    -0.91180104  -8.12327109 ...  -7.56939623  -6.20096405
   -1.49588234]
 [-17.28929656  -1.24742436  -8.63892352 ...  -8.31611239  -6.51812314
   -1.28785267]
 [-15.18912627  -3.12092466 -10.34233677 ...  -4.59394473  -6.06513688
   -2.49019291]]


In [10]:
p = np.argmax(all_theta, axis = 1)
print(p,y_test)

[6 6 6 6 6 6 6 6 6 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 5 5 6 6 6 6 6 6 6 6 5 6
 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
 6 6 6 6 6 6 6 6 5 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
 6 6 6 6 6 6 5 6 5 6 5 6] [6 7 6 5 5 6 6 5 5 5 8 7 7 7 5 6 6 6 5 5 7 6 4 6 6 5 5 7 4 7 3 5 5 6 5 5 7
 5 7 3 5 4 5 4 5 4 5 5 5 5 6 6 5 5 5 7 6 5 6 6 6 5 5 5 6 6 3 6 6 6 5 6 5 6
 6 6 6 5 6 5 5 6 4 5 5 6 5 6 6 6 6 6 5 6 5 7 6 6 6 5 5 6 7 6 6 7 6 5 5 5 8
 5 5 6 5 6 7 5 6 5 5 5 5 5 5 5 6 6 5 5 6 6 6 5 6 6 6 6 6 6 5 6 5 5 5 7 6 6
 6 6 5 6 6 6 6 5 6 6 5 6]


In [11]:
print('Test Set Accuracy: %f' % (np.mean(p == y_test[:]) * 100))

Test Set Accuracy: 45.625000
