## Regresiones + Gradient Descent

In [1]:
# Cargo el módulo de numpy
#-------------------------
import numpy as np
import matplotlib.pyplot as plt
import time
#Si queremos que las imágenes sean mostradas en una ventana emergente quitar el inline
%matplotlib  

Using matplotlib backend: Qt5Agg


In [66]:
# Definición de las clases
#=========================

# Definición de la clase para levantar (y dividir) los datos
#===========================================================
class Data(object):

    def __init__(self, path):
        self.dataset = self._build_dataset(path)

    def _build_dataset(self, path):
        # Armo una estructura de datos para guardarlos ahí
        #-------------------------------------------------
        structure = [('X1', np.float),
                     ('X2', np.float),
                     ('y', np.float)]
        
        # Abro el archivo lo recorro llenando la estructura creada línea a línea
        #-----------------------------------------------------------------------
        with open(path, encoding="utf8") as data_csv:

            data_gen = ((float(line.split(',')[0]), float(line.split(',')[1]), float(line.split(',')[2])) # add here + 10 in second value
                        for i, line in enumerate(data_csv) if i != 0)
            embeddings = np.fromiter(data_gen, structure)

        return embeddings
    
    # Separo los los datos (train y test)
    #------------------------------------
    def split(self, percentage): # 0.8
        X1 = self.dataset['X1']
        X2 = self.dataset['X2']
        y = self.dataset['y']

        permuted_idxs = np.random.permutation(X1.shape[0])

        train_idxs = permuted_idxs[0:int(percentage * X1.shape[0])]

        test_idxs = permuted_idxs[int(percentage * X1.shape[0]): X1.shape[0]]

        X_train = np.vstack((X1[train_idxs],X2[train_idxs]))
        X_test = np.vstack((X1[test_idxs],X2[test_idxs]))

        y_train = y[train_idxs]
        y_test = y[test_idxs]

        return X_train, X_test, y_train, y_test
                        
                        
# Clase base de la que heredan las que vayamos implementando
#-----------------------------------------------------------
# Es conveniente tener una clase base de la que vayan heredando las demás. Siempre habrá un método fit
# y un método predict. Pero en esta clase base puede haber definiciones de atributos comunes a todas
#===========================================================
class BaseModel(object):

    def __init__(self):
        self.model = None

    def fit(self, X, Y):
        return NotImplemented

    def predict(self, X):
        return NotImplemented


class ConstantModel(BaseModel):
    # El modelo constante solo saca la media de los datos y devuelve ese valor
    # Es útil para comparar. Ningún modelo debería ser peor que este.
    #-------------------------------------------------------------------------
    def fit(self, X, Y):
        W = Y.mean()
        self.model = W

    def predict(self, X):
        # La "predicción" consiste en devolver la media para todos los valores
        return np.ones(len(X)) * self.model

# Modelo de la regresión lineal
#==============================
class LinearRegression(BaseModel):
    # Este modelo de regresión lineal ajusta únicamente la pendiente, no contempla la ordenada al origen
    def fit(self, X, y):
        # Verificamos si X es un vector o una matriz
        if len(X.shape) == 1:
            # Esta es una manera de escribir la pseudo-inversa (X'.X)^(-1).X'.y
            W = X.T.dot(y) / X.T.dot(X)
        else:
            # Y esta es la manera con matrices
            W = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
        self.model = W

    def predict(self, X):
        return self.model * X
    
# Modelo que incluye la ordenada al origen (b)
# ============================================
class LinearRegressionWithB(BaseModel):

    def fit(self, X, y):
        # En el caso de ajustar con ordenada al origen le agregamos la columna de b con unos
        # (Le agrega la fila abajo y luego traspongo --> Vectores columna)
        X_expanded = np.vstack((X, np.ones(len(X)))).T
        W = np.linalg.inv(X_expanded.T.dot(X_expanded)).dot(X_expanded.T).dot(y)
        self.model = W

    def predict(self, X):
        X_expanded = np.vstack((X, np.ones(len(X)))).T
        return X_expanded.dot(self.model)

# Modelo de la regresión cuadrática
#==================================
class QuadraticRegression(BaseModel):

    def fit(self, X, y):
        # Armamos la matriz de ajuste
        X_expanded = np.vstack((X**2, X, np.ones(len(X)))).T
        W = np.linalg.inv(X_expanded.T.dot(X_expanded)).dot(X_expanded.T).dot(y)
        
        self.model = W

    def predict(self, X):
        X_expanded = np.vstack((X**2, X, np.ones(len(X)))).T
        return X_expanded.dot(self.model)

# Modelo de la regresión cuadrática
#==================================
class PolyRegression(BaseModel):
    
    def fit(self, X, y, n):
        # Tomo X y le agrego el término independiente
        X_expanded = np.vstack((X, np.ones(len(X))))
        for i in range(n-1):
            # Armamos la matriz de ajuste a partir del grado del polinomio
            X_n = X**(n-i)
            X_expanded = np.vstack((X_n, X_expanded))
         
        X_expanded = X_expanded.T
            
        W = np.linalg.inv(X_expanded.T.dot(X_expanded)).dot(X_expanded.T).dot(y)
        
        self.model = W

    def predict(self, X, n):
        # Tomo X y le agrego el término independiente
        X_expanded = np.vstack((X, np.ones(len(X))))
        for i in range(n-1):
            # Armamos la matriz de ajuste a partir del grado del polinomio
            X_n = X**(n-i)
            X_expanded = np.vstack((X_n, X_expanded))
         
        X_expanded = X_expanded.T
        
        return X_expanded.dot(self.model)
    
    
# Clases de métricas
#===================

# Clase madre
class Metric(object):
    def __call__(self, target, prediction):
        return NotImplemented

# Por ahora solo esta --> Error cuadrático medio
class MSE(Metric):
    def __call__(self, target, prediction):
        n = target.size
        return np.sum((target - prediction) ** 2) / n

class Precision(Metric):
    def __call__(self, truth, prediction):
        
        # Encontramos los True Positive
        true_pos_mask = (prediction == 1) & (truth == 1)
        true_pos = true_pos_mask.sum()
        
        # Encontramos los False Positive
        false_pos_mask = (prediction == 1) & (truth == 0)
        false_pos = false_pos_mask.sum()
        
        return true_pos / (true_pos + false_pos)

class Recall (Metric):
    def __call__(self, truth, prediction):
        
        # Encontramos los True Positive
        true_pos_mask = (prediction == 1) & (truth == 1)
        true_pos = true_pos_mask.sum()
        
        # Encontramos los False Negative
        false_neg_mask = (prediction == 0) & (truth == 1)
        false_neg = false_neg_mask.sum()
        
        return true_pos / (true_pos + false_neg)
        
class Accuracy (Metric):
    def __call__(self, truth, prediction):
        
        # Encontramos los True Positive
        true_pos_mask = (prediction == 1) & (truth == 1)
        true_pos = true_pos_mask.sum()
        
        # Encontramos los False Positive
        false_pos_mask = (prediction == 1) & (truth == 0)
        false_pos = false_pos_mask.sum()
        
        # Encontramos los True Negative
        true_neg_mask = (prediction == 0) & (truth == 0)
        true_neg = true_neg_mask.sum() 
    
        # Encontramos los False Negative
        false_neg_mask = (prediction == 0) & (truth == 1)
        false_neg = false_neg_mask.sum()
    
        return (true_pos + true_neg) / (true_pos + true_neg + false_pos + false_neg)

def k_folds(X_train, y_train, k=5):
    l_regression = LinearRegression()
    error = MSE()

    chunk_size = int(len(X_train) / k)
    mse_list = []
    for i in range(0, len(X_train), chunk_size):
        end = i + chunk_size if i + chunk_size <= len(X_train) else len(X_train)
        new_X_valid = X_train[i: end]
        new_y_valid = y_train[i: end]
        new_X_train = np.concatenate([X_train[: i], X_train[end:]])
        new_y_train = np.concatenate([y_train[: i], y_train[end:]])

        l_regression.fit(new_X_train, new_y_train)
        prediction = l_regression.predict(new_X_valid)
        mse_list.append(error(new_y_valid, prediction))

    mean_MSE = np.mean(mse_list)

    return mean_MSE
    
    
def gradient_descent(X_train, y_train, lr=0.01, amt_epochs=100):
    """
    lr: learning rate
    amt_epochs: cantidad de iteraciones
    
    shapes: 
        X_t: nxm
        Y_y: nx1
        W: mx1
    """
    n = X_train.shape[0]
    m = X_train.shape[1]
    # print('X.shape:{}x{}\n'.format(n,m))
        
    # Inicializamos los pesos
    W = np.random.randn(m).reshape(m,1)
    print('W_inicial_{}'.format(W.reshape(-1)))
    
    for i in range(amt_epochs):
        # Calculo la estimación
        #y_hat=X_train*W
        y_hat=np.matmul(X_train,W)
        
        # Calculo el error
        error=y_train-y_hat
        
        # Calculo el gradiente
        grad_sum = np.sum(error*X_train,axis=0)
        grad_mul =-2/n*grad_sum  #1xm
        gradient = np.transpose(grad_mul).reshape(-1,1) #mx1
        
        # Actualizo el valor
        W = W - (lr*gradient)
    
    return W


def stochastic_gradient_descent(X_train, y_train, lr=0.01, amt_epochs=100):
    """
    lr: learning rate
    amt_epochs: cantidad de iteraciones
    
    shapes: 
        X_t: nxm
        Y_y: nx1
        W: mx1
    """
    n = X_train.shape[0]
    m = X_train.shape[1]
    # print('X.shape:{}x{}\n'.format(n,m))
        
    # Inicializamos los pesos
    W = np.random.randn(m).reshape(m,1)
    print('W_inicial_{}'.format(W.reshape(-1)))
    
    for i in range(amt_epochs):
        idx=np.random.permutation(X_train.shape[0])
        X_train = X_train[idx]
        y_train = y_train[idx]
        
        for j in range(n):
        
            # Calculo la estimación
            #y_hat=X_train*W
            y_hat=np.matmul(X_train[j].reshape(1,-1),W)

            # Calculo el error
            error=y_train[j]-y_hat

            # Calculo el gradiente
            grad_sum = error*X_train[j]
            grad_mul =-2/n*grad_sum  #1xm
            gradient = np.transpose(grad_mul).reshape(-1,1) #mx1

            # Actualizo el valor
            W = W - (lr*gradient)
    
    return W

def mini_batch_gradient_descent(X_train, y_train, lr=0.01, amt_epochs=100):
    """
    shapes:
        X_t = nxm
        y_t = nx1
        W = mx1
    """
    b = 16
    n = X_train.shape[0]
    m = X_train.shape[1]

    # initialize random weights
    W = np.random.randn(m).reshape(m, 1)

    for i in range(amt_epochs):
        idx = np.random.permutation(X_train.shape[0])
        X_train = X_train[idx]
        y_train = y_train[idx]

        batch_size = int(len(X_train) / b)
        for i in range(0, len(X_train), batch_size):
            end = i + batch_size if i + batch_size <= len(X_train) else len(X_train)
            batch_X = X_train[i: end]
            batch_y = y_train[i: end]

            prediction = np.matmul(batch_X, W)  # nx1
            error = batch_y - prediction  # nx1

            grad_sum = np.sum(error * batch_X, axis=0)
            grad_mul = -2/n * grad_sum  # 1xm
            gradient = np.transpose(grad_mul).reshape(-1, 1)  # mx1

            W = W - (lr * gradient)

    return W


def mini_batch_logistic_gradient_descent(X_train, y_train, lr=0.01, amt_epochs=100):
    """
    shapes:
        X_t = nxm
        y_t = nx1
        W = mx1
    """
    b = 16
    n = X_train.shape[0]
    m = X_train.shape[1]

    # initialize random weights
    W = np.random.randn(m).reshape(m, 1)

    for i in range(amt_epochs):
        idx = np.random.permutation(X_train.shape[0])
        X_train = X_train[idx]
        y_train = y_train[idx]

        batch_size = int( len(X_train) / b)
        for i in range(0, len(X_train), batch_size):
            end = i + batch_size if i + batch_size <= len(X_train) else len(X_train)
            batch_X = X_train[i: end]
            batch_y = y_train[i: end]
            prediction = 1/(1+np.exp(-np.matmul(batch_X, W))) #Ojo que no es la predicción posta!!
            error = batch_y - prediction  # nx1

            grad_sum = np.sum(error * batch_X, axis=0)
            grad_mul = -2/n * grad_sum  # 1xm
            gradient = np.transpose(grad_mul).reshape(-1, 1)  # mx1

            W = W - (lr * gradient)

    return W

In [68]:
# Armamos el main
#----------------
if __name__ == '__main__':
    
    # Llamo al dataset sobre el que voy a trabajar
    #---------------------------------------------
    dataset = Data('../clase_6_dataset.txt')
       
    # Hacemos la partición del dataset
    #---------------------------------
    X_train, X_test, y_train, y_test = dataset.split(1)
    
    lr_1 = 0.001
    amp_epochs_1=1000
    W = mini_batch_logistic_gradient_descent(X_train.T, y_train.reshape(-1,1), lr=lr_1, amt_epochs=amp_epochs_1)
    
    print('W encontrado: {}'.format(W))
    
    # Calculo la predicción (lo hago acá en el ruedo pero en rigor debería crear una def) 
    prediction = (np.matmul(X_train.T,W) > 0.5)
    truth = y_train.reshape(-1,1)
    
    # CALCULAR LAS MÉTRICAS
    precision = Precision()
    accuracy = Accuracy()
    recall = Recall()
    
    prec_test = precision(truth,prediction)
    acc_test = accuracy(truth,prediction)
    rec_test = recall(truth,prediction)
    print ('\n\n Precisión: {} \n\n Accuracy: {}  \n\n Recall: {}'.format(prec_test,acc_test,rec_test))

W encontrado: [[ 0.00685739]
 [-0.00022121]]


 Precisión: 0.8333333333333334 

 Accuracy: 0.6363636363636364  

 Recall: 0.5


####  Cambio a tema oscuro

In [11]:
#!jt -t chesterish
#!jt -t monokai
#!jt -t solarizedd -f fira -fs 115
!jt -t oceans16 -T -N