Regresión Logística

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

Cargar datos

Normalización

In [None]:
def normalizar(X_train, X_test, y_train, y_test):
    """Standardize features and targets to zero mean and unit variance.

    The mean and standard deviation are computed over the concatenation of
    the train and test splits (features per column along axis 0, targets as
    a single scalar), and the same statistics are applied to both splits.

    A standard deviation of exactly zero (a constant column, or a constant
    target) is replaced by 1 so the centred values become 0 instead of
    NaN/inf.

    NOTE(review): y is standardized as well. If y holds 0/1 class labels for
    the logistic-regression functions in this notebook, the result is no
    longer 0/1 -- confirm this is what the caller wants.

    Args:
        X_train: training features, array-like concatenable along axis 0.
        X_test: test features with the same trailing shape as ``X_train``.
        y_train: training targets.
        y_test: test targets.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)`` of standardized arrays.
        Note that this order differs from the argument order.
    """
    # Build each combined array once instead of once per statistic.
    X_all = np.concatenate((X_train, X_test), axis=0)
    y_all = np.concatenate((y_train, y_test), axis=0)

    X_media = X_all.mean(axis=0)
    X_std = X_all.std(axis=0)
    y_media = y_all.mean()
    y_std = y_all.std()

    # Guard against division by zero for constant columns / constant target.
    X_std = np.where(X_std == 0, 1.0, X_std)
    if y_std == 0:
        y_std = 1.0

    X_train = (X_train - X_media) / X_std
    y_train = (y_train - y_media) / y_std
    X_test = (X_test - X_media) / X_std
    y_test = (y_test - y_media) / y_std

    return X_train, y_train, X_test, y_test


Regresión Logística

$h_\theta(x) = g(\theta^{T}x)$

$g(z) = \frac{1}{1+e^{-z}}$

In [None]:
# Sigmoid (logistic) function

def predict(X, theta):
    """Return the logistic-regression hypothesis ``g(X . theta)``.

    ``g(z) = 1 / (1 + exp(-z))`` is evaluated in a numerically stable form:
    ``exp`` is only ever applied to non-positive values, so large ``|z|``
    cannot overflow.

    Args:
        X: feature array; combined with ``theta`` through ``np.dot``.
        theta: parameter vector.

    Returns:
        Sigmoid of ``np.dot(X, theta)``, with the same shape as that product.
    """
    z = np.dot(X, theta)
    # exp(-|z|) lies in (0, 1]; pick the algebraically equivalent branch
    # of the sigmoid for each sign of z.
    decay = np.exp(-np.abs(z))
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result back into a NumPy scalar and
    # leaves n-d arrays unchanged.
    return result[()]


**Función de costo para la regresión logística:**

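*   $m$: número de ejemplos de entrenamiento.
*   $h_\theta(x^{(i)})$: probabilidad predicha por el modelo para el ejemplo $i$; $y^{(i)} \in \{0, 1\}$ es su etiqueta real.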


$J(\theta) = - \frac{1}{m}[\sum_{i=1}^{m}y^{(i)}\log h_\theta(x^{(i)})+(1-y^{(i)})\log(1-h_\theta(x^{(i)}))]$

In [None]:
def cal_cost(theta, X, y):
    """Compute the mean binary cross-entropy cost J(theta).

    J(theta) = -(1/m) * sum(y * log(h) + (1 - y) * log(1 - h)), where
    ``h = predict(X, theta)`` and ``m = X.shape[0]``.

    Predicted probabilities are clipped away from exactly 0 and 1 before the
    logarithm is taken; otherwise a saturated prediction yields ``log(0)``
    (-inf) and ``0 * -inf`` (NaN), which would poison the cost history.

    Args:
        theta: parameter vector.
        X: feature array whose first dimension is the number of samples.
        y: targets, broadcast-compatible with ``predict(X, theta)``.

    Returns:
        The scalar cost.
    """
    eps = np.finfo(float).eps
    y_pred = np.clip(predict(X, theta), eps, 1 - eps)
    cross_entropy = y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred)
    return (-1 / X.shape[0]) * np.sum(cross_entropy)

**Descenso de gradiente**

$\theta_j = \theta_j - \alpha \frac{1}{m} \sum_{i=1}^m (h_{\theta}(x^{(i)})-y^{(i)})x_j^{(i)}$

Donde: $h_{\theta}(x) = g(\theta^{T}x) = \frac{1}{1+e^{-\theta^{T}x}}$


In [None]:
# Initial parameters and hyperparameters for gradient descent.
# NOTE(review): X_train is not defined in any cell visible above (the
# "Cargar datos" section has no code), so this cell presumably depends on a
# data-loading cell that must run first -- confirm.
n_features = X_train.shape[1]
# Start from the all-zeros parameter vector, one entry per feature column.
theta = np.zeros(n_features)
# alpha and iterations are module-level values read by gradient_descent.
alpha = 0.01 # learning rate
iterations = 2000

def gradient_descent(X, y, theta, learning_rate=None, n_iterations=None):
    """Fit logistic-regression parameters with batch gradient descent.

    Each step applies
    ``theta <- theta - (learning_rate / m) * X.T @ (predict(X, theta) - y)``
    and then records the cost of the updated parameters.

    Args:
        X: feature array with one row per sample.
        y: targets, broadcast-compatible with ``predict(X, theta)``.
        theta: initial parameter vector (not modified in place; a new array
            is produced on every step).
        learning_rate: step size. When ``None`` (the default) the
            module-level ``alpha`` is used, as before.
        n_iterations: number of update steps. When ``None`` (the default)
            the module-level ``iterations`` is used, as before.

    Returns:
        Tuple ``(theta, cost_history)`` where ``cost_history[i]`` is the
        cost after step ``i``.
    """
    # Fall back to the notebook-level hyperparameters so existing
    # three-argument calls behave exactly as they did.
    if learning_rate is None:
        learning_rate = alpha
    if n_iterations is None:
        n_iterations = iterations

    cost_history = np.zeros(n_iterations)
    for it in range(n_iterations):
        prediction = predict(X, theta)
        m = prediction.shape[0]
        theta = theta - (1 / m) * learning_rate * X.T.dot(prediction - y)
        cost_history[it] = cal_cost(theta, X, y)
    return theta, cost_history

**Exactitud (Accuracy)**




In [None]:
def accuracy(X, y, theta, threshold=0.5):
    """Return the fraction of samples whose predicted class matches ``y``.

    The original body computed the predictions but never returned a value.
    Probabilities from ``predict`` are turned into 0/1 labels by comparing
    against ``threshold`` and then compared element-wise with ``y``.

    Args:
        X: feature array with one row per sample.
        y: true labels; assumed to be 0/1 -- TODO confirm against the data.
        theta: parameter vector.
        threshold: probability at or above which a sample is labelled 1.

    Returns:
        Accuracy as a Python float in [0, 1].
    """
    prediction = predict(X, theta)
    # Flatten both sides so an (m, 1) column and an (m,) vector compare
    # element-wise instead of broadcasting to an (m, m) matrix.
    predicted_labels = (np.ravel(prediction) >= threshold).astype(int)
    return float(np.mean(predicted_labels == np.ravel(y)))

**KFolds**

In [42]:
import numpy as np

def listByClass(my_list, clase):
    """Collect the entries of ``my_list`` that equal ``clase``.

    Args:
        my_list: sequence of labels.
        clase: label value to select.

    Returns:
        1-D float NumPy array holding the matching entries, in their
        original order (empty when nothing matches).
    """
    # Keep every entry belonging to the requested class.
    matches = []
    for value in my_list:
        if value == clase:
            matches.append(value)
    return np.array(matches, dtype=float)
    
def kFolds(datos, k=3):
    """Split binary labels into ``k`` stratified folds.

    The entries of ``datos`` equal to 0 and equal to 1 are gathered
    separately with ``listByClass``; every fold receives
    ``len(class) // k`` entries of each class, and the last fold also takes
    whatever remains. Entries that are neither 0 nor 1 are not placed in
    any fold.

    The original implementation ignored ``datos`` and read the module-level
    ``my_list`` instead; the argument is now the data that is split.

    Args:
        datos: iterable of labels.
        k: number of folds; must be at least 1.

    Returns:
        List of ``k`` lists; each holds that fold's class-0 entries followed
        by its class-1 entries.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    datos = list(datos)
    list_class1 = listByClass(datos, 0)
    list_class2 = listByClass(datos, 1)
    fold_size_class1 = len(list_class1) // k
    fold_size_class2 = len(list_class2) // k

    datos_split = []
    for i in range(k):
        # The last fold runs to the end of each class so no entry is lost
        # when the class size is not a multiple of k.
        is_last = i == k - 1
        fold = []
        for samples, size in ((list_class1, fold_size_class1),
                              (list_class2, fold_size_class2)):
            end = None if is_last else (i + 1) * size
            fold.extend(samples[i * size:end])
        datos_split.append(fold)

    return datos_split


# Demo: 21 binary labels (13 zeros, 8 ones) split into 3 folds.
my_list = [0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,1,1,1,0,0]
resultFolds = kFolds(my_list, 3)
# Show the folds; each one lists its class-0 entries before its class-1 entries.
print(resultFolds)

[[0.0, 0.0, 0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]]
