In [7]:
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import pandas as pd
import sys

In [8]:
# Display configuration: never truncate printed NumPy arrays, and let pandas
# show every column and every row of a frame.
np.set_printoptions(threshold=sys.maxsize)
for _option_name in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_option_name, None)

In [9]:
def Leer_Datos(file_name):
    """Load a CSV file into a pandas DataFrame.

    Args:
        file_name: Path or file-like object, forwarded unchanged to
            ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with its default options.
    """
    frame = pd.read_csv(file_name)
    return frame

def Normalizar_Datos(data, axis=None):
    """Standardize ``data`` to zero mean and unit standard deviation.

    Args:
        data: NumPy array of numeric values.
        axis: Axis along which the mean and standard deviation are computed.
            ``None`` (the default) uses a single mean/std over every element
            of ``data``. ``axis=0`` standardizes each column independently,
            which is the usual choice for a samples-by-features matrix.

    Returns:
        A tuple ``(normalized, mean_data, standard_dev)``. With ``axis=None``
        the statistics are scalars; otherwise they keep the reduced axis with
        length 1 so they broadcast against ``data``. ``standard_dev`` is the
        value actually measured, including zeros.
    """
    if axis is None:
        mean_data = np.mean(data)
        standard_dev = np.std(data)
    else:
        # keepdims lets the statistics broadcast back for any chosen axis.
        mean_data = np.mean(data, axis=axis, keepdims=True)
        standard_dev = np.std(data, axis=axis, keepdims=True)

    # Constant input has a standard deviation of 0; dividing by 1 there maps
    # it to 0 instead of producing NaN from 0 / 0.
    safe_std = np.where(standard_dev == 0, 1.0, standard_dev)
    data = (data - mean_data) / safe_std
    return data, mean_data, standard_dev

def Sigmoidal(X, W):
    """Apply the logistic function to the linear scores ``X @ W``.

    Args:
        X: Input matrix, multiplied by ``W`` with ``np.matmul``.
        W: Weight vector (or matrix) compatible with ``X`` for ``np.matmul``.

    Returns:
        ``1 / (1 + exp(-(X @ W)))`` evaluated element-wise.
    """
    linear_scores = np.matmul(X, W)
    return 1 / (1 + np.exp(-linear_scores))

def Separar_X_y(data):
    """Split a 2-D array into feature columns and the trailing target column.

    Args:
        data: 2-D array whose last column is treated as the target.

    Returns:
        ``(X, y)`` where ``X`` holds every column except the last and ``y``
        holds only the last column. ``y`` is sliced, not indexed, so it stays
        two-dimensional.
    """
    target_col = data.shape[1] - 1
    features, target = data[:, :target_col], data[:, target_col:]
    return features, target

def Crear_Entrenamiento_Prueba(data, train_percentage=0.7):
    """Split the rows of ``data`` into a leading training part and a trailing test part.

    The split is positional: rows are not shuffled here.

    Args:
        data: 2-D array with one sample per row.
        train_percentage: Fraction of rows (between 0 and 1) assigned to the
            training part. Defaults to 0.7.

    Returns:
        ``(training, test)``: the first ``int(rows * train_percentage)`` rows
        and the remaining rows.

    Raises:
        ValueError: If ``train_percentage`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_percentage <= 1.0:
        raise ValueError("train_percentage must be between 0 and 1")
    row_split_data = int(data.shape[0] * train_percentage)
    return data[:row_split_data, :], data[row_split_data:, :]

def Crear_Pesos(X):
    """Draw a random initial weight vector with one entry per column of ``X``.

    Args:
        X: 2-D design matrix; only its column count is used.

    Returns:
        1-D array of length ``X.shape[1]`` produced by ``np.random.rand``.
    """
    n_weights = X.shape[1]
    return np.random.rand(n_weights)

def Calcular_Funcion_Costo(X, y, W):
    """Mean binary cross-entropy of the logistic model ``Sigmoidal(X, W)``.

    Args:
        X: Design matrix with one sample per row.
        y: Labels, with a shape that lines up element-wise with the
            predictions returned by ``Sigmoidal(X, W)``.
        W: Model weights.

    Returns:
        ``-sum(y*log(p) + (1-y)*log(1-p)) / X.shape[0]`` where ``p`` is the
        predicted probability.
    """
    pred = Sigmoidal(X, W)
    # A saturated prediction (exactly 0 or 1) would evaluate log(0) = -inf,
    # and 0 * -inf is NaN, so even a correctly classified sample could poison
    # the cost. Clip to the open interval (0, 1).
    eps = np.finfo(float).eps
    pred = np.clip(pred, eps, 1 - eps)
    log_likelihood = np.sum(y * np.log(pred) + (1 - y) * np.log(1 - pred))
    return -log_likelihood / X.shape[0]

def Calcular_Gradiente(X, y, pred):
    """Gradient of the mean cross-entropy cost with respect to the weights.

    Args:
        X: Design matrix with one sample per row.
        y: Labels.
        pred: Predicted probabilities, same shape as ``y``.

    Returns:
        ``X^T (pred - y) / y.shape[0]``.
    """
    residual = pred - y
    n_samples = y.shape[0]
    return (np.transpose(X) @ residual) / n_samples

def Gradiente_Descendiente(X, y, W, num_iter, learn_rate):
    """Run batch gradient descent for a fixed number of iterations.

    Args:
        X: Design matrix with one sample per row.
        y: Labels.
        W: Initial weights; the caller's array is not modified because each
            update builds a new array.
        num_iter: Number of update steps.
        learn_rate: Step size multiplying the gradient.

    Returns:
        ``(W, costs)``: the final weights and an array of length ``num_iter``
        whose i-th entry is the cost measured right after the i-th update.
    """
    costs = np.zeros(num_iter)
    for step in range(num_iter):
        step_gradient = Calcular_Gradiente(X, y, Sigmoidal(X, W))
        W = W - learn_rate * step_gradient
        costs[step] = Calcular_Funcion_Costo(X, y, W)
    return W, costs

def Calcular_Accuraccy(X, y, W):
    """Fraction of samples whose thresholded prediction agrees with the label.

    Args:
        X: Design matrix with one sample per row.
        y: Labels; any nonzero value counts as the positive class.
        W: Model weights.

    Returns:
        Number of agreements divided by ``y.shape[0]``. A prediction is
        positive when the sigmoid output is strictly greater than 0.5.
    """
    predicted_positive = Sigmoidal(X, W) > 0.5
    # XOR is true where prediction and label disagree; negate to count matches.
    matches = np.logical_not(np.logical_xor(predicted_positive, y))
    return np.sum(matches) / y.shape[0]
    
def Crear_k_folds(data, k):
    """Shuffle the rows of ``data`` and cut them into ``k`` equally sized folds.

    The caller's array is left untouched: the shuffle is done on a copy.
    Rows that do not fit into ``k`` equal folds (``rows % k`` of them) are
    discarded after shuffling.

    Args:
        data: 2-D array whose last column is the target.
        k: Number of folds; must be at least 1.

    Returns:
        ``(k_folds, size_fold)`` where ``k_folds`` is a list of ``k`` dicts
        with keys ``"X"`` and ``"y"`` (as returned by ``Separar_X_y``) and
        ``size_fold`` is the number of rows in each fold.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    # permutation() returns a shuffled copy, so the input is not mutated.
    shuffled = np.random.permutation(data)
    size_fold = shuffled.shape[0] // k

    k_folds = []
    for fold_idx in range(k):
        start = fold_idx * size_fold
        X, y = Separar_X_y(shuffled[start:start + size_fold, :])
        k_folds.append({"X": X, "y": y})
    return k_folds, size_fold

In [10]:
# Hyperparameter sweep: for every dataset, estimate the k-fold
# cross-validated test cost of logistic regression for each combination of
# iteration count and learning rate, then render the results as a table.
data_files = ["heart.csv", "diabetes.csv"]
num_iters = [500, 1000, 1500, 2000, 2500, 3000, 3500]
num_iters_label = num_iters.copy()
# The backslash is escaped explicitly; a bare "\ " is an invalid escape
# sequence that only works by accident and triggers a warning.
num_iters_label.insert(0, "Tasas de aprendizaje \\ Numero de iteraciones")
learn_rates = [0.01, 0.05, 0.1, 0.2, 0.3, 0.4]
k = 3
RANDOM_SEED = 42  # fixes fold shuffling and weight initialization across runs

headerColor = 'grey'
rowEvenColor = 'lightgrey'
rowOddColor = 'white'


def _Crear_Particiones(k_folds):
    """Build one (X_train, y_train, X_test, y_test) tuple per fold.

    Fold ``i`` is the test set and the remaining folds, stacked in order, are
    the training set. Targets are flattened to 1-D and a column of ones is
    appended to each feature matrix as the bias term. Requires at least two
    folds, since the training set would otherwise be empty.
    """
    particiones = []
    for test_idx, test_fold in enumerate(k_folds):
        train_folds = [fold for j, fold in enumerate(k_folds) if j != test_idx]
        X_train = np.concatenate([fold['X'] for fold in train_folds], axis=0)
        y_train = np.concatenate([fold['y'] for fold in train_folds], axis=0)
        X_test, y_test = test_fold['X'], test_fold['y']

        y_train = np.reshape(y_train, y_train.shape[0])
        y_test = np.reshape(y_test, y_test.shape[0])

        X_train = np.c_[X_train, np.ones(X_train.shape[0])]     # bias
        X_test = np.c_[X_test, np.ones(X_test.shape[0])]        # bias
        particiones.append((X_train, y_train, X_test, y_test))
    return particiones


np.random.seed(RANDOM_SEED)

for name in data_files:
    # go.Table takes a list of columns: the first column lists the learning
    # rates, and each later column holds the costs for one iteration count.
    result_table = [learn_rates]
    data = Leer_Datos(name)
    X, y = Separar_X_y(data.values)
    # NOTE(review): the statistics here are computed over all features at
    # once and before the folds are created; per-feature scaling fitted on
    # the training folds is probably what is intended. Confirm.
    norm_data_X, mean_data_X, standard_dev_X = Normalizar_Datos(X)
    norm_data = np.concatenate((norm_data_X, y), axis=1)
    k_folds, size_fold = Crear_k_folds(norm_data, k)

    # The splits depend only on the folds, so build them once per dataset
    # instead of once per (num_iter, learn_rate) pair.
    particiones = _Crear_Particiones(k_folds)

    for num_iter in num_iters:
        column_costs = []
        for learn_rate in learn_rates:
            cost_test_total = 0.0
            for X_train, y_train, X_test, y_test in particiones:
                W = Crear_Pesos(X_train)
                W, costs = Gradiente_Descendiente(X_train, y_train, W, num_iter, learn_rate)
                cost_test_total += Calcular_Funcion_Costo(X_test, y_test, W)
            cost_test_total /= k
            column_costs.append("%.4f" % cost_test_total)
        result_table.append(column_costs)

    fig = go.Figure(data=[go.Table(
        header=dict(
            values=num_iters_label,
            line_color='darkslategray',
            fill_color=headerColor,
            align=['left', 'center'],
            font=dict(color='white', size=12)
        ),
        cells=dict(
            values=result_table,
            line_color='darkslategray',
            # Alternating row stripes; the list is longer than the number of
            # rows, which is harmless.
            fill_color=[[rowOddColor, rowEvenColor] * len(learn_rates)],
            align=['left', 'center'],
            font=dict(color='darkslategray', size=11)
        ))
    ])
    fig.show()