# Cargar y Procesar ENSANUT 2018

In [6]:
# Importamos las librerias necesarias

# Tratamiento de datos
import pandas as pd
import numpy as np
import math
import statsmodels.api as sm
%matplotlib inline

# Gráficos
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns

# Preprocesado y modelado
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import ConfusionMatrixDisplay
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Matplotlib configuration: default colormap, tight bounding box for saved
# figures, and the 'ggplot' style sheet.
plt.rcParams['image.cmap'] = "bwr"
#plt.rcParams['figure.dpi'] = "100"
plt.rcParams['savefig.bbox'] = "tight"
# Apply the style sheet once. The previous `a or b` form called both sides
# (the first call returns None), applying the same style twice.
plt.style.use('ggplot')

# Configuración warnings
import warnings
warnings.filterwarnings('ignore')

# Load CS_ADULTOS.csv (downloaded from the INEGI site). read_csv already
# returns a DataFrame, so no extra pd.DataFrame(...) wrap is needed.
# Missing values are replaced with 0.
data = pd.read_csv('CS_ADULTOS.csv').fillna(0)

# Remove the columns that are not related to column 'P3_1' (diabetes).
# The names are collected in one list and dropped by keyword so the intent
# (columns, not rows) is explicit.
columnas_descartadas = [
    'UPM', 'VIV_SEL', 'HOGAR', 'NUMREN', 'P1_2', 'P1_3',
    'P1_5', 'P1_6', 'P1_7', 'P1_8', 'P1_9', 'P1_10_1',
    'P1_10_2', 'P1_10_3', 'P1_10_4', 'P1_10_5', 'P1_10_6', 'P1_10_7',
    'P1_10_8', 'P1_10_9', 'P1_10_10', 'P2_1_1',
    'P2_1_6',
    'P3_2', 'P3_3V', 'P3_4', 'P3_5D', 'P3_5M', 'P3_5A', 'P3_6',
    'P3_7_1', 'P3_7_2', 'P3_7_3', 'P3_7_4', 'P3_7_5', 'P3_7_6',
    'P3_7_7', 'P3_7_8', 'P3_7_9', 'P3_7_10', 'P3_7_11', 'P3_7_12',
    'P3_8', 'P3_9M', 'P3_9A', 'P3_10M', 'P3_10A', 'P3_11', 'P3_12',
    'P3_14_1',
    'P3_14_2', 'P3_14_3', 'P3_14_4',
    'P3_14_5', 'P3_14_6', 'P3_15_6',
    'P3_14_7', 'P3_14_8', 'P3_16_2',
    'P3_16_7', 'P3_16_8', 'P3_16_10',
    'P3_16_11', 'P3_16_12',
    'P3_16_17', 'P3_16_18',
    'P4_2M', 'P4_2A', 'P4_3', 'P4_4',
    'P4_5M', 'P4_5A', 'P4_6', 'P4_7', 'P4_8_1', 'P4_8_2', 'P4_8_3',
    'P4_8_4', 'P4_8_5', 'P4_9', 'P4_9_1', 'P4_10_1', 'P4_10_2',
    'P4_10_3', 'P4_10_4', 'P4_10_5', 'P4_10_5V', 'P4_10_6', 'P4_10_6V',
    'P4_10_6C', 'P5_1', 'P5_2_2', 'P5_2_3', 'P5_3', 'P5_4',
    'P5_5', 'P5_6', 'P5_7', 'P6_1_1', 'P6_1_2', 'P6_2_1',
    'P6_2_2', 'P6_2_3', 'P6_2_4', 'P6_2_5', 'P6_2_6', 'P6_2_7', 'P6_3',
    'P6_4', 'P6_5_1', 'P6_5_2', 'P6_5_3', 'P6_5_4', 'P6_6', 'P6_7_1',
    'P6_7_2', 'P6_7_3', 'P6_7_4', 'P6_8_1', 'P6_8_2', 'P6_8_3',
    'P6_8_4', 'P6_8_5', 'P6_8_6', 'P6_9', 'P7_1', 'P7_2', 'P7_3',
    'P7_4_1', 'P7_5_1',
    'P7_4_2', 'P7_5_2',
    'P7_4_3', 'P7_5_3', 'P8_1', 'P8_2_1', 'P8_2_2', 'P8_2_3',
    'P8_2_4', 'P8_2_5', 'P8_2_6', 'P8_2_7', 'P8_2_8', 'P8_2_9',
    'P8_2_10', 'P8_2_11', 'P8_2_12', 'P8_2_13', 'P8_2_14', 'P8_2_15',
    'P8_3_1', 'P8_3_2', 'P8_3_3', 'P8_3_4', 'P8_3_5', 'P8_3_6',
    'P8_3_7', 'P8_3_8', 'P8_3_9', 'P8_3_10', 'P8_3_11', 'P8_3_12',
    'P8_3_13', 'P8_3_14', 'P8_3_15', 'P8_3_16', 'P8_3_17', 'P8_4',
    'P8_5', 'P8_6M', 'P8_6A', 'P8_7', 'P8_8', 'P8_9', 'P8_10',
    'P8_11_1', 'P8_11_2', 'P8_11_3', 'P8_11_4', 'P8_11_5', 'P8_13',
    'P8_14_1', 'P8_14_2', 'P8_14_3', 'P8_14_4', 'P8_14_5', 'P8_14_6',
    'P8_14_7', 'P8_15', 'P8_16', 'P8_17_1', 'P8_17_2', 'P8_17_3',
    'P8_17_4', 'P8_17_5', 'P8_17_6', 'P8_17_7', 'P8_17_8', 'P8_17_9',
    'P8_17_10', 'P8_17_11', 'P8_17_12', 'P8_17_13', 'P8_17_14',
    'P8_17_15', 'P8_17_16', 'P8_18', 'P8_19', 'P8_20_1', 'P8_20_2',
    'P8_21_1', 'P8_21_2', 'P8_21_3', 'P8_21_4', 'P8_21_5', 'P8_21_6',
    'P8_21_7', 'P8_21_8', 'P8_21_9', 'P8_21_10', 'P8_21_11',
    'P8_21_12', 'P8_21_13', 'P8_21_14', 'P8_22', 'P8_23', 'P8_24_1',
    'P8_24_2', 'P8_24_3', 'P8_25_1', 'P8_25_2', 'P8_26_1', 'P8_26_2',
    'P8_26_3', 'P8_26_4', 'P8_26_5', 'P8_26_6', 'P8_26_7', 'P8_26_8',
    'P8_26_9', 'P8_26_10', 'P8_27', 'P8_28', 'P8_29_1', 'P8_29_2',
    'P8_29_3', 'P8_29_4', 'P8_29_5', 'P8_30', 'P8_31', 'P8_32', 'P9_1',
    'P9_2', 'P9_3', 'P9_4', 'P9_5', 'P9_6', 'P9_7', 'P9_8', 'P9_9_A1',
    'P9_9_B1D', 'P9_9_B1M', 'P9_9_B1A', 'P9_9_C1', 'P9_9_A2',
    'P9_9_B2D', 'P9_9_B2M', 'P9_9_B2A', 'P9_9_C2', 'P9_9_A3',
    'P9_9_B3D', 'P9_9_B3M', 'P9_9_B3A', 'P9_9_C3', 'P9_10_A1',
    'P9_10_B1D', 'P9_10_B1M', 'P9_10_B1A', 'P9_10_C1', 'P9_10_A2',
    'P9_10_B2D', 'P9_10_B2M', 'P9_10_B2A', 'P9_10_C2', 'P9_10_A3',
    'P9_10_B3D', 'P9_10_B3M', 'P9_10_B3A', 'P9_10_C3', 'P9_10_A4',
    'P9_10_B4D', 'P9_10_B4M', 'P9_10_B4A', 'P9_10_C4', 'P9_11_A1',
    'P9_11_B1D', 'P9_11_B1M', 'P9_11_B1A', 'P9_11_C1', 'P9_12_A1',
    'P9_12_B1D', 'P9_12_B1M', 'P9_12_B1A', 'P9_12_C1', 'P9_12_A2',
    'P9_12_B2D', 'P9_12_B2M', 'P9_12_B2A', 'P9_12_C2', 'P9_13',
    'P9_14', 'P9_15', 'P9_16', 'P9_17', 'P9_18', 'P9_19', 'P9_20',
    'P9_21', 'P9_22_A1', 'P9_22_B1D', 'P9_22_B1M', 'P9_22_B1A',
    'P9_22_C1', 'P9_22_A2', 'P9_22_B2D', 'P9_22_B2M', 'P9_22_B2A',
    'P9_22_C2', 'P9_22_A3', 'P9_22_B3D', 'P9_22_B3M', 'P9_22_B3A',
    'P9_22_C3', 'P9_23_A1', 'P9_23_B1D', 'P9_23_B1M', 'P9_23_B1A',
    'P9_23_C1', 'P9_23_A2', 'P9_23_B2D', 'P9_23_B2M', 'P9_23_B2A',
    'P9_23_C2', 'P9_23_A3', 'P9_23_B3D', 'P9_23_B3M', 'P9_23_B3A',
    'P9_23_C3', 'P9_23_A4', 'P9_23_B4D', 'P9_23_B4M', 'P9_23_B4A',
    'P9_23_C4', 'P9_24_A1', 'P9_24_B1D', 'P9_24_B1M', 'P9_24_B1A',
    'P9_24_C1', 'P9_25', 'P10_1_1', 'P10_2_1', 'P10_3_1', 'P10_4_1',
    'P10_5_1', 'P10_6_1', 'P10_7_1', 'P10_1_2', 'P10_2_2', 'P10_3_2',
    'P10_4_2', 'P10_5_2', 'P10_6_2', 'P10_7_2', 'P10_1_3', 'P10_2_3',
    'P10_3_3', 'P10_4_3', 'P10_5_3', 'P10_6_3', 'P10_7_3', 'P10_1_4',
    'P10_2_4', 'P10_3_4', 'P10_4_4', 'P10_5_4', 'P10_6_4', 'P10_7_4',
    'P10_1_5', 'P10_2_5', 'P10_3_5', 'P10_4_5', 'P10_5_5', 'P10_6_5',
    'P10_7_5', 'P10_1_6', 'P10_2_6', 'P10_3_6', 'P10_4_6', 'P10_5_6',
    'P10_6_6', 'P10_7_6', 'P10_1_7', 'P10_2_7', 'P10_3_7', 'P10_4_7',
    'P10_5_7', 'P10_6_7', 'P10_7_7', 'P10_1_8', 'P10_2_8', 'P10_3_8',
    'P10_4_8', 'P10_5_8', 'P10_6_8', 'P10_7_8', 'P10_1_9', 'P10_2_9',
    'P10_3_9', 'P10_4_9', 'P10_5_9', 'P10_6_9', 'P10_7_9', 'P10_1_10',
    'P10_2_10', 'P10_3_10', 'P10_4_10', 'P10_5_10', 'P10_6_10',
    'P10_7_10', 'P10_1_11', 'P10_2_11', 'P10_3_11', 'P10_4_11',
    'P10_5_11', 'P10_6_11', 'P10_7_11', 'P11_1', 'P11_2', 'P11_3',
    'P11_4', 'P11_5', 'P11_6', 'P11_7', 'P11_8', 'P12_1', 'P12_2_1',
    'P12_2_2', 'P12_2_3', 'P12_2_4', 'P12_2_5', 'P12_2_6', 'P12_2_7',
    'P12_2_8', 'P12_2_9', 'P12_2_10', 'P12_2_11', 'P12_3', 'P12_4_1',
    'P12_5', 'P12_6', 'P12_7', 'P12_7_1', 'P12_8', 'P12_8_1', 'P13_1',
    'P13_3', 'P13_4', 'P13_5', 'P13_6', 'P13_6_1', 'P13_7_1',
    'P13_7_2', 'P13_8', 'P13_8_1', 'P13_9', 'P13_10',
    'P13_12_2', 'P13_13M', 'P13_13A', 'P13_14', 'P14_1',
    'P14_2', 'P14_3', 'P14_4', 'P14_5', 'P14_6', 'P14_7', 'P14_8',
    'P14_9', 'P14_10', 'ENT', 'DOMINIO', 'REGION',
    'EST_DIS', 'UPM_DIS', 'ESTRATO', 'F_20MAS', 'FECHA_NAC',
    'FECHA_ENT', 'DIFERENCIA', 'HIJ_ULT5AD', 'HIJ_ULT1AD', 'EDAD',
]
data = data.drop(columns=columnas_descartadas)

# Cast every column to integer. `int` selects NumPy's default integer type,
# whose width depends on the platform (it is not guaranteed to be int32).
data = data.astype(int)
data.dtypes

# Recode the retained answer codes to 0/1.
# Each rule is (column, ((old_code, new_value), ...)). The pairs are applied
# one at a time, in the listed order, with a column-scoped DataFrame.replace,
# so values in other columns are never touched.
# NOTE(review): the meaning of each survey code is not visible here; confirm
# the groupings against the ENSANUT codebook.
_TWO_TO_ZERO = ((2, 0),)
_TWO_THREE_TO_ZERO = ((2, 0), (3, 0))
_TWO_NINE_TO_ZERO = ((2, 0), (9, 0))
_SCALE_1_TO_4 = ((1, 0), (2, 0), (3, 1), (4, 1))
_SCALE_1_TO_9 = (
    (1, 0), (2, 0), (3, 0), (4, 0), (5, 0),
    (6, 1), (7, 1), (8, 1), (9, 1),
)
_SCALE_1_TO_12 = (
    (1, 0), (2, 0), (3, 0), (4, 0), (5, 0),
    (6, 1), (7, 1), (8, 1), (9, 1), (10, 1), (11, 1), (12, 1),
)

reglas_binarias = [
    ('P1_1', _TWO_TO_ZERO),
    ('P1_4', _SCALE_1_TO_9),
    ('P2_1_2', _SCALE_1_TO_4),
    ('P2_1_3', _SCALE_1_TO_4),
    ('P2_1_4', _SCALE_1_TO_4),
    ('P2_1_5', _SCALE_1_TO_4),
    ('P2_1_7', _SCALE_1_TO_4),
    ('P2_2', _TWO_TO_ZERO),
    ('P3_1', _TWO_THREE_TO_ZERO),
    ('P3_3', ((1, 0), (2, 1), (3, 1), (4, 1), (5, 0))),
    ('P3_15_1', _SCALE_1_TO_12),
    ('P3_15_2', _SCALE_1_TO_12),
    ('P3_15_3', _SCALE_1_TO_12),
    ('P3_15_4', _SCALE_1_TO_12),
    ('P3_15_5', _SCALE_1_TO_12),
    ('P3_15_7', _SCALE_1_TO_12),
    ('P3_17', ((2, 1), (3, 1), (4, 0), (5, 0))),
    ('P3_18_1', _TWO_TO_ZERO),
    ('P3_18_2', _TWO_TO_ZERO),
    ('P3_18_3', _TWO_TO_ZERO),
    ('P3_18_4', _TWO_TO_ZERO),
    ('P3_18_5', _TWO_TO_ZERO),
    ('P3_18_6', _TWO_TO_ZERO),
    ('P3_18_7', _TWO_TO_ZERO),
    ('P3_18_8', _TWO_TO_ZERO),
    ('P3_18_9', _TWO_TO_ZERO),
    ('P4_1', _TWO_TO_ZERO),
    ('P5_2_1', _TWO_TO_ZERO),
    ('P6_1_3', _TWO_TO_ZERO),
    ('P7_1_1', _TWO_NINE_TO_ZERO),
    ('P7_2_1', _TWO_NINE_TO_ZERO),
    ('P7_3_1', _TWO_NINE_TO_ZERO),
    ('P7_1_2', _TWO_NINE_TO_ZERO),
    ('P7_2_2', _TWO_NINE_TO_ZERO),
    ('P7_3_2', _TWO_NINE_TO_ZERO),
    ('P7_1_3', _TWO_NINE_TO_ZERO),
    ('P7_2_3', _TWO_NINE_TO_ZERO),
    ('P7_3_3', _TWO_NINE_TO_ZERO),
    ('P13_2', ((2, 1), (3, 0), (8, 0))),
    ('P13_11', _TWO_THREE_TO_ZERO),
    ('P13_12_1', ((2, 1), (3, 0), (4, 0), (8, 0))),
    ('SEXO', _TWO_TO_ZERO),
]

for columna, pares in reglas_binarias:
    for codigo_original, valor_binario in pares:
        data.replace({columna: codigo_original}, valor_binario, inplace=True)

# Build a balanced population for the predictive analysis.
# First group: 3000 rows sampled (fixed seed) from those with 'P3_1' > 0.
PDiabeticos = data.loc[data['P3_1'] > 0].sample(n=3000, random_state=1)
PDiabeticos

# Second group: 3000 rows sampled (same seed) from those with 'P3_1' == 0.
PNDiabeticos = data.loc[data['P3_1'] == 0].sample(n=3000, random_state=1)
PNDiabeticos

# Stack both groups: 6000 rows, half from each group.
poblacion = pd.concat([PDiabeticos, PNDiabeticos])
poblacion

# Basic descriptive statistics of the working DataFrame.
poblacion.describe()

# Y is the dependent variable ('P3_1'); X holds every remaining column.
Y = poblacion['P3_1']
X = poblacion.drop(columns='P3_1')
print(Y.value_counts())

# Split into training and test sets (80/20), stratified on Y with a fixed seed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y,
    test_size=0.2,
    random_state=42,
    stratify=Y,
)

# Standardise the features so that all of them are on the same scale.
# The scaler is fitted on the training rows only and then applied to both
# splits; the results are wrapped back into DataFrames that keep the original
# index and column labels.
# The classifier used afterwards is a logistic regression optimised with
# gradient descent ('GD').
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
sc.fit(X_train.values)

X_train_array = sc.transform(X_train.values)
X_test_array = sc.transform(X_test.values)

X_train = pd.DataFrame(X_train_array, index=X_train.index, columns=X_train.columns)
X_test = pd.DataFrame(X_test_array, index=X_test.index, columns=X_test.columns)

# Logistic regression trained with batch gradient steps
class LogisticRegressionGD(object):
    """Binary logistic-regression classifier fitted with full-batch gradient steps.

    Each iteration moves the parameters along the gradient of the
    log-likelihood (equivalently, against the gradient of the cross-entropy
    loss). The gradient is summed over all samples, not averaged, so the
    effective step size grows with the number of training rows.

    Parameters
    ----------
    l_rate : float
        Learning rate (step size) of every update.
    n_iter : int
        Number of full passes over the training data.
    random_state : int
        Seed used to draw the small random initial parameters.

    Attributes
    ----------
    theta : numpy.ndarray of shape (1 + n_features,)
        Fitted parameters, set by ``fit``; ``theta[0]`` is the intercept and
        ``theta[1:]`` are the feature weights.
    """

    def __init__(self, l_rate = 0.1, n_iter =10000, random_state =1):
        self.l_rate = l_rate
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        """Fit the model and return the parameter vector ``theta``.

        ``X`` is a 2-D array-like of shape (n_samples, n_features) and ``y``
        holds the 0/1 labels, one per row of ``X``.
        """
        # Work on plain arrays so rows of X and entries of y are paired by
        # position, never by pandas index labels.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()

        rgen = np.random.RandomState(self.random_state)
        self.theta = rgen.normal(loc = 0.0, scale = 0.01,
                                 size = 1 + X.shape[1])
        for _ in range(self.n_iter):
            h = self.sigmoid(self.net_input(X))
            errors = y - h
            # X.T @ (y - h) is the gradient of the log-likelihood; adding it
            # raises the predicted probability where y == 1 and lowers it
            # where y == 0.
            self.theta[1:] += self.l_rate * X.T.dot(errors)
            self.theta[0] += self.l_rate * errors.sum()
        return self.theta

    def sigmoid(self, z):
        """Return the logistic function of ``z`` (input clipped to [-250, 250])."""
        return 1. / (1. + np.exp(-np.clip(z, -250, 250)))

    def net_input(self, X):
        """Return the linear score ``X . theta[1:] + theta[0]`` for each row."""
        return np.dot(X, self.theta[1:]) + self.theta[0]

    def predict(self, X):
        """Return 1 where the estimated probability is >= 0.5, otherwise 0."""
        return np.where(self.sigmoid(self.net_input(X)) >= 0.5, 1, 0)

# Fit the gradient-based logistic regression and predict the test split.
regression = LogisticRegressionGD(l_rate = 0.0000001, n_iter = 20000)
coef = regression.fit(X_train, Y_train)
Y_predict = regression.predict(X_test)

# Confusion matrix for the gradient-based logistic regression.
# NOTE: `confusion_matrix` names the crosstab DataFrame built here, not the
# scikit-learn function of the same name.
# Y_test is passed as a plain NumPy array (Series.ravel is deprecated in
# recent pandas releases).
confusion_matrix = pd.crosstab(Y_predict, Y_test.to_numpy(), rownames=['Prediccion'], colnames=['Real'])
confusion_matrix

# Report the accuracy of the gradient-based model, computed from the actual
# test predictions instead of a hard-coded ratio.
print(f"La precision del modelo 'Regresion Logistica GD' es de: {100*accuracy_score(Y_test, Y_predict)}%")

# Values predicted by the model on the test split.
y_pred = regression.predict(X_test)
y_pred

# For comparison, classify the same data with an SVM (polynomial kernel).
from sklearn.svm import SVC
clf=SVC(kernel='poly').fit(X_train,Y_train)

predicciones = clf.predict(X_test)
predicciones

# Report the accuracy of the SVM model on the test split.
print(f"La precision del modelo es de: {100*clf.score(X_test,Y_test)}%")

# Confusion matrix of the SVM test predictions (replaces the previous one).
confusion_matrix = pd.crosstab(predicciones, Y_test.to_numpy(), rownames=['Prediccion'], colnames=['Real'])
confusion_matrix

# Correlation matrix of the variables in the balanced population.
# NOTE(review): PDiabeticos is rebound here to every row with P3_1 == 1, but
# the correlation below is computed on `poblacion`; confirm the rebinding is
# intended.
PDiabeticos = data[data['P3_1']==1]
corr = poblacion.corr()
corr

P3_1
1    3000
0    3000
Name: count, dtype: int64
La precision del modelo 'Regresion Logistica GD' es de: 93.75%
La precision del modelo es de: 99.08333333333333%


Unnamed: 0,P1_1,P1_4,P2_1_2,P2_1_3,P2_1_4,P2_1_5,P2_1_7,P2_2,P3_1,P3_3,...,P7_1_2,P7_2_2,P7_3_2,P7_1_3,P7_2_3,P7_3_3,P13_2,P13_11,P13_12_1,SEXO
P1_1,1.0,0.380894,0.062279,0.069028,0.047451,0.04346,0.06892,0.042889,0.19735,0.171925,...,0.12068,0.121536,0.070148,0.063217,0.109315,0.063406,-0.00431,0.005221,-0.023936,-0.11199
P1_4,0.380894,1.0,0.044069,0.046048,0.04187,0.03414,0.038621,0.069207,0.082716,0.085329,...,0.063739,0.091346,0.044578,0.050204,0.082565,0.042368,0.00033,0.012672,-0.018931,-0.038498
P2_1_2,0.062279,0.044069,1.0,0.56952,0.538103,0.266608,0.529379,0.072604,0.090064,0.053889,...,0.006301,0.029496,0.05542,0.048617,0.06333,0.03216,0.002228,-0.05193,-0.020439,-0.114596
P2_1_3,0.069028,0.046048,0.56952,1.0,0.610212,0.314885,0.690826,0.084082,0.127287,0.078525,...,0.027322,0.057555,0.056962,0.07523,0.067243,0.039586,0.016996,-0.07139,-0.032251,-0.143061
P2_1_4,0.047451,0.04187,0.538103,0.610212,1.0,0.289788,0.539595,0.093804,0.101902,0.049252,...,0.029397,0.041689,0.056313,0.065254,0.072466,0.025197,0.017743,-0.05426,-0.000624,-0.112612
P2_1_5,0.04346,0.03414,0.266608,0.314885,0.289788,1.0,0.319707,0.079676,0.067699,0.061878,...,0.023889,0.047938,0.059499,0.048594,0.061873,0.033519,0.018723,-0.00565,-0.01617,-0.074222
P2_1_7,0.06892,0.038621,0.529379,0.690826,0.539595,0.319707,1.0,0.093299,0.122636,0.089938,...,0.020394,0.04613,0.078058,0.078841,0.076242,0.058249,-0.010427,-0.079517,-0.049987,-0.162411
P2_2,0.042889,0.069207,0.072604,0.084082,0.093804,0.079676,0.093299,1.0,0.083061,0.0638,...,0.010832,-0.000183,0.043428,0.045583,0.042417,0.041454,-0.033812,-0.045076,-0.000471,-0.041826
P3_1,0.19735,0.082716,0.090064,0.127287,0.101902,0.067699,0.122636,0.083061,1.0,0.635044,...,0.175599,0.038773,0.116287,0.372674,0.203898,0.121544,-0.091396,-0.142657,-0.038399,-0.065267
P3_3,0.171925,0.085329,0.053889,0.078525,0.049252,0.061878,0.089938,0.0638,0.635044,1.0,...,0.121193,0.035924,0.09499,0.263823,0.15862,0.112435,-0.065087,-0.142986,-0.074931,-0.109128
