<a href="https://colab.research.google.com/github/JoelGV/PrediccionReadmisionHospitalaria/blob/main/PrediccionReadmisionHospitalaria.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Predicción de readmisión hospitalaria en pacientes con diabetes 
Este es un ejercicio de clasificación en el que el objetivo es determinar si un paciente tendrá (o no) una readmisión hospitalaria con base en las siguientes características:

- SystolicBPNBR: Presión sanguínea sistólica en mm Hg (p. ej., 128 mm Hg), fuente: https://www.healthline.com/health/high-blood-pressure-hypertension/blood-pressure-reading-explained

- LDLNBR: Concentración de colesterol LDL (low-density lipoprotein), que favorece los ataques cardíacos,
fuente: https://www.webmd.com/heart-disease/ldl-cholesterol-the-bad-cholesterol#1

- A1CNBR: Es una métrica del nivel promedio de azúcar en la sangre durante los últimos 2 o 3 meses. Puede tener otros nombres como: glycated hemoglobin, glycosylated hemoglobin, hemoglobin A1C y HbA1c. También sirve para diagnosticar diabetes tipo 1 o 2.
Fuente: https://www.mayoclinic.org/tests-procedures/a1c-test/about/pac-20384643

- ThirtyDayReadmitFLG: Readmisión hospitalaria en pacientes con diabetes (readmisión no planificada dentro de los 30 días posteriores a ser dado de alta del hospital). Incrementa los costos e incluso puede considerarse una medida de la calidad del servicio.
Fuentes:
https://www.researchgate.net/publication/272841299_Hospital_Readmission_of_Patients_with_Diabetes
https://www.medicare.gov/hospitalcompare/Data/30-day-measures.html
https://en.wikipedia.org/wiki/Hospital_readmission

La base de datos fue tomada de **healthcare.ai**

In [None]:
import pandas as pd                                   # Librería para manejo de datos y archivos.
import numpy as np                                    # Librería para operaciones matemáticas.
from sklearn.model_selection import train_test_split  # Librería para construir conjuntos de entrenamiento y prueba.

NOTA: Recuerda subir a Colaboratory el archivo 'DiabetesPreprocesado.csv' antes de ejecutar la siguiente celda.

In [None]:
# Load the data file into a pandas DataFrame.
# The CSV is looked up by relative path, so it must be in the working directory.
dataset = pd.read_csv(filepath_or_buffer='DiabetesPreprocesado.csv')

In [None]:
# head() returns the first rows of the DataFrame; as the last expression of
# the cell, the result is shown as the cell output.
dataset.head()

Unnamed: 0,PatientEncounterID,SystolicBPNBR,LDLNBR,A1CNBR,GenderFLG,ThirtyDayReadmitFLG
0,1,167,195,4.2,0,0
1,2,153,214,5.0,0,0
2,3,170,191,4.0,0,0
3,4,187,135,4.4,0,0
4,5,188,125,4.3,0,0


In [None]:
# Records of patients with a hospital readmission: keep only the rows whose
# ThirtyDayReadmitFLG equals 1, then display them.
datasetCR = dataset.loc[dataset['ThirtyDayReadmitFLG'].eq(1)]
datasetCR

Unnamed: 0,PatientEncounterID,SystolicBPNBR,LDLNBR,A1CNBR,GenderFLG,ThirtyDayReadmitFLG
9,10,160,130,8.0,0,1
12,13,153,218,8.0,0,1
13,14,155,218,8.0,0,1
14,15,159,218,8.0,0,1
18,19,187,218,8.0,0,1
...,...,...,...,...,...,...
959,960,161,106,8.0,0,1
960,961,153,87,8.0,0,1
961,962,162,109,8.0,0,1
962,963,160,105,8.0,0,1


## Procesamiento de los datos

In [None]:
# Remove the PatientEncounterID column from the table and preview the result.
dataset = dataset.drop(columns=['PatientEncounterID'])
dataset.head()

Unnamed: 0,SystolicBPNBR,LDLNBR,A1CNBR,GenderFLG,ThirtyDayReadmitFLG
0,167,195,4.2,0,0
1,153,214,5.0,0,0
2,170,191,4.0,0,0
3,187,135,4.4,0,0
4,188,125,4.3,0,0


In [None]:
# Min-max normalization: MinMaxScaler (default settings) rescales every column
# to the [0, 1] range.
# NOTE(review): the scaler is fitted on the whole table (test rows and the
# target column included) before the train/test split. Confirm this is
# intended, since test-set statistics then influence the preprocessing.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

# fit_transform learns the per-column minimum/maximum and applies the scaling
# in one call. The result is a NumPy array, so `dataset` is no longer a
# DataFrame after this line.
dataset = scaler.fit_transform(dataset)

# Show the first five scaled rows.
dataset[:5]

array([[0.67      , 0.83221477, 0.05      , 0.        , 0.        ],
       [0.53      , 0.95973154, 0.25      , 0.        , 0.        ],
       [0.7       , 0.80536913, 0.        , 0.        , 0.        ],
       [0.87      , 0.4295302 , 0.1       , 0.        , 0.        ],
       [0.88      , 0.36241611, 0.075     , 0.        , 0.        ]])

In [None]:
# Build the training and test sets.
# Every column except the last one is a feature; the last column is the target.
caracteristicas, target = dataset[:, :-1], dataset[:, -1]
particion = train_test_split(
    caracteristicas,
    target,
    test_size=0.20,    # hold out 20% of the rows for testing
    random_state=10,   # fixed seed so the partition is the same on every run
)
x_train, x_test, y_train, y_test = particion

In [None]:
# Check the sizes of the training and test sets, printed in the order
# x_train, y_train, x_test, y_test.
for conjunto in (x_train, y_train, x_test, y_test):
    print(conjunto.shape)

(800, 4)
(800,)
(200, 4)
(200,)


## Construcción de la máquina de vectores de soporte

In [None]:
from sklearn import svm

In [None]:
# Support vector classifier with a degree-3 polynomial kernel and C=100.
# probability=True enables predict_proba, which a later cell calls on this
# model. The probability estimates rely on an internal shuffle of the data,
# so random_state is fixed to make them repeatable between runs (the
# train/test split in this notebook is seeded the same way).
modeloSVM = svm.SVC(C=100, kernel='poly', degree=3, probability=True,
                    random_state=10)
modeloSVM.fit(x_train, y_train)

SVC(C=100, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='poly',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [None]:
# Evaluation: mean accuracy of the SVM on the training and test sets.
acc_train = modeloSVM.score(x_train, y_train)
acc_test = modeloSVM.score(x_test, y_test)
for etiqueta, valor in (('acc_train = ', acc_train), ('acc_test = ', acc_test)):
    print(etiqueta, valor)

acc_train =  0.88125
acc_test =  0.885


In [None]:
# Display the true labels of the test set.
y_test

array([0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
       1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
       0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0.,
       0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 1.])

In [None]:
# Display the labels the SVM predicts for the test-set samples.
modeloSVM.predict(x_test)

array([0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
       0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0.])

In [None]:
# Display the feature values of the test sample at index 1.
x_test[1]

array([0.76     , 0.4966443, 1.       , 0.       ])

## Prueba del modelo

In [None]:
# Data of a fictitious patient, used to try out a classification with the
# trained models. It is a single row with four values, one per feature column.
# NOTE(review): the values look like they are already in the [0, 1] scaled
# feature space the models were trained on -- confirm before reusing.
paciente = np.array([0.7, 0.5, 1.0, 0]).reshape(1, -1)

In [None]:
# predict_proba does not return a 0/1 label: it returns one row per sample
# with the estimated probability of each class. The first value corresponds to
# class 0 (the patient will NOT have a hospital readmission) and the second to
# class 1 (the patient will have a hospital readmission).
# The larger the second value, the more likely the model considers a
# readmission for this patient.
diagnostico = modeloSVM.predict_proba(paciente)
print(diagnostico)

[[0.34132977 0.65867023]]


## Árbol de decisión

In [None]:
from sklearn import tree

In [None]:
# Decision tree that uses entropy as the split criterion and only splits
# nodes holding at least 5 samples.
# random_state is fixed because the tree breaks ties between equally good
# splits at random; without a seed the fitted tree (and its scores) can
# change from run to run.
modeloTree = tree.DecisionTreeClassifier(criterion='entropy',
                                         min_samples_split=5,
                                         random_state=10)
modeloTree.fit(x_train, y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='entropy',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=5,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

In [None]:
# Evaluation: mean accuracy of the decision tree on the training and test sets.
acc_train_tree = modeloTree.score(x_train, y_train)
acc_test_tree = modeloTree.score(x_test, y_test)
for etiqueta, valor in (('acc_train = ', acc_train_tree),
                        ('acc_test = ', acc_test_tree)):
    print(etiqueta, valor)

acc_train =  0.975
acc_test =  0.865


In [None]:
# Test
# predict_proba does not return a 0/1 label: it returns one row per sample
# with the estimated probability of each class. The first value corresponds to
# class 0 (the patient will NOT have a hospital readmission) and the second to
# class 1 (the patient will have a hospital readmission).
# The larger the second value, the more likely the model considers a
# readmission for this patient.
diagnostico = modeloTree.predict_proba(paciente)
print(diagnostico)

[[0.57142857 0.42857143]]


## Método de ensamble

In [None]:
from sklearn import ensemble

In [None]:
# AdaBoost ensemble of 100 estimators, using the library's default base
# estimator.
# random_state is fixed so the fitted ensemble and its scores are repeatable
# between runs (the train/test split in this notebook is seeded the same way).
modeloAdaboost = ensemble.AdaBoostClassifier(n_estimators=100, random_state=10)
modeloAdaboost.fit(x_train, y_train)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=1.0,
                   n_estimators=100, random_state=None)

In [None]:
# AdaBoost ensemble of 100 estimators that uses the SVM configuration as its
# base estimator.
# The base estimator is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in newer scikit-learn releases (and the
# old name removed), while the first positional parameter works on both.
# random_state is fixed so the results are repeatable between runs.
modeloAdaboostMSV = ensemble.AdaBoostClassifier(modeloSVM, n_estimators=100,
                                                random_state=10)
modeloAdaboostMSV.fit(x_train, y_train)
# Evaluation: mean accuracy on the training and test sets.
acc_train_ABMSV = modeloAdaboostMSV.score(x_train, y_train)
acc_test_ABMSV = modeloAdaboostMSV.score(x_test, y_test)
print('acc_train = ', acc_train_ABMSV)
print('acc_test = ', acc_test_ABMSV)

acc_train =  0.84
acc_test =  0.86


In [None]:
# Evaluation: mean accuracy of the default AdaBoost ensemble on the training
# and test sets.
acc_train_AB = modeloAdaboost.score(x_train, y_train)
acc_test_AB = modeloAdaboost.score(x_test, y_test)
for etiqueta, valor in (('acc_train = ', acc_train_AB),
                        ('acc_test = ', acc_test_AB)):
    print(etiqueta, valor)

acc_train =  0.87375
acc_test =  0.845


In [None]:
# Random forest of 100 trees.
# The forest draws bootstrap samples and random feature subsets while it is
# built, so random_state is fixed to make the fitted model and its scores
# repeatable between runs.
modeloRF = ensemble.RandomForestClassifier(n_estimators=100, random_state=10)
modeloRF.fit(x_train, y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [None]:
# Evaluation: mean accuracy of the random forest on the training and test sets.
acc_train_RF = modeloRF.score(x_train, y_train)
acc_test_RF = modeloRF.score(x_test, y_test)
for etiqueta, valor in (('acc_train = ', acc_train_RF),
                        ('acc_test = ', acc_test_RF)):
    print(etiqueta, valor)

acc_train =  0.99875
acc_test =  0.915
