# Desarrollo de modelos predictivos para estimación de mortalidad en pacientes COVID-19

<h1>Tabla de Contenido<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Desarrollo-de-modelos-predictivos-para-estimación-de-mortalidad-en-pacientes-COVID-19" data-toc-modified-id="Desarrollo-de-modelos-predictivos-para-estimación-de-mortalidad-en-pacientes-COVID-19-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Desarrollo de modelos predictivos para estimación de mortalidad en pacientes COVID-19</a></span><ul class="toc-item"><li><span><a href="#Librerías" data-toc-modified-id="Librerías-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Librerías</a></span></li></ul></li><li><span><a href="#Preprocesado-de-Datos" data-toc-modified-id="Preprocesado-de-Datos-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Preprocesado de Datos</a></span><ul class="toc-item"><li><span><a href="#Lectura-de-los-datos" data-toc-modified-id="Lectura-de-los-datos-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Lectura de los datos</a></span><ul class="toc-item"><li><span><a href="#Eliminado,-renombre-e-imputación-de-variables" data-toc-modified-id="Eliminado,-renombre-e-imputación-de-variables-2.1.1"><span class="toc-item-num">2.1.1&nbsp;&nbsp;</span>Eliminado, renombre e imputación de variables</a></span></li></ul></li><li><span><a href="#Columnas-del-modelo-a-Priori" data-toc-modified-id="Columnas-del-modelo-a-Priori-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Columnas del modelo a Priori</a></span></li></ul></li><li><span><a href="#Estos-son-los-datos-definitivos-que-vamos-a-pasar-por-correo" data-toc-modified-id="Estos-son-los-datos-definitivos-que-vamos-a-pasar-por-correo-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Estos son los datos definitivos que vamos a pasar por correo</a></span></li></ul></div>

## Librerías

In [2]:
import pandas as pd
import numpy as np
import UTILS_LT as LT

pd.options.display.max_columns = None

# Para usar los Núcleos de la CPU
# ==============================================================================
import multiprocessing

# Machine Learning
# ==============================================================================
import umap
from imblearn.pipeline import make_pipeline
# Métricas
from sklearn import metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, auc, precision_recall_curve
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA


#modelos
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier


#selección variables
from sklearn.model_selection import cross_val_score, cross_validate, GridSearchCV,train_test_split, ParameterGrid,RepeatedKFold, StratifiedKFold

# Imb-learn
from imblearn.under_sampling import NearMiss, RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler, SMOTE, ADASYN


%matplotlib inline

# Gráficos
# ==============================================================================
import matplotlib.pyplot as plt
from matplotlib import style
import matplotlib.ticker as ticker
import seaborn as sns
import plotly.io as plt_io
import plotly.graph_objects as go

# Configuración Previa
pd.set_option('display.max_columns', None)

# Configuración matplotlib
# ==============================================================================
plt.rcParams['image.cmap'] = "bwr"
#plt.rcParams['figure.dpi'] = "100"
plt.rcParams['savefig.bbox'] = "tight"
style.use('ggplot') or plt.style.use('ggplot')

# Configuración warnings
# ==============================================================================
import warnings
warnings.filterwarnings('ignore')

# Preprocesado de Datos

## Lectura de los datos

In [4]:
data = pd.read_csv('./my_data.csv')

### Eliminado, renombre e imputación de variables

In [5]:
LT.normalize_columns(data, inplace = True)

Problemas = ['C�NCER_NO_ACT',
            'C�NCER_TTO',
            'VALVULOPAT�A_PRE',
            'SUPRESI�N',
            'HD�A',
            'MIOCARDIOPAT�AS_PRE']

Arreglar = ['CANCER_NO_ACT',
            'CANCER_TTO',
            'VALVULOPATIA_PRE',
            'SUPRESION',
            'HDIA',
            'MIOCARDIOPATIAS_PRE']

data.rename(columns = dict(zip(Problemas,Arreglar)), inplace = True)

Eliminar_variables = ['UNNAMED: 0',
                      'ID',
                      'CCPP',
                      'DISTRITO',
                      'FECH_NAC',
                      'FECH_INGR_HOSP',
                      'FECH_INGR_UCI',
                      'FECHA_ALTA_HOSPITAL',
                      'FECHA_ALTA_UCI',
                      'VACUNADO_2019',
                      'VIH',
                      'VIH_ESTADO_INM',
                      'SUPRESION',
                      'TIPO_AFECT_PULMONAR_2',
                      'FECHA_SINTOMAS',
                      'FECHA_PCR',
                      'GRD_APR',
                      'NIVEL_GRAVEDAD',
                      'NIVEL_SEVERIDAD',
                      'REINGRESO',
                      'DIAG_1',
                      'DSN',
                      'FECHA_VACUNAC',
                      'GRIPE_2019',
                      'FECHA_DIAG_GRIPE',
                      'SEROTIPO_GRIPAL',
                      'TIPO_VACUNA_GRIPE',
                      'POA_DSN',
                      'EXITUS_RELATIVO',
                      'TIPALT',
                      'EVOLUCION']

data['ESTANCIA_UCI'] = data['ESTANCIA_UCI'].fillna(0)
data.drop(Eliminar_variables, axis = 'columns', inplace = True)

data['VALORES_FALTANTES'] = pd.isnull(data).sum(axis = 1)

cuantil_98 = data['VALORES_FALTANTES'].quantile(0.98)
data.drop(np.where(data['VALORES_FALTANTES'] > cuantil_98)[0], axis = 0, inplace = True)

data.reset_index(drop = True, inplace = True)

mapeo = {'Residencia':1,
         'No residencia':0}
data['RESIDENCIA'] = data['RESIDENCIA'].map(mapeo)

mapeo_sexo = {'Hombre':1,
              'Mujer':0}
data['SEXO'] = data['SEXO'].map(mapeo_sexo)


mapeo_tiping = {'Urgente':1,
              'Programado':0}
data['TIPING'] = data['TIPING'].map(mapeo_tiping)

# mapeo_procedencia = dict(zip(['Residencia', 'Domicilio', 'Otras', 'Otro Hospital'],[2,3,4,1]))
# data['PROCEDENCIA_ORIGEN'] = data['PROCEDENCIA_ORIGEN'].map(mapeo_procedencia)


#falta verificar, no sabemos lo que es más grave si I o IV
data['GRADO_ICC'] = data['GRADO_ICC'].fillna('0')
mapeo_grado_icc = dict(zip(['0', 'CF II', 'CF III', 'CF IV', 'CF I'],[0,2,3,4,1]))
data['GRADO_ICC'] = data['GRADO_ICC'].map(mapeo_grado_icc)

dis_df_columns = data.select_dtypes(exclude=np.number).columns
num_df_columns = data.select_dtypes(include=np.number).columns

print('Discretas: ',dis_df_columns)
print('\nNuméricas: ',num_df_columns)
print("===============================================================================")
num_values_dis_df_col = list(map(lambda col: (col,len(data[col].value_counts())), dis_df_columns))
print(num_values_dis_df_col)

dis_df_col_bin = list(map(lambda cv: cv[0],(filter(lambda cv: cv[1]==2,num_values_dis_df_col))))

def binarizar(x):
    if x == 'No':
        return 0
    elif x == 'Si':
        return 1
    else:
        return x
    
data[dis_df_col_bin] = data[dis_df_col_bin].applymap(lambda x: binarizar(x))
data['TIPO_AFECT_PULMONAR'] = data['TIPO_AFECT_PULMONAR'].fillna('infiltrado periferico bilateral')


data = pd.concat([data,pd.get_dummies(data['TIPO_AFECT_PULMONAR'], prefix='AFECT_PULM')], axis = 'columns')
data.drop('TIPO_AFECT_PULMONAR', axis = 'columns', inplace = True)
data.drop('ESTANCIA_TOT', axis = 'columns', inplace = True)
data.drop('RESIDENCIA', axis = 'columns', inplace = True)
data.drop('INGRESO_UCI', axis = 'columns', inplace = True)

data = pd.concat([data, pd.get_dummies(data['PROCEDENCIA_ORIGEN'], prefix='PROCEDENCIA')], axis = 'columns')
data.drop('PROCEDENCIA_ORIGEN', axis = 'columns', inplace = True)
data['ESTANCIA_MED'].fillna(0, inplace = True)
data['CANCER_TTO'].fillna(0, inplace = True)
data.drop('PH_ING', axis = 'columns', inplace = True)
data.drop('TIPING', axis = 'columns', inplace = True)
data.drop('P02_ART_ING', axis = 'columns', inplace = True)
data.drop('PCO2_ART_ING', axis = 'columns', inplace = True)
data.drop('VALORES_FALTANTES', axis = 'columns', inplace = True)
data['DIMEROD_ING'].fillna(data['DIMEROD_ING'].median(), inplace = True)
data['IL6_ING'].fillna(data['IL6_ING'].median(), inplace = True)

columnas_faltantes = ['TROPO_ING', 'FERRITINA_ING', 'ALBUMINA_ING', 'LACTATO_ING', 'FR_ING',
                       'BRR_TOT_ING', 'K_ING', 'ACT_PROTR_ING', 'GOT_ING', 'EXITUS', 'NA_ING',
                       'UREA_ING', 'TAS_ING', 'PREICTUS_PRE', 'GPT_ING', 'TAD_ING']

data['ESTANCIA_TOTAL'] = data['ESTANCIA_MED'] + data['ESTANCIA_UCI']

for columna in columnas_faltantes:
    data[columna].fillna(data[columna].median(), inplace = True)

Discretas:  Index(['INGRESO_UCI', 'BROTE_INTROHOSP', 'EXITUS', 'HIPOTIROIDISMO_PRE',
       'HTA_PRE', 'EPOC_PRE', 'ASMA_BRONQUIAL_PRE', 'PROCEDENCIA_ORIGEN',
       'HEPAT_PRE', 'FUMADOR', 'EXFUMADOR', 'ICC_PRE', 'PREICTUS_PRE',
       'DISLIPEMIA_PRE', 'DM_PRE', 'CANCER_NO_ACT', 'CANCER_TTO',
       'SOBREPESO_OBESIDAD', 'VALVULOPATIA_PRE', 'INSUF_RENAL_PRE',
       'C_ISQUEM_PRE', 'ARR_PRE', 'TEP_PRE', 'HDIA', 'TTO_BIOL', 'DIALISIS',
       'MIOCARDIOPATIAS_PRE', 'ANEMIA_PRE', 'VACUNADO_NEUMOCOCO_13_PRE',
       'VACUNADO_NEUMOCOCO_23_PRE', 'TIPO_AFECT_PULMONAR', 'DERRAME_INGR',
       'TEP_INGR', 'VMNI', 'ONAF', 'IOT', 'CONFUSION'],
      dtype='object')

Numéricas:  Index(['EDAD', 'SEXO', 'ESTANCIA_TOT', 'ESTANCIA_MED', 'ESTANCIA_UCI',
       'TIPING', 'RESIDENCIA', 'GRADO_ICC', 'ALBUMINA_ING', 'BRR_TOT_ING',
       'LACTATO_ING', 'GOT_ING', 'GPT_ING', 'PCR_ING', 'LEUC_ING', 'PMN_ING',
       'PLAQUETAS_ING', 'ACT_PROTR_ING', 'PH_ING', 'P02_ART_ING',
       'PCO2_ART_ING', 'NA_ING

In [6]:
data[0:2]

Unnamed: 0,EDAD,SEXO,ESTANCIA_MED,ESTANCIA_UCI,BROTE_INTROHOSP,EXITUS,HIPOTIROIDISMO_PRE,HTA_PRE,EPOC_PRE,ASMA_BRONQUIAL_PRE,HEPAT_PRE,FUMADOR,EXFUMADOR,ICC_PRE,GRADO_ICC,PREICTUS_PRE,DISLIPEMIA_PRE,DM_PRE,CANCER_NO_ACT,CANCER_TTO,SOBREPESO_OBESIDAD,VALVULOPATIA_PRE,INSUF_RENAL_PRE,C_ISQUEM_PRE,ARR_PRE,TEP_PRE,HDIA,TTO_BIOL,DIALISIS,MIOCARDIOPATIAS_PRE,ANEMIA_PRE,VACUNADO_NEUMOCOCO_13_PRE,VACUNADO_NEUMOCOCO_23_PRE,DERRAME_INGR,TEP_INGR,VMNI,ONAF,IOT,ALBUMINA_ING,BRR_TOT_ING,LACTATO_ING,GOT_ING,GPT_ING,PCR_ING,LEUC_ING,PMN_ING,PLAQUETAS_ING,ACT_PROTR_ING,NA_ING,K_ING,CR_ING,UREA_ING,TROPO_ING,FERRITINA_ING,IL6_ING,DIMEROD_ING,TAS_ING,TAD_ING,FC_ING,FR_ING,SAT02_ING,CONFUSION,AFECT_PULM_Infiltrado difuso bilateral,AFECT_PULM_Infiltrado difuso unilateral,AFECT_PULM_Infiltrado o Condensacion lobar,AFECT_PULM_infiltrado periferico bilateral,AFECT_PULM_infiltrado periferico unilateral,PROCEDENCIA_Domicilio,PROCEDENCIA_Otras,PROCEDENCIA_Otro Hospital,PROCEDENCIA_Residencia,ESTANCIA_TOTAL
0,85.0,1,2.0,0.0,0,0.0,0,0,1,0,0,0,1,0,0,1.0,1,0,1,0.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,3.3,1.72,359.0,30.0,24.0,1.14,7600.0,66.9,244000.0,97.0,140.0,5.0,0.89,48.0,13.9,73.0,31.67,5522.0,124.0,68.0,94.0,12.0,91.0,0,1,0,0,0,0,0,0,0,1,2.0
1,36.0,1,11.0,0.0,0,0.0,0,0,0,1,0,0,0,0,0,0.0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3.6,0.56,478.0,27.0,29.0,3.44,6660.0,39.6,187000.0,85.0,139.0,3.7,0.89,39.0,1.5,310.0,26.14,383.0,110.0,70.0,80.0,16.0,98.0,0,1,0,0,0,0,1,0,0,0,11.0


## Columnas del modelo a Priori

In [7]:
columnas_totales = ['ESTANCIA_MED',
                    'IL6_ING',
                    'ALBUMINA_ING',
                    'ONAF',
                    'DIMEROD_ING',
                    'PLAQUETAS_ING',
                    'CR_ING',
                    'TROPO_ING',
                    'PMN_ING',
                    'FR_ING',
                    'EDAD',
                    'BROTE_INTROHOSP',
                    'SAT02_ING',
                    'PCR_ING',
                    'FERRITINA_ING',
                    'AFECT_PULM_infiltrado periferico bilateral',
                    'GPT_ING',
                    'AFECT_PULM_Infiltrado difuso bilateral',
                    'ACT_PROTR_ING',
                    'VACUNADO_NEUMOCOCO_23_PRE',
                    'IOT',
                    'TAD_ING',
                    'CANCER_TTO',
                    'FC_ING',
                    'VACUNADO_NEUMOCOCO_13_PRE',
                    'EPOC_PRE',
                    'TAS_ING',
                    'LACTATO_ING',
                    'LEUC_ING',
                    'DERRAME_INGR',
                    'K_ING',
                    'HIPOTIROIDISMO_PRE',
                    'FUMADOR',
                    'ANEMIA_PRE',
                    'PROCEDENCIA_Otro Hospital',
                    'BRR_TOT_ING',
                    'HTA_PRE',
                    'C_ISQUEM_PRE',
                    'ESTANCIA_UCI',
                    'DM_PRE',
                    'AFECT_PULM_Infiltrado o Condensacion lobar',
                    'HEPAT_PRE',
                    'PROCEDENCIA_Domicilio',
                    'GOT_ING',
                    'SOBREPESO_OBESIDAD',
                    'GRADO_ICC',
                    'ASMA_BRONQUIAL_PRE',
                    'HDIA',
                    'DISLIPEMIA_PRE',
                    'PROCEDENCIA_Residencia',
                    'PROCEDENCIA_Otras',
                    'CANCER_NO_ACT',
                    'TEP_INGR',
                    'VMNI',
                    'AFECT_PULM_infiltrado periferico unilateral',
                    'TTO_BIOL',
                    'TEP_PRE',
                    'ARR_PRE',
                    'SEXO',
                    'NA_ING',
                    'AFECT_PULM_Infiltrado difuso unilateral',
                    'VALVULOPATIA_PRE',
                    'DIALISIS',
                    'EXFUMADOR',
                    'PREICTUS_PRE',
                    'MIOCARDIOPATIAS_PRE',
                    'ICC_PRE',
                    'UREA_ING',
                    'CONFUSION',
                    'INSUF_RENAL_PRE',
                    'EXITUS']

Eliminar = ['ESTANCIA_MED',
            'ONAF',
            'VACUNADO_NEUMOCOCO_23_PRE',
            'IOT',
            'VACUNADO_NEUMOCOCO_13_PRE',
            'PROCEDENCIA_Otro Hospital',
            'ESTANCIA_UCI',
            'PROCEDENCIA_Domicilio',
            'PROCEDENCIA_Otras',
            'VMNI',
            'TTO_BIOL']

Modelo_a_Priori = list(set(columnas_totales) - set(Eliminar))


data = data[Modelo_a_Priori]

In [8]:
transformar = ['AFECT_PULM_Infiltrado difuso bilateral',
               'AFECT_PULM_Infiltrado difuso unilateral',
               'AFECT_PULM_Infiltrado o Condensacion lobar',
               'AFECT_PULM_infiltrado periferico bilateral',
               'AFECT_PULM_infiltrado periferico unilateral', 'PROCEDENCIA_Domicilio',
               'PROCEDENCIA_Otras', 'PROCEDENCIA_Otro Hospital',
               'PROCEDENCIA_Residencia']

transformadas = [x.replace(' ','_').upper() for x in transformar]

In [9]:
data.rename(columns = dict(zip(transformar, transformadas)), inplace = True)

# Estos son los datos definitivos que vamos a pasar por correo

In [10]:
data

Unnamed: 0,PROCEDENCIA_RESIDENCIA,DIMEROD_ING,ANEMIA_PRE,SEXO,FERRITINA_ING,GPT_ING,EPOC_PRE,GRADO_ICC,EXITUS,PCR_ING,INSUF_RENAL_PRE,FC_ING,LACTATO_ING,TAS_ING,DM_PRE,VALVULOPATIA_PRE,ALBUMINA_ING,MIOCARDIOPATIAS_PRE,CONFUSION,HEPAT_PRE,CANCER_NO_ACT,AFECT_PULM_INFILTRADO_PERIFERICO_BILATERAL,UREA_ING,TEP_PRE,K_ING,HIPOTIROIDISMO_PRE,PREICTUS_PRE,AFECT_PULM_INFILTRADO_DIFUSO_UNILATERAL,DISLIPEMIA_PRE,IL6_ING,FR_ING,GOT_ING,SAT02_ING,FUMADOR,ARR_PRE,DIALISIS,TROPO_ING,PMN_ING,AFECT_PULM_INFILTRADO_PERIFERICO_UNILATERAL,C_ISQUEM_PRE,TAD_ING,ASMA_BRONQUIAL_PRE,EXFUMADOR,CANCER_TTO,SOBREPESO_OBESIDAD,LEUC_ING,HTA_PRE,CR_ING,ACT_PROTR_ING,BRR_TOT_ING,NA_ING,HDIA,ICC_PRE,PLAQUETAS_ING,AFECT_PULM_INFILTRADO_O_CONDENSACION_LOBAR,EDAD,DERRAME_INGR,TEP_INGR,BROTE_INTROHOSP,AFECT_PULM_INFILTRADO_DIFUSO_BILATERAL
0,1,5522.0,0,1,73.0,24.0,1,0,0.0,1.14,0,94.0,359.0,124.0,0,0,3.3,0,0,0,1,0,48.00,0,5.0,0,1.0,0,1,31.67,12.0,30.0,91.0,0,1,0,13.9,66.90,0,0,68.0,0,1,0.0,0,7600.0,0,0.89,97.0,1.72,140.0,0,0,244000.0,0,85.0,0,0,0,1
1,0,383.0,0,1,310.0,29.0,0,0,0.0,3.44,0,80.0,478.0,110.0,0,0,3.6,0,0,0,0,0,39.00,0,3.7,0,0.0,0,0,26.14,16.0,27.0,98.0,0,0,0,1.5,39.60,0,0,70.0,1,0,0.0,0,6660.0,0,0.89,85.0,0.56,139.0,0,0,187000.0,0,36.0,0,0,0,1
2,0,302.0,0,1,122.0,19.0,0,0,1.0,18.01,0,82.0,886.0,135.0,0,0,3.3,0,0,0,0,0,32.00,0,3.8,0,0.0,0,0,658.60,30.0,41.0,93.0,0,0,0,8.5,88.70,0,0,70.0,0,0,0.0,1,12450.0,0,0.92,62.0,0.46,145.0,0,0,169000.0,0,48.0,0,0,0,1
3,1,882.0,0,0,283.0,31.0,1,2,1.0,9.74,0,99.0,811.0,180.0,0,1,2.8,0,0,0,0,0,0.46,0,4.6,1,0.0,0,1,326.40,28.0,38.0,93.0,0,0,0,71.3,88.30,0,0,90.0,1,0,0.0,0,18500.0,1,0.61,82.0,0.63,142.0,0,1,225000.0,0,95.0,0,0,0,1
4,0,1024.0,1,1,313.0,19.0,0,0,0.0,10.73,1,60.0,640.0,150.0,1,0,3.2,0,0,0,1,0,112.00,0,4.5,0,0.0,0,1,99.57,22.0,28.0,94.0,0,0,1,21.8,96.08,0,0,80.0,0,0,0.0,0,6580.0,1,3.09,95.0,0.29,141.0,1,0,140000.0,0,64.0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1117,0,506.0,0,0,434.0,36.0,0,0,0.0,8.70,0,92.0,693.0,140.0,0,0,3.7,0,0,0,0,1,17.00,0,4.2,0,0.0,0,0,73.00,15.0,38.0,99.0,0,0,0,3.1,80.00,0,0,70.0,0,0,0.0,0,5610.0,0,0.49,82.0,0.32,134.0,0,0,171000.0,0,52.0,0,0,0,0
1118,0,1471.0,0,0,1501.0,149.0,0,0,0.0,7.32,0,97.0,631.0,153.0,0,0,3.0,0,0,0,0,1,17.30,0,4.0,0,0.0,0,0,34.50,12.0,219.0,96.0,0,0,0,10.0,81.00,0,0,88.0,0,0,0.0,0,6800.0,0,0.73,93.0,0.55,139.0,0,0,202600.0,0,49.0,0,0,0,0
1119,0,14678.0,0,1,1049.0,46.0,0,0,0.0,11.20,0,95.0,750.0,165.0,0,0,3.2,0,0,0,0,1,53.00,0,3.9,0,0.0,0,1,80.12,12.0,63.0,86.0,1,0,0,10.0,80.00,0,0,105.0,0,0,0.0,0,8470.0,1,0.91,70.0,1.05,138.0,0,0,310900.0,0,53.0,0,1,0,0
1120,0,1248.0,0,0,546.0,15.0,0,0,0.0,13.46,1,95.0,325.0,140.0,0,0,3.0,0,0,0,0,1,61.00,0,4.8,0,0.0,0,1,43.29,12.0,20.0,97.0,0,1,0,9.7,78.00,0,0,90.0,0,0,0.0,1,8360.0,1,1.77,111.0,0.35,135.0,0,0,168600.0,0,85.0,0,0,0,0


In [13]:
import pyreadstat

pyreadstat.write_sav(data, 'Data_preprocesada.sav')

In [18]:
data[[x.replace(' ','_').upper() for x in Modelo_a_Priori]]

Unnamed: 0,FC_ING,AFECT_PULM_INFILTRADO_PERIFERICO_BILATERAL,DIMEROD_ING,ICC_PRE,CONFUSION,ANEMIA_PRE,PLAQUETAS_ING,VALVULOPATIA_PRE,DISLIPEMIA_PRE,UREA_ING,DERRAME_INGR,TEP_PRE,C_ISQUEM_PRE,FR_ING,PREICTUS_PRE,FERRITINA_ING,BRR_TOT_ING,CR_ING,FUMADOR,EDAD,CANCER_TTO,GPT_ING,ASMA_BRONQUIAL_PRE,PROCEDENCIA_RESIDENCIA,DM_PRE,K_ING,IL6_ING,GRADO_ICC,LACTATO_ING,SOBREPESO_OBESIDAD,ARR_PRE,AFECT_PULM_INFILTRADO_DIFUSO_UNILATERAL,LEUC_ING,CANCER_NO_ACT,EXITUS,AFECT_PULM_INFILTRADO_PERIFERICO_UNILATERAL,TAD_ING,TROPO_ING,PMN_ING,DIALISIS,MIOCARDIOPATIAS_PRE,AFECT_PULM_INFILTRADO_O_CONDENSACION_LOBAR,GOT_ING,AFECT_PULM_INFILTRADO_DIFUSO_BILATERAL,EPOC_PRE,INSUF_RENAL_PRE,PCR_ING,HIPOTIROIDISMO_PRE,ACT_PROTR_ING,HDIA,TAS_ING,HEPAT_PRE,HTA_PRE,BROTE_INTROHOSP,NA_ING,ALBUMINA_ING,EXFUMADOR,SEXO,TEP_INGR,SAT02_ING
0,94.0,0,5522.0,0,0,0,244000.0,0,1,48.00,0,0,0,12.0,1.0,73.0,1.72,0.89,0,85.0,0.0,24.0,0,1,0,5.0,31.67,0,359.0,0,1,0,7600.0,1,0.0,0,68.0,13.9,66.90,0,0,0,30.0,1,1,0,1.14,0,97.0,0,124.0,0,0,0,140.0,3.3,1,1,0,91.0
1,80.0,0,383.0,0,0,0,187000.0,0,0,39.00,0,0,0,16.0,0.0,310.0,0.56,0.89,0,36.0,0.0,29.0,1,0,0,3.7,26.14,0,478.0,0,0,0,6660.0,0,0.0,0,70.0,1.5,39.60,0,0,0,27.0,1,0,0,3.44,0,85.0,0,110.0,0,0,0,139.0,3.6,0,1,0,98.0
2,82.0,0,302.0,0,0,0,169000.0,0,0,32.00,0,0,0,30.0,0.0,122.0,0.46,0.92,0,48.0,0.0,19.0,0,0,0,3.8,658.60,0,886.0,1,0,0,12450.0,0,1.0,0,70.0,8.5,88.70,0,0,0,41.0,1,0,0,18.01,0,62.0,0,135.0,0,0,0,145.0,3.3,0,1,0,93.0
3,99.0,0,882.0,1,0,0,225000.0,1,1,0.46,0,0,0,28.0,0.0,283.0,0.63,0.61,0,95.0,0.0,31.0,1,1,0,4.6,326.40,2,811.0,0,0,0,18500.0,0,1.0,0,90.0,71.3,88.30,0,0,0,38.0,1,1,0,9.74,1,82.0,0,180.0,0,1,0,142.0,2.8,0,0,0,93.0
4,60.0,0,1024.0,0,0,1,140000.0,0,1,112.00,0,0,0,22.0,0.0,313.0,0.29,3.09,0,64.0,0.0,19.0,0,0,1,4.5,99.57,0,640.0,0,0,0,6580.0,1,0.0,0,80.0,21.8,96.08,1,0,0,28.0,1,0,1,10.73,0,95.0,1,150.0,0,1,0,141.0,3.2,0,1,0,94.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1117,92.0,1,506.0,0,0,0,171000.0,0,0,17.00,0,0,0,15.0,0.0,434.0,0.32,0.49,0,52.0,0.0,36.0,0,0,0,4.2,73.00,0,693.0,0,0,0,5610.0,0,0.0,0,70.0,3.1,80.00,0,0,0,38.0,0,0,0,8.70,0,82.0,0,140.0,0,0,0,134.0,3.7,0,0,0,99.0
1118,97.0,1,1471.0,0,0,0,202600.0,0,0,17.30,0,0,0,12.0,0.0,1501.0,0.55,0.73,0,49.0,0.0,149.0,0,0,0,4.0,34.50,0,631.0,0,0,0,6800.0,0,0.0,0,88.0,10.0,81.00,0,0,0,219.0,0,0,0,7.32,0,93.0,0,153.0,0,0,0,139.0,3.0,0,0,0,96.0
1119,95.0,1,14678.0,0,0,0,310900.0,0,1,53.00,0,0,0,12.0,0.0,1049.0,1.05,0.91,1,53.0,0.0,46.0,0,0,0,3.9,80.12,0,750.0,0,0,0,8470.0,0,0.0,0,105.0,10.0,80.00,0,0,0,63.0,0,0,0,11.20,0,70.0,0,165.0,0,1,0,138.0,3.2,0,1,1,86.0
1120,95.0,1,1248.0,0,0,0,168600.0,0,1,61.00,0,0,0,12.0,0.0,546.0,0.35,1.77,0,85.0,0.0,15.0,0,0,0,4.8,43.29,0,325.0,1,1,0,8360.0,0,0.0,0,90.0,9.7,78.00,0,0,0,20.0,0,0,1,13.46,0,111.0,0,140.0,0,1,0,135.0,3.0,0,0,0,97.0


In [20]:
data2 = data[[x.replace(' ','_').upper() for x in Modelo_a_Priori]].copy()

pyreadstat.write_sav(data2, 'Data_preprocesada_modelo_APriori.sav')