## Modelos 

In [9]:
# Librerias requeridas
import pandas as pd
import numpy as np
import joblib
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.preprocessing import StandardScaler # Para normalizar de datos

# Actualizar el archivo de funciones
import importlib 
import A_Funciones as funciones # Este archivo contiene las funciones a utilizar
importlib.reload(funciones) # Actualiza los cambios en el archivo de las funciones

# Algoritmos para los modelos 
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRFRegressor
from sklearn.neighbors import KNeighborsRegressor 
from sklearn import svm
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split


In [15]:
# Cargar los datos de reclamaciones 
df_reclamaciones = joblib.load('Data_final\\df_final_reclamaciones.pkl')

In [16]:
df_reclamaciones

Unnamed: 0,Asegurado_Id,Sexo,Ciudad,CANCER,EPOC,DIABETES,HIPERTENSION,ENF_CARDIOVASCULAR,Reclamacion,Eventos,Valor_Pagado,Edad,Duracion_Poliza_Dias,Clasificacion_Diagnostico
0,18686469,1.0,Bogota,0,0,0,0,0.0,EXAMENES DE DIAGNOSTICO,3,1579219,40,61.0,Diagnostico Pendiente
1,18686469,1.0,Bogota,0,0,0,0,0.0,CONSULTA EXTERNA,1,442805,40,61.0,Diagnostico Pendiente
2,16087775,0.0,Medellin,0,0,0,0,0.0,CONSULTA DE URGENCIAS,1,461626,40,729.0,Diagnostico Pendiente
3,16087775,0.0,Medellin,0,0,0,0,0.0,CONSULTA DE URGENCIAS,1,593054,40,729.0,Diagnostico Pendiente
4,16087775,0.0,Medellin,0,0,0,0,0.0,CONSULTA DE URGENCIAS,1,455257,40,729.0,Diagnostico Pendiente
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3781400,34178573,1.0,Cali,0,0,0,0,0.0,EXAMENES DE DIAGNOSTICO,1,579721,45,729.0,Pruebas complementarias
3781401,34178573,1.0,Cali,0,0,0,0,0.0,EXAMENES DE DIAGNOSTICO,1,479786,45,729.0,Diagnostico Pendiente
3781402,34178573,1.0,Cali,0,0,0,0,0.0,LABORATORIO CLINICO,1,675070,45,729.0,Diagnostico Pendiente
3781403,34178573,1.0,Cali,0,0,0,0,0.0,CONSULTA EXTERNA,2,565193,45,729.0,Diagnostico Pendiente


### Modelo reclamaciones

Este modelo se realiza con el propósito de cumplir uno de los primeros objetivos del reto, que es lograr predecir los costos asociados que tendrán los usuarios respectos a sus seguros en salud a lo largo para así poder tarifar los seguros. 

In [17]:
# Generamos variables dummies
df_reclamaciones = pd.get_dummies(df_reclamaciones, dtype=int)


In [18]:
y = df_reclamaciones.Valor_Pagado
X0 = df_reclamaciones.loc[:,~ df_reclamaciones.columns.isin(['Valor_Pagado','Asegurado_Id'])]

In [19]:
scaler = StandardScaler()
scaler.fit(X0)

X1 = scaler.transform(X0)
X = pd.DataFrame(X1 , columns = X0.columns)
X

Unnamed: 0,Sexo,CANCER,EPOC,DIABETES,HIPERTENSION,ENF_CARDIOVASCULAR,Eventos,Edad,Duracion_Poliza_Dias,Ciudad_Barranquilla,...,Clasificacion_Diagnostico_Enfermedades del sistema nervioso,Clasificacion_Diagnostico_Enfermedades digestivas,Clasificacion_Diagnostico_Enfermedades endocrinas,Clasificacion_Diagnostico_Enfermedades respiratorias,Clasificacion_Diagnostico_Factores que influyen en el estado de salud (Servicios Sanitarios),Clasificacion_Diagnostico_Lesiones traumaticas,Clasificacion_Diagnostico_Malformaciones congenitas,Clasificacion_Diagnostico_Neoplasias,Clasificacion_Diagnostico_Pruebas complementarias,Clasificacion_Diagnostico_Trastornos mentales
0,0.657227,-0.046288,-0.144441,-0.220816,-0.084373,-0.03936,0.200468,0.253115,-2.265091,-0.16088,...,-0.044673,-0.084431,-0.011659,-0.039731,-0.255179,-0.084643,-0.008225,-0.052271,-0.099127,-0.01212
1,0.657227,-0.046288,-0.144441,-0.220816,-0.084373,-0.03936,-0.362537,0.253115,-2.265091,-0.16088,...,-0.044673,-0.084431,-0.011659,-0.039731,-0.255179,-0.084643,-0.008225,-0.052271,-0.099127,-0.01212
2,-1.521545,-0.046288,-0.144441,-0.220816,-0.084373,-0.03936,-0.362537,0.253115,0.683505,-0.16088,...,-0.044673,-0.084431,-0.011659,-0.039731,-0.255179,-0.084643,-0.008225,-0.052271,-0.099127,-0.01212
3,-1.521545,-0.046288,-0.144441,-0.220816,-0.084373,-0.03936,-0.362537,0.253115,0.683505,-0.16088,...,-0.044673,-0.084431,-0.011659,-0.039731,-0.255179,-0.084643,-0.008225,-0.052271,-0.099127,-0.01212
4,-1.521545,-0.046288,-0.144441,-0.220816,-0.084373,-0.03936,-0.362537,0.253115,0.683505,-0.16088,...,-0.044673,-0.084431,-0.011659,-0.039731,-0.255179,-0.084643,-0.008225,-0.052271,-0.099127,-0.01212
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1640817,0.657227,-0.046288,-0.144441,-0.220816,-0.084373,-0.03936,-0.362537,0.966383,0.683505,-0.16088,...,-0.044673,-0.084431,-0.011659,-0.039731,-0.255179,-0.084643,-0.008225,-0.052271,10.088100,-0.01212
1640818,0.657227,-0.046288,-0.144441,-0.220816,-0.084373,-0.03936,-0.362537,0.966383,0.683505,-0.16088,...,-0.044673,-0.084431,-0.011659,-0.039731,-0.255179,-0.084643,-0.008225,-0.052271,-0.099127,-0.01212
1640819,0.657227,-0.046288,-0.144441,-0.220816,-0.084373,-0.03936,-0.362537,0.966383,0.683505,-0.16088,...,-0.044673,-0.084431,-0.011659,-0.039731,-0.255179,-0.084643,-0.008225,-0.052271,-0.099127,-0.01212
1640820,0.657227,-0.046288,-0.144441,-0.220816,-0.084373,-0.03936,-0.081034,0.966383,0.683505,-0.16088,...,-0.044673,-0.084431,-0.011659,-0.039731,-0.255179,-0.084643,-0.008225,-0.052271,-0.099127,-0.01212
