# Librerías

In [17]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from lightgbm import LGBMClassifier
import pickle

# Lectura de datos

In [2]:
# Load the dataset from the working directory and preview its first five rows.
df = pd.read_csv("heart.csv")
df.head(n=5)

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [3]:
# Inspect each column's dtype to tell the categorical (object) columns from the numeric ones.
df.dtypes

Age                 int64
Sex                object
ChestPainType      object
RestingBP           int64
Cholesterol         int64
FastingBS           int64
RestingECG         object
MaxHR               int64
ExerciseAngina     object
Oldpeak           float64
ST_Slope           object
HeartDisease        int64
dtype: object

In [5]:
# Split the column names into categorical and numeric groups.
# The target column (HeartDisease) is dropped first so it is not listed as a numeric feature.
categoricas = df.select_dtypes(include='object').columns

numericas = df.drop(columns=['HeartDisease']).select_dtypes(include='number').columns

# Preprocesamiento

In [23]:
# One-hot encode the categorical columns; every other column passes through unchanged.
# handle_unknown='ignore' encodes a category that was not seen during fit as all
# zeros instead of raising, so the exported pipeline keeps working on new data.
encoder = OneHotEncoder(handle_unknown='ignore')
column_transformer = make_column_transformer((encoder, categoricas), remainder='passthrough')

## Separación del conjunto de entrenamiento y de test

In [24]:
# Target is the HeartDisease column; features are every other column.
y = df['HeartDisease']
x = df.drop(columns='HeartDisease')

# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=0,
)

# Construcción y entrenamiento del modelo

In [25]:
# LightGBM classifier with default hyperparameters; the fixed seed keeps training repeatable.
model = LGBMClassifier(random_state=0)

## Pipeline

In [27]:
# Chain the column preprocessing and the classifier so they are fitted and applied as one estimator.
pipe = make_pipeline(column_transformer, model)

In [28]:
# Fit the preprocessing step and the classifier on the training split only.
pipe.fit(x_train, y_train)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('onehotencoder',
                                                  OneHotEncoder(),
                                                  Index(['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope'], dtype='object'))])),
                ('lgbmclassifier', LGBMClassifier(random_state=0))])

# Evaluación del modelo

In [79]:
# Predict on the held-out test split; y_pred is passed to classification_report below.
y_pred = pipe.predict(x_test)

array([[1.19723396e-02, 9.88027660e-01],
       [7.23968072e-02, 9.27603193e-01],
       [3.61158400e-04, 9.99638842e-01],
       [1.36030307e-02, 9.86396969e-01],
       [6.18992486e-01, 3.81007514e-01],
       [8.46131384e-01, 1.53868616e-01],
       [9.99186550e-01, 8.13449714e-04],
       [9.99873000e-01, 1.26999585e-04],
       [9.94917121e-01, 5.08287911e-03],
       [9.98917505e-01, 1.08249466e-03],
       [8.72661940e-04, 9.99127338e-01],
       [2.51200072e-02, 9.74879993e-01],
       [2.68875453e-04, 9.99731125e-01],
       [4.98873118e-03, 9.95011269e-01],
       [9.69570951e-01, 3.04290490e-02],
       [1.01520580e-02, 9.89847942e-01],
       [2.17688290e-02, 9.78231171e-01],
       [1.41461589e-03, 9.98585384e-01],
       [7.96629205e-02, 9.20337079e-01],
       [9.81349864e-01, 1.86501356e-02],
       [4.58917440e-02, 9.54108256e-01],
       [7.32511190e-02, 9.26748881e-01],
       [1.35477541e-01, 8.64522459e-01],
       [3.51074646e-02, 9.64892535e-01],
       [9.995948

In [30]:
# Per-class precision, recall and F1 on the test split.
# print() is used so the report's line breaks render as a table.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.84      0.81      0.82       113
           1       0.87      0.90      0.88       163

    accuracy                           0.86       276
   macro avg       0.86      0.85      0.85       276
weighted avg       0.86      0.86      0.86       276



# Exportación del modelo y encoder

In [19]:
# Persist the classifier. The context manager closes the file even if pickling
# fails, so the data is flushed and the handle is not leaked.
# Run this cell after pipe.fit; otherwise an unfitted estimator is saved.
with open("model_file.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

In [20]:
# Persist the FITTED one-hot encoder. ColumnTransformer fits clones of the
# transformers it is given, so the module-level `encoder` object stays unfitted;
# the fitted copy is exposed through `named_transformers_` once pipe.fit has run.
# Step names are the lowercase class names assigned by make_pipeline and
# make_column_transformer.
fitted_encoder = pipe.named_steps["columntransformer"].named_transformers_["onehotencoder"]

# The context manager closes the file even if pickling fails.
with open("encoder_file.pkl", "wb") as encoder_file:
    pickle.dump(fitted_encoder, encoder_file)

In [21]:
# Persist the whole pipeline (preprocessing + classifier) as a single artifact.
# The context manager closes the file even if pickling fails.
# Run this cell after pipe.fit; otherwise an unfitted pipeline is saved.
with open("pipe_file.pkl", "wb") as pipe_file:
    pickle.dump(pipe, pipe_file)