In [32]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pickle

# Load the dataset from the CSV file
data = pd.read_csv('heart15.csv')

# Sanity check: preview the first rows and show how many rows carry each
# value of the 'output' column
print(data.head())
print(data['output'].value_counts())

# Separate the target column from the remaining feature columns
y = data['output']
x = data.drop(columns=['output'])

# Split the data into training and test sets: 20% of the rows are held out
# for testing, the split is stratified on y, and the seed is fixed so the
# same partition is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=2,
)

# Train the logistic regression model (the solver may run for up to 1000
# iterations); fit() returns the fitted estimator itself.
model = LogisticRegression(max_iter=1000).fit(x_train, y_train)

# Evaluate the model: score its predictions on the training split first,
# then on the held-out test split, using the same four metrics for both.
metric_functions = (accuracy_score, precision_score, recall_score, f1_score)

# --- Training split ---
x_train_prediction = model.predict(x_train)
(
    training_data_accuracy,
    training_data_precision,
    training_data_recall,
    training_data_f1,
) = (metric(y_train, x_train_prediction) for metric in metric_functions)

print('Train Accuracy: ', training_data_accuracy)
print('Train Precision: ', training_data_precision)
print('Train Recall: ', training_data_recall)
print('Train F1 Score: ', training_data_f1)

# --- Test split ---
x_test_prediction = model.predict(x_test)
(
    test_data_accuracy,
    test_data_precision,
    test_data_recall,
    test_data_f1,
) = (metric(y_test, x_test_prediction) for metric in metric_functions)

print('Test Accuracy: ', test_data_accuracy)
print('Test Precision: ', test_data_precision)
print('Test Recall: ', test_data_recall)
print('Test F1 Score: ', test_data_f1)

# Save the fitted model: serialize it with pickle and write the resulting
# bytes to 'heartAttackPrediction.pkl' (the file is closed by the `with`).
with open('heartAttackPrediction.pkl', 'wb') as file:
    serialized_model = pickle.dumps(model)
    file.write(serialized_model)


   age  sex  cp  trtbps  chol  fbs  restecg  thalachh  exng  oldpeak  slp  \
0   63    1   3     145   233    1        0       150     0      2.3    0   
1   37    1   2     130   250    0        1       187     0      3.5    0   
2   41    0   1     130   204    0        0       172     0      1.4    2   
3   56    1   1     120   236    0        1       178     0      0.8    2   
4   57    0   0     120   354    0        1       163     1      0.6    2   

   caa  thall  output  
0    0      1       1  
1    0      2       1  
2    0      2       1  
3    0      2       1  
4    0      2       1  
output
1    165
0    138
Name: count, dtype: int64
Train Accuracy:  0.8553719008264463
Train Precision:  0.8299319727891157
Train Recall:  0.9242424242424242
Train F1 Score:  0.8745519713261649
Test Accuracy:  0.8032786885245902
Test Precision:  0.8181818181818182
Test Recall:  0.8181818181818182
Test F1 Score:  0.8181818181818182
