In [98]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import kaggle
import shutil
import zipfile
import os
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import GridSearchCV, train_test_split, cross_val_predict, cross_val_score
from sklearn.feature_selection import SelectKBest, chi2, SelectFromModel
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.svm import SVC, NuSVC, LinearSVC, SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

In [116]:
!kaggle datasets download alphiree/cardiovascular-diseases-risk-prediction-dataset

Downloading cardiovascular-diseases-risk-prediction-dataset.zip to c:\Users\gabca\OneDrive\Documentos\Hackaton-2023-PB




  0%|          | 0.00/4.87M [00:00<?, ?B/s]
 21%|██        | 1.00M/4.87M [00:00<00:01, 2.48MB/s]
 41%|████      | 2.00M/4.87M [00:00<00:00, 4.24MB/s]
 62%|██████▏   | 3.00M/4.87M [00:00<00:00, 4.82MB/s]
 82%|████████▏ | 4.00M/4.87M [00:00<00:00, 6.00MB/s]
100%|██████████| 4.87M/4.87M [00:00<00:00, 5.46MB/s]


In [117]:
# Nombre del archivo ZIP que deseas mover, extraer y eliminar
archivo_zip = "cardiovascular-diseases-risk-prediction-dataset.zip"

# Ruta del archivo ZIP actual
ruta_zip = os.path.abspath(archivo_zip)

# Ruta de la carpeta de destino
carpeta_destino = "Datasets"

# Mover el archivo ZIP a la carpeta de destino
shutil.move(ruta_zip, os.path.join(carpeta_destino, archivo_zip))

# Ruta completa del archivo ZIP en la carpeta de destino
ruta_zip_destino = os.path.join(carpeta_destino, archivo_zip)

# Descomprimir el archivo ZIP en la carpeta de destino
with zipfile.ZipFile(ruta_zip_destino, 'r') as zip_ref:
    zip_ref.extractall(carpeta_destino)

# Eliminar el archivo ZIP
os.remove(ruta_zip_destino)

print("Archivo ZIP movido, extraído y eliminado.")


Archivo ZIP movido, extraído y eliminado.


# Cargar y preprocesar los datos

In [118]:
from sklearn.preprocessing import OneHotEncoder

# Carga el archivo CSV en un DataFrame de pandas
data = pd.read_csv('Datasets\CVD_cleaned.csv', sep=',')
columnas = data.columns

data.head()

Unnamed: 0,General_Health,Checkup,Exercise,Heart_Disease,Skin_Cancer,Other_Cancer,Depression,Diabetes,Arthritis,Sex,Age_Category,Height_(cm),Weight_(kg),BMI,Smoking_History,Alcohol_Consumption,Fruit_Consumption,Green_Vegetables_Consumption,FriedPotato_Consumption
0,Poor,Within the past 2 years,No,No,No,No,No,No,Yes,Female,70-74,150.0,32.66,14.54,Yes,0.0,30.0,16.0,12.0
1,Very Good,Within the past year,No,Yes,No,No,No,Yes,No,Female,70-74,165.0,77.11,28.29,No,0.0,30.0,0.0,4.0
2,Very Good,Within the past year,Yes,No,No,No,No,Yes,No,Female,60-64,163.0,88.45,33.47,No,4.0,12.0,3.0,16.0
3,Poor,Within the past year,Yes,Yes,No,No,No,Yes,No,Male,75-79,180.0,93.44,28.73,No,0.0,30.0,30.0,8.0
4,Good,Within the past year,No,No,No,No,No,No,No,Male,80+,191.0,88.45,24.37,Yes,0.0,8.0,4.0,0.0


In [119]:
from sklearn.preprocessing import LabelEncoder

# Identifica las columnas que son de tipo objeto (categóricas)
categorical_columns = data.select_dtypes(include=['object']).columns

# Inicializa un objeto LabelEncoder para cada columna categórica
label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

data.shape

(308854, 19)

In [120]:
from sklearn.preprocessing import StandardScaler

# Estandarización
scaler = StandardScaler()
data = scaler.fit_transform(data)

data_scaled = pd.DataFrame(data, columns=columnas)

In [122]:
data_scaled.head()

Unnamed: 0,General_Health,Checkup,Exercise,Heart_Disease,Skin_Cancer,Other_Cancer,Depression,Diabetes,Arthritis,Sex,Age_Category,Height_(cm),Weight_(kg),BMI,Smoking_History,Alcohol_Consumption,Fruit_Consumption,Green_Vegetables_Consumption,FriedPotato_Consumption
0,0.486392,-1.485982,-1.856251,-0.296584,-0.327962,-0.32726,-0.500659,-0.425409,1.43381,-0.963315,0.98314,-1.93425,-2.38618,-2.159696,1.21063,-0.621527,0.006625,0.059597,0.664502
1,1.155689,0.475622,-1.856251,3.371722,-0.327962,-0.32726,-0.500659,2.335566,-0.697442,-0.963315,0.98314,-0.526857,-0.303547,-0.051548,-0.826016,-0.621527,0.006625,-1.012342,-0.267579
2,1.155689,0.475622,0.53872,-0.296584,-0.327962,-0.32726,-0.500659,2.335566,-0.697442,-0.963315,0.415526,-0.71451,0.22777,0.742649,-0.826016,-0.133707,-0.716973,-0.811354,1.130543
3,0.486392,0.475622,0.53872,3.371722,-0.327962,-0.32726,-0.500659,2.335566,-0.697442,1.038082,1.266947,0.880535,0.461569,0.015913,-0.826016,-0.621527,0.006625,0.997544,0.198462
4,-0.182904,0.475622,-1.856251,-0.296584,-0.327962,-0.32726,-0.500659,-0.425409,-0.697442,1.038082,1.550754,1.912623,0.22777,-0.652562,1.21063,-0.621527,-0.877772,-0.744358,-0.73362
