# Dados Numéricos — Kaggle Cardiovascular Disease

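Este notebook instala a API do Kaggle, autentica, baixa o dataset `sulianova/cardiovascular-disease-dataset`, carrega com pandas e realiza uma EDA básica (head, info, describe). Se as credenciais do Kaggle (`KAGGLE_USERNAME`/`KAGGLE_KEY`) não estiverem definidas, ou se o download falhar, o notebook usa uma pequena amostra sintética com as mesmas colunas; nesse caso, os resultados exibidos abaixo refletem apenas essa amostra.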

In [1]:
# Instalações básicas (Colab/Jupyter)
%pip -q install kaggle pandas
print('Dependências OK')

Note: you may need to restart the kernel to use updated packages.
Dependências OK


In [2]:
# Nothing to configure in this cell: the next cell handles Kaggle authentication
# on its own whenever the credential environment variables are present.
print(
    'Se KAGGLE_USERNAME/KAGGLE_KEY estiverem definidos, '
    'o download usará essas credenciais.'
)

Se KAGGLE_USERNAME/KAGGLE_KEY estiverem definidos, o download usará essas credenciais.


In [3]:
# Download temporário e carregamento em DataFrame
import os, json, pathlib, sys, tempfile, subprocess, glob
import pandas as pd

# Scratch area for the download. The TemporaryDirectory object stays bound to
# `_tmp` so the directory is not cleaned up while the notebook still uses it.
_tmp = tempfile.TemporaryDirectory()
data_dir = pathlib.Path(_tmp.name) / 'numericos'
data_dir.mkdir(parents=True, exist_ok=True)

# `df` stays None until a CSV has been loaded; None triggers the fallback below.
df = None
try:
    if os.getenv('KAGGLE_USERNAME') and os.getenv('KAGGLE_KEY'):
        # Make sure a kaggle.json exists. An existing file is left untouched so
        # credentials the user already stored are never overwritten, and a new
        # file is created owner-only (0600) because it contains the API key.
        kdir = pathlib.Path.home() / '.kaggle'
        kdir.mkdir(exist_ok=True)
        cred_path = kdir / 'kaggle.json'
        if not cred_path.exists():
            cred_path.touch(mode=0o600)
            cred_path.write_text(json.dumps({
                'username': os.getenv('KAGGLE_USERNAME'),
                'key': os.getenv('KAGGLE_KEY')
            }))
        # Download and unzip through the Kaggle CLI, run with the kernel's interpreter.
        cmd = [sys.executable, '-m', 'kaggle', 'datasets', 'download', '-d',
               'sulianova/cardiovascular-disease-dataset', '-p', str(data_dir), '--unzip']
        print('Baixando dataset Kaggle para:', data_dir)
        subprocess.check_call(cmd)
        # glob order is unspecified; sort so the chosen file is deterministic.
        files = sorted(glob.glob(str(data_dir / '*.csv')))
        if files:
            # sep=None makes pandas sniff the delimiter, so both ';' and ','
            # separated files load correctly. With the default comma separator a
            # semicolon-delimited file would be read as one single column.
            # NOTE(review): the Kaggle CSV for this dataset is expected to be
            # ';'-separated -- confirm against the downloaded file.
            df = pd.read_csv(files[0], sep=None, engine='python')
            print('Arquivo carregado:', files[0])
        else:
            print('CSV não encontrado após download Kaggle. Usando amostra sintética...')
except Exception as e:
    # Best effort: any failure (missing CLI, network, bad credentials) is reported
    # and the notebook continues with the synthetic sample.
    print('Falha no download Kaggle:', e)

if df is None:
    # Fallback: a minimal synthetic sample with the same columns as the dataset.
    import io
    csv_text = """id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,17500,2,169,62,120,80,1,1,0,0,1,0
1,20000,1,156,85,140,90,2,1,0,0,0,1
2,22000,2,165,70,160,100,3,2,1,1,1,1
3,19000,1,180,95,130,85,1,1,0,0,1,0
"""
    df = pd.read_csv(io.StringIO(csv_text))
    print('Usando amostra sintética (sem Kaggle).')

Usando amostra sintética (sem Kaggle).


In [4]:
# Basic EDA: preview the first rows, show the column schema, then summary statistics.
# Guard against running this cell before the loading cell has produced `df`.
if 'df' not in globals() or df is None:
    raise RuntimeError('DataFrame não disponível. Verifique a célula de download/carregamento.')

display(df.head())
# DataFrame.info() writes its report to stdout and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line to the output.
df.info()
display(df.describe(include='all'))

Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,0,17500,2,169,62,120,80,1,1,0,0,1,0
1,1,20000,1,156,85,140,90,2,1,0,0,0,1
2,2,22000,2,165,70,160,100,3,2,1,1,1,1
3,3,19000,1,180,95,130,85,1,1,0,0,1,0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 13 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   id           4 non-null      int64
 1   age          4 non-null      int64
 2   gender       4 non-null      int64
 3   height       4 non-null      int64
 4   weight       4 non-null      int64
 5   ap_hi        4 non-null      int64
 6   ap_lo        4 non-null      int64
 7   cholesterol  4 non-null      int64
 8   gluc         4 non-null      int64
 9   smoke        4 non-null      int64
 10  alco         4 non-null      int64
 11  active       4 non-null      int64
 12  cardio       4 non-null      int64
dtypes: int64(13)
memory usage: 548.0 bytes
None


Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
count,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
mean,1.5,19625.0,1.5,167.5,78.0,137.5,88.75,1.75,1.25,0.25,0.25,0.75,0.5
std,1.290994,1887.458609,0.57735,9.949874,14.809907,17.078251,8.539126,0.957427,0.5,0.5,0.5,0.5,0.57735
min,0.0,17500.0,1.0,156.0,62.0,120.0,80.0,1.0,1.0,0.0,0.0,0.0,0.0
25%,0.75,18625.0,1.0,162.75,68.0,127.5,83.75,1.0,1.0,0.0,0.0,0.75,0.0
50%,1.5,19500.0,1.5,167.0,77.5,135.0,87.5,1.5,1.0,0.0,0.0,1.0,0.5
75%,2.25,20500.0,2.0,171.75,87.5,145.0,92.5,2.25,1.25,0.25,0.25,1.0,1.0
max,3.0,22000.0,2.0,180.0,95.0,160.0,100.0,3.0,2.0,1.0,1.0,1.0,1.0
