<a href="https://colab.research.google.com/github/OseiasBeu/Cardiovascular-diseases-risk-project/blob/main/Pipeline_Cardiovascular_diseases_risk_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from collections import Counter
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
import requests
import zipfile
import io
import os
import numpy as np
import pickle

In [24]:
import requests

# URL do arquivo que você deseja baixar
file_url = 'https://github.com/OseiasBeu/Cardiovascular-diseases-risk-project/raw/main/model/final_dtc_model_cdr_r.pkl'

# Nome do arquivo de destino
file_name = 'final_dtc_model_cdr_r.pkl'

# Faça o download do arquivo
response = requests.get(file_url)

# Verifique se a solicitação foi bem-sucedida
if response.status_code == 200:
    # Salve o arquivo no disco
    with open(file_name, 'wb') as file:
        file.write(response.content)
    print(f'O arquivo foi baixado com sucesso e salvo como {file_name}')
else:
    print(f'Erro ao fazer o download do arquivo. Status Code: {response.status_code}')


O arquivo foi baixado com sucesso e salvo como final_dtc_model_cdr_r.pkl


In [5]:
# data = pd.read_csv('/content/dataset/dataset/CVD_cleaned.csv')

In [6]:
# data.head()

In [7]:
# data.columns

In [8]:
mock_data = {
    'General_Health': 'Poor',
    'Checkup': 'Within the past 2 years',
    'Exercise': 'No',
    'Skin_Cancer': 'No',
    'Other_Cancer': 'No',
    'Depression': 'No',
    'Diabetes': 'No',
    'Arthritis': 'Yes',
    'Sex': 'Female',
    'Age_Category': '70-74',
    'Height_(cm)': 150.0,
    'Weight_(kg)': 32.66,
    'BMI': 14.54,
    'Smoking_History': 'Yes',
    'Alcohol_Consumption': 0.0,
    'Fruit_Consumption': 30.0,
    'Green_Vegetables_Consumption': 16.0,
    'FriedPotato_Consumption': 12.0
    }

In [9]:
# Transforme o dicionário em um DataFrame
data = pd.DataFrame([mock_data])

# Agora, 'df' conterá os dados do dicionário como um DataFrame
data.head()

Unnamed: 0,General_Health,Checkup,Exercise,Skin_Cancer,Other_Cancer,Depression,Diabetes,Arthritis,Sex,Age_Category,Height_(cm),Weight_(kg),BMI,Smoking_History,Alcohol_Consumption,Fruit_Consumption,Green_Vegetables_Consumption,FriedPotato_Consumption
0,Poor,Within the past 2 years,No,No,No,No,No,Yes,Female,70-74,150.0,32.66,14.54,Yes,0.0,30.0,16.0,12.0


In [10]:
mapper_hd = {
    'No':0,
    'Yes':1,
    np.nan: 1
}

mapper_sex = {
    'Female':0,
    'Male':1,
    np.nan: 1
}

In [11]:
# data['Heart_Disease'] = data['Heart_Disease'].map(mapper_hd,na_action=None)
data['Sex'] = data['Sex'].map(mapper_sex,na_action=None)
data['Exercise'] = data['Exercise'].map(mapper_hd,na_action=None)
data['Skin_Cancer'] = data['Skin_Cancer'].map(mapper_hd,na_action=None)
data['Other_Cancer'] = data['Other_Cancer'].map(mapper_hd,na_action=None)
data['Depression'] = data['Depression'].map(mapper_hd,na_action=None)
data['Arthritis'] = data['Arthritis'].map(mapper_hd,na_action=None)
data['Smoking_History'] = data['Smoking_History'].map(mapper_hd,na_action=None)

In [12]:
data.loc[data['Checkup'] == 'Within the past year', 'last_Checkup_years'] = '<1'
data.loc[data['Checkup'] == 'Within the past 2 years', 'last_Checkup_years'] = '<2'
data.loc[data['Checkup'] == 'Within the past 5 years', 'last_Checkup_years'] = '<5'
data.loc[data['Checkup'] == '5 or more years ago', 'last_Checkup_years'] = '>5'
data.loc[data['Checkup'] == 'Never', 'last_Checkup_years'] = 'Never'

In [13]:
data.loc[data['Diabetes'] == 'No', 'diabetes?'] = 'No'
data.loc[data['Diabetes'] == 'Yes', 'diabetes?'] = 'Yes'
data.loc[data['Diabetes'] == 'No, pre-diabetes or borderline diabetes', 'diabetes?'] = 'Pre_diabetes_boderline'
data.loc[data['Diabetes'] == 'Yes, but female told only during pregnancy', 'diabetes?'] = 'Yes_scovered_in_pregnancy'

In [14]:
# Get one hot encoding of columns
one_hot = pd.get_dummies(data[['General_Health','Age_Category','last_Checkup_years', 'diabetes?']])
# Drop columns as it is now encoded
data = data.drop(['General_Health', 'Age_Category','last_Checkup_years','diabetes?','Checkup','Diabetes'],axis = 1)
# Join the encoded df
df = data.join(one_hot)

In [15]:
columns_Age = [
            'Age_Category_18-24', 'Age_Category_25-29',
            'Age_Category_30-34', 'Age_Category_35-39', 'Age_Category_40-44',
            'Age_Category_45-49', 'Age_Category_50-54', 'Age_Category_55-59',
            'Age_Category_60-64', 'Age_Category_65-69', 'Age_Category_70-74',
            'Age_Category_75-79', 'Age_Category_80+'
            ]

In [16]:
columns_GeneralHealth = [
            'General_Health_Excellent', 'General_Health_Fair',
            'General_Health_Good', 'General_Health_Poor',
            'General_Health_Very Good'
          ]

In [17]:
columns_last_checkup = [
                        'last_Checkup_years_<1',
                        'last_Checkup_years_<2', 'last_Checkup_years_<5',
                        'last_Checkup_years_>5', 'last_Checkup_years_Never'
                        ]

In [18]:
columns_diabetes = [
                    'diabetes?_No',
                    'diabetes?_Pre_diabetes_boderline', 'diabetes?_Yes',
                    'diabetes?_Yes_scovered_in_pregnancy'
                    ]

In [19]:
def create_features_with_zero(columns):
  for column in columns:
    if column not in df.columns:
      df[column] = 0

In [20]:
create_features_with_zero(columns_Age)
create_features_with_zero(columns_last_checkup)
create_features_with_zero(columns_GeneralHealth)
create_features_with_zero(columns_diabetes)

In [30]:
df.head()

Unnamed: 0,Exercise,Skin_Cancer,Other_Cancer,Depression,Arthritis,Sex,Height_(cm),Weight_(kg),BMI,Smoking_History,...,last_Checkup_years_<5,last_Checkup_years_>5,last_Checkup_years_Never,General_Health_Excellent,General_Health_Fair,General_Health_Good,General_Health_Very Good,diabetes?_Pre_diabetes_boderline,diabetes?_Yes,diabetes?_Yes_scovered_in_pregnancy
0,0,0,0,0,1,0,150.0,32.66,14.54,1,...,0,0,0,0,0,0,0,0,0,0


In [22]:
# df['Unnamed: 0'] =

In [26]:
with open('final_dtc_model_cdr_r.pkl', 'rb') as file:
    modelo_treinado = pickle.load(file)

In [27]:
print(type(modelo_treinado))

<class 'sklearn.tree._classes.DecisionTreeClassifier'>


In [28]:
previsoes = modelo_treinado.predict_proba(df)

ValueError: ignored