# 0.0 IMPORTS

In [None]:
import warnings
warnings.simplefilter('ignore')

import time
import numpy as np 
import pandas as pd 
import seaborn as sns
%matplotlib inline
import matplotlib.pyplot as plt
import scipy.stats as stats

from sklearn import decomposition 
from sklearn.preprocessing import StandardScaler 

from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score

from sklearn.metrics import classification_report 
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score,accuracy_score,classification_report
from sklearn.preprocessing import LabelEncoder
import pickle

pd.set_option('display.max_columns', None)

## 0.1 LOAD DATA

In [None]:
# Location of the raw, semicolon-delimited dataset. The original author's path
# stays the default; set the CARDIO_DATA_PATH environment variable to run the
# notebook on another machine without editing this cell.
import os

DATA_PATH = os.environ.get(
    'CARDIO_DATA_PATH',
    'C:\\Users\\luizv\\Documents\\cardio_disease\\cardio_train.csv',
)
data = pd.read_csv(DATA_PATH, sep=';')

# 1.0 DATA DESCRIPTION

In [None]:
df1 = data.copy()

## 1.1 DATA DIMENSIONS

In [None]:
# Report the size of the dataset (rows, then columns).
n_rows, n_cols = df1.shape
print(f'Number of Rows: {n_rows}')
print(f'Number of Cols: {n_cols}')

## 1.2 Data Types

In [None]:
df1.dtypes

## 1.3 Check NA

In [None]:
df1.isna().sum()

## 1.4 Descriptive Statistics

In [None]:
num_attributes = df1.select_dtypes( include=['int64', 'float64'] )

### 1.4.1 NUMERICAL ATTRIBUTES

In [None]:
# Build one single-row frame per statistic (one column per numeric attribute);
# they are stacked and transposed below so that every attribute becomes a row.

# Central tendency: mean and median
ct1 = pd.DataFrame( num_attributes.apply( np.mean ) ).T
ct2 = pd.DataFrame( num_attributes.apply( np.median ) ).T

# Dispersion and shape: std, min, max, range, skewness, kurtosis
# NOTE(review): depending on how pandas dispatches np.std this is either the
# population (ddof=0) or the sample (ddof=1) standard deviation - confirm.
d1 = pd.DataFrame( num_attributes.apply( np.std ) ).T
d2 = pd.DataFrame( num_attributes.apply( min ) ).T
d3 = pd.DataFrame( num_attributes.apply( max ) ).T
d4 = pd.DataFrame( num_attributes.apply( lambda x: x.max() - x.min() ) ).T
d5 = pd.DataFrame( num_attributes.apply( lambda x: x.skew() ) ).T
d6 = pd.DataFrame( num_attributes.apply( lambda x: x.kurtosis() ) ).T

# Concatenate in the same order as the column labels assigned on the next line
m = pd.concat( [d2, d3, d4, ct1, ct2, d1, d5, d6] ).T.reset_index()
m.columns = ['attributes', 'min', 'max', 'range', 'mean', 'median', 'std','skew', 'kurtosis']
m

* age = age of the person, in days
* height = height of the person, in cm
* weight = weight of the person, in kg
* ap_hi = systolic blood pressure; normal value = 120 mmHg
* ap_lo = diastolic blood pressure; normal value = 80 mmHg
* cholesterol = adults' normal value = 200 mg/dL, children's normal value = 170 mg/dL (stored in this dataset as the codes 1 = normal, 2 = above normal, 3 = well above normal)

* gluc = glucose level; normal value = 70–80 mg/dL (stored in this dataset as the codes 1 = normal, 2 = above normal, 3 = well above normal)
* smoke = 0 : 'Not a smoker', 1 : 'Smoker'
* alco = 0 : 'Not an alcoholic', 1 : 'Alcoholic'
* active = 0 : 'Not involved in physical activities', 1 : 'Involved in physical activities'

* cardio = 0 : 'Does not have cardiovascular disease', 1 : 'Has cardiovascular disease'

# 2.0 Feature Engineering

In [None]:
df2 = df1.copy()

In [None]:
# Body mass index: weight (kg) divided by the squared height (m). Height is
# stored in centimetres, hence the division by 100. The result is truncated
# to an integer.
height_m = df1['height'] / 100
df2['bmi'] = (df1['weight'] / height_m ** 2).astype('int64')

# Pulse pressure: systolic minus diastolic blood pressure.
df2['pulse_pression'] = df1['ap_hi'] - df1['ap_lo']

## 2.1 MODEL SCORE

In [None]:
def calculate_score(age, gender, cholesterol, systolic_bp, smoke):
    """Return ``age`` increased by a fixed number of points per risk factor.

    Points added:
      * +5 when ``gender == 1`` (women, as coded in this notebook)
      * +1 when ``cholesterol == 3`` (highest cholesterol code)
      * +1 when ``systolic_bp > 140`` (strictly above 140)
      * +4 when ``smoke == 1`` (smoker)
    """
    risk_factors = (
        (gender == 1, 5),         # woman
        (cholesterol == 3, 1),    # cholesterol above normal
        (systolic_bp > 140, 1),   # systolic blood pressure above normal
        (smoke == 1, 4),          # smoker
    )
    score = age
    for present, points in risk_factors:
        if present:
            score += points
    return score

# Compute the SCORE-style value for every row and store it as an integer
# column named 'model_score'.
# NOTE(review): at this point 'age' is still expressed in days (the conversion
# to years is only applied later, on df3), so the few points added per risk
# factor are tiny compared with the age term - confirm this is intended.
df2['model_score'] = df2.apply(
    lambda row: calculate_score(
        row['age'], row['gender'], row['cholesterol'], row['ap_hi'], row['smoke']
    ),
    axis=1,
).astype('int64')

# 3.0 EDA

In [None]:
df3= df2.copy()

In [None]:
df3.head()

## 3.1 CARDIO(TARGET)

In [None]:
# Class balance of the target, drawn from the EDA frame (df3) like the rest of
# section 3 instead of the feature-engineering frame (df2).
sns.set_style('darkgrid')
plt.figure(figsize=(10, 5))
sns.countplot(x='cardio', data=df3, palette='summer', hue='cardio')
plt.xlabel('Presence of cardiovascular disease', fontdict={'fontsize': 15,'color':'Green'}, labelpad=3)
plt.title('Distribution of Cardiovascular Disease in the Population', fontsize=20)
# Legend entries follow the hue levels 0 and 1
plt.legend(['No Disease', 'Disease'])
plt.grid(which='major', color='#666666', linestyle='-')
plt.show()


## 3.2 GENDER

Assumption: women are, on average, shorter than men, so the gender code with the lower mean height is taken to be female.

In [None]:
# Infer which gender code is which by comparing mean heights: the code with
# the larger mean height is labelled "male".
mean_height_code2 = df3.loc[df3["gender"] == 2, "height"].mean()
mean_height_code1 = df3.loc[df3["gender"] == 1, "height"].mean()
gender, gender1 = (
    ("male", "female") if mean_height_code2 > mean_height_code1 else ("female", "male")
)
print("Gender 2 is " + gender + " e Gender 1 is " + gender1)

In [None]:
# Disease counts per gender code, drawn from the EDA frame (df3) like the rest
# of section 3 instead of the feature-engineering frame (df2).
sns.set_style('whitegrid')
plt.figure(figsize=(10, 6))
sns.countplot(x='gender', hue='cardio', data=df3, palette="Set2")

plt.title("Gender bar plot")
plt.xlabel("Gender")
plt.ylabel("Contagem")

plt.show()

## 3.3 AGE

In [None]:
# Convert age from days to whole years.
# The day-based values are always read from df2 (which is never modified after
# feature engineering), restricted to the rows still present in df3, so running
# this cell more than once does not divide by 365 a second time.
df3['age'] = (df2.loc[df3.index, 'age'] / 365).round()

### 3.3.2 GENDER BY AGE

In [None]:
# Split the EDA frame by gender code (1 = women, 2 = men in this notebook)
df_female = df3[df3['gender'] == 1]
df_male = df3[df3['gender'] == 2]

# One box plot of age per gender, side by side
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12,6))
panels = [
    (df_female, 'pink', " Women's Age"),
    (df_male, 'lightblue', "Men's Age"),
]
for panel_ax, (subset, box_color, panel_title) in zip(axes, panels):
    sns.boxplot(data=subset, y='age', ax=panel_ax, color=box_color)
    panel_ax.set_title(panel_title)
plt.tight_layout()
plt.show()

In [None]:
df_female[df_female['age'] <= 30]

In [None]:
df_male[df_male['age'] <= 30]

## 3.4 Height

### 3.4.1 VIOLIN PLOT

### 3.4.2 HEIGHT BY GENDER

In [None]:
# Split the EDA frame by gender code (1 = women, 2 = men in this notebook)
df_female = df3[df3['gender'] == 1]
df_male = df3[df3['gender'] == 2]

# One box plot of height per gender, side by side
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12,6))
panels = [
    (df_female, 'pink', " Women's height "),
    (df_male, 'lightblue', " Men's height "),
]
for panel_ax, (subset, box_color, panel_title) in zip(axes, panels):
    sns.boxplot(data=subset, y='height', ax=panel_ax, color=box_color)
    panel_ax.set_title(panel_title)
plt.tight_layout()
plt.show()

In [None]:
df_female[df_female['height']> 240]

In [None]:
# Remove the implausible record(s): rows with gender code 1 and a recorded
# height of exactly 250 cm.
implausible_height = (df3['height'] == 250) & (df3['gender'] == 1)
df3 = df3.drop(df3[implausible_height].index)

In [None]:
# Re-split by gender code after the height filter (1 = women, 2 = men)
df_female = df3[df3['gender'] == 1]
df_male = df3[df3['gender'] == 2]

# Same side-by-side height box plots, now on the filtered data
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12,6))
panels = [
    (df_female, 'pink', " Women's height "),
    (df_male, 'lightblue', " Men's height "),
]
for panel_ax, (subset, box_color, panel_title) in zip(axes, panels):
    sns.boxplot(data=subset, y='height', ax=panel_ax, color=box_color)
    panel_ax.set_title(panel_title)
plt.tight_layout()
plt.show()

## 3.5 Weight

### 3.5.1 VIOLIN PLOT

### 3.5.2 WEIGHT BY GENDER

In [None]:
# Split the EDA frame by gender code (1 = women, 2 = men in this notebook)
df_female = df3[df3['gender'] == 1]
df_male = df3[df3['gender'] == 2]

# One box plot of weight per gender, side by side
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12,6))
panels = [
    (df_female, 'pink', " Women's weight "),
    (df_male, 'lightblue', " Men's weight "),
]
for panel_ax, (subset, box_color, panel_title) in zip(axes, panels):
    sns.boxplot(data=subset, y='weight', ax=panel_ax, color=box_color)
    panel_ax.set_title(panel_title)
plt.tight_layout()
plt.show()

## 3.6 AP_HI (systolic blood pressure)

### 3.6.1 VIOLIN PLOT

In [None]:
# Keep only rows whose systolic pressure lies strictly between 0 and 300.
valid_ap_hi = (df3['ap_hi'] > 0) & (df3['ap_hi'] < 300)
df3 = df3[valid_ap_hi]

### 3.6.2 AP_HI BY GENDER

In [None]:
# Split the EDA frame by gender code (1 = women, 2 = men in this notebook)
df_female = df3[df3['gender'] == 1]
df_male = df3[df3['gender'] == 2]

# One box plot of systolic pressure per gender, side by side
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12,6))
panels = [
    (df_female, 'pink', " Women's ap_hi "),
    (df_male, 'lightblue', " Men's ap_hi "),
]
for panel_ax, (subset, box_color, panel_title) in zip(axes, panels):
    sns.boxplot(data=subset, y='ap_hi', ax=panel_ax, color=box_color)
    panel_ax.set_title(panel_title)
plt.tight_layout()
plt.show()

## 3.7 AP_LO
     

In [None]:
# Keep only rows whose diastolic pressure lies strictly between 0 and 300.
valid_ap_lo = (df3['ap_lo'] > 0) & (df3['ap_lo'] < 300)
df3 = df3[valid_ap_lo]

### 3.7.2 AP_LO BY GENDER

In [None]:
# Split the EDA frame by gender code (1 = women, 2 = men in this notebook)
df_female = df3[df3['gender'] == 1]
df_male = df3[df3['gender'] == 2]

# One box plot of diastolic pressure per gender, side by side
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12,6))
panels = [
    (df_female, 'pink', " Women's ap_lo "),
    (df_male, 'lightblue', " Men's ap_lo "),
]
for panel_ax, (subset, box_color, panel_title) in zip(axes, panels):
    sns.boxplot(data=subset, y='ap_lo', ax=panel_ax, color=box_color)
    panel_ax.set_title(panel_title)
plt.tight_layout()
plt.show()

## 3.8 CHOLESTEROL

In [None]:
# Replace the numeric cholesterol codes with readable labels for plotting.
# The codes are read from df2 (rows aligned to df3's index) rather than from
# df3 itself, so the cell can be re-run safely: mapping the already-labelled
# column a second time would turn every value into NaN.
df3['cholesterol'] = df2.loc[df3.index, 'cholesterol'].map({1: 'normal', 2: 'above normal', 3: 'well above normal'})

### 3.8.1 BAR PLOT

In [None]:
# Pin the bar order so the green / yellow / red palette always follows
# severity. Without an explicit order the bars follow the order in which the
# labels happen to appear in the data, and the colours can end up on the
# wrong category.
severity_order = ['normal', 'above normal', 'well above normal']
sns.countplot(x='cholesterol', data=df3, order=severity_order, palette=['green', 'yellow', 'red'])
plt.xlabel('Categories')
plt.ylabel('Count')
plt.title('Cholesterol distribution')

# Resize the figure that countplot just drew on
fig = plt.gcf()
fig.set_size_inches(12, 6)

plt.show()

### 3.8.2 CHOLESTEROL BY GENDER

In [None]:
# Stacked bars: number of people at each cholesterol level within each gender code
contingency_table = pd.crosstab(df3['gender'], df3['cholesterol'])
ax = contingency_table.plot(kind='bar', stacked=True, figsize=(12,6))

# Labels and legend
ax.set_title('Cholesterol by gender')
ax.set_xlabel('Gender')
ax.set_ylabel('Frequency')
plt.xticks(rotation=0)
ax.legend(title='Cholesterol')

plt.show()

## 3.9 GLUCOSE


In [None]:
# Replace the numeric glucose codes with readable labels for plotting.
# The codes are read from df2 (rows aligned to df3's index) rather than from
# df3 itself, so the cell can be re-run safely: mapping the already-labelled
# column a second time would turn every value into NaN.
df3['gluc'] = df2.loc[df3.index, 'gluc'].map({1: 'normal', 2: 'above normal', 3: 'well above normal'})

### 3.9.1 BAR PLOT

In [None]:
# Pin the bar order so the green / yellow / red palette always follows
# severity. Without an explicit order the bars follow the order in which the
# labels happen to appear in the data, and the colours can end up on the
# wrong category.
severity_order = ['normal', 'above normal', 'well above normal']
sns.countplot(x='gluc', data=df3, order=severity_order, palette=['green', 'yellow', 'red'])
plt.xlabel('Categories')
plt.ylabel('Count')
plt.title('Glucose distribution')

# Resize the figure that countplot just drew on
fig = plt.gcf()
fig.set_size_inches(12, 6)

plt.show()

### 3.9.2 GLUCOSE BY GENDER

In [None]:
# Stacked bars: number of people at each glucose level within each gender code.
# Both columns now come from df3; the original paired df2['gender'] (unfiltered)
# with df3['gluc'] (filtered), unlike every other "by gender" cell.
contingency_table = pd.crosstab(df3['gender'], df3['gluc'])
contingency_table.plot(kind='bar', stacked=True, figsize=(12,6))

# Labels and legend
plt.title('Glucose by gender')
plt.xlabel('Gender')
plt.ylabel('Frequency')
plt.xticks(rotation=0)
plt.legend(title='Glucose')

plt.show()

## 3.10 SMOKE

In [None]:
# Replace the 0/1 smoking flag with readable labels for plotting.
# The codes are read from df2 (rows aligned to df3's index) rather than from
# df3 itself, so the cell can be re-run safely: mapping the already-labelled
# column a second time would turn every value into NaN.
df3['smoke'] = df2.loc[df3.index, 'smoke'].map({0: 'not a smoker', 1: 'smoker'})

### 3.10.1 BAR PLOT

In [None]:
# value_counts() orders its result by frequency, so the counts are re-indexed
# into a fixed order to guarantee that each bar receives the label and colour
# written for it below, whatever the class balance is.
smoke_counts = df3['smoke'].value_counts().reindex(['not a smoker', 'smoker'], fill_value=0)
categories = ['no smoke', 'smoke']
colors = ['green','red']

fig = plt.figure(figsize=(12, 6))
ax = fig.add_axes([0,0,1,1])
ax.bar(categories, smoke_counts, color=colors)

# The original x label carried a stray closing bracket ('Categories]')
plt.xlabel('Categories')
plt.ylabel('Count')
plt.title('Smoke distribution')

plt.show()

### 3.10.2 SMOKE BY GENDER

In [None]:
# Stacked bars: smokers and non-smokers within each gender code
contingency_table = pd.crosstab(df3['gender'], df3['smoke'])
ax = contingency_table.plot(kind='bar', stacked=True, figsize=(12,6))

# Labels and legend
ax.set_title('Smoke by gender')
ax.set_xlabel('Gender')
ax.set_ylabel('Frequency')
plt.xticks(rotation=0)
ax.legend(title='Smoke')

plt.show()

## 3.11 ALCO

In [None]:
# Replace the 0/1 alcohol flag with readable labels for plotting.
# The codes are read from df2 (rows aligned to df3's index) rather than from
# df3 itself, so the cell can be re-run safely: mapping the already-labelled
# column a second time would turn every value into NaN.
df3['alco'] = df2.loc[df3.index, 'alco'].map({0: 'no drink', 1: 'drink'})

### 3.11.1 BAR PLOT

In [None]:
# value_counts() orders its result by frequency, so the counts are re-indexed
# into a fixed order to guarantee that each bar receives the label and colour
# written for it below, whatever the class balance is.
alco_counts = df3['alco'].value_counts().reindex(['no drink', 'drink'], fill_value=0)
categories = ['Not a Alchoalic', 'Alchoalic']
colors = ['green','red']

fig = plt.figure(figsize=(12, 6))
ax = fig.add_axes([0,0,1,1])
ax.bar(categories, alco_counts, color=colors)

plt.xlabel('Categories')
plt.ylabel('Count')
plt.title('Alc distribution')

plt.show()

### 3.11.2 ALCOHOLIC BY GENDER

In [None]:
# Stacked bars: drinkers and non-drinkers within each gender code
contingency_table = pd.crosstab(df3['gender'], df3['alco'])
ax = contingency_table.plot(kind='bar', stacked=True, figsize=(12,6))

# Labels and legend
ax.set_title('Alcoholic by gender')
ax.set_xlabel('Gender')
ax.set_ylabel('Frequency')
plt.xticks(rotation=0)
ax.legend(title='Alcoholic')

plt.show()

## 3.12 ACTIVE

In [None]:
# Replace the 0/1 physical-activity flag with readable labels for plotting.
# The codes are read from df2 (rows aligned to df3's index) rather than from
# df3 itself, so the cell can be re-run safely: mapping the already-labelled
# column a second time would turn every value into NaN.
df3['active'] = df2.loc[df3.index, 'active'].map({0: 'no active', 1: 'active'})

### 3.12.1 BAR PLOT

In [None]:
# value_counts() orders its result by frequency, not by category. The labels
# and colours below are written as (inactive, active), so the counts are
# re-indexed into that same order; otherwise the two bars are mislabelled
# whenever active people are the larger group.
active = df3['active'].value_counts().reindex(['no active', 'active'], fill_value=0)
categories = ['Not involved in Physical Activites', 'involved in Physical Activites']
colors = ['red','green']

fig = plt.figure(figsize=(12, 6))
ax = fig.add_axes([0,0,1,1])
ax.bar(categories, active, color=colors)

plt.xlabel('Categories')
plt.ylabel('Count')
plt.title('Active distribution')

plt.show()

### 3.12.2 ACTIVE BY GENDER

In [None]:
# Stacked bars: active and inactive people within each gender code
contingency_table = pd.crosstab(df3['gender'], df3['active'])
ax = contingency_table.plot(kind='bar', stacked=True, figsize=(12,6))

# Labels and legend
ax.set_title('Active by gender')
ax.set_xlabel('Gender')
ax.set_ylabel('Frequency')
plt.xticks(rotation=0)
ax.legend(title='Active')

plt.show()

# 4.0 OUTLIERS

In [None]:
df4 = df3.copy()

In [None]:
def outliers(df_out, drop=False):
    """Print, for each column of ``df_out``, how many values lie outside the 1.5 * IQR fences.

    Parameters
    ----------
    df_out : pandas.DataFrame
        Frame whose columns are inspected one by one; columns must be numeric.
    drop : bool, optional
        Accepted for backward compatibility. It is not used: no rows are removed.

    Returns
    -------
    None
        The counts are only printed.
    """
    for each_feature in df_out.columns:
        feature_data = df_out[each_feature]
        # 25th and 75th percentiles of the current column
        q1, q3 = np.percentile(feature_data, [25., 75.])
        outlier_step = (q3 - q1) * 1.5
        within_fences = (feature_data >= q1 - outlier_step) & (feature_data <= q3 + outlier_step)
        outlier_index = feature_data[~within_fences].index.tolist()
        # The row count is taken from the frame that was passed in; the original
        # read the notebook-level df4 here, which breaks for any other frame.
        print('For the feature {}, No of Outliers is {} and No of Data is {}'.format(each_feature, len(outlier_index), feature_data.shape[0]))
# Print the IQR-based outlier counts for the four continuous features.
outliers(df4[['weight','age','ap_hi','ap_lo']])

# 5.0 PREPROCESSING

In [None]:
# Work on a copy without the 'id' column.
df5 = df4.drop(columns=['id']).copy()

In [None]:
df5.head()

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [None]:
# One-hot encode the multi-level categorical features (adds e.g. 'cholesterol_normal')
df5 = pd.get_dummies(df5, columns=['cholesterol', 'gluc'])

# Binary features: turn the EDA labels back into 0/1 integers.
# .map() is used instead of .replace(): recent pandas releases deprecate the
# implicit object -> int downcast that .replace() performed on these string
# columns, which would leave them as object dtype for the model.
df5['smoke'] = df5['smoke'].map({'not a smoker': 0, 'smoker': 1})
df5['alco'] = df5['alco'].map({'no drink': 0, 'drink': 1})
df5['active'] = df5['active'].map({'no active': 0, 'active': 1})
# Gender is still numeric: shift the codes 1/2 to 0/1
df5['gender'] = df5['gender'].replace({ 1: 0, 2: 1})

# NOTE(review): both scalers below are fitted on the full dataset, before the
# train/test split performed later in the notebook - confirm this is acceptable.

# Min-max scaling to [0, 1]
minmax_columns = ['age', 'bmi', 'model_score']
minmax_scaler = MinMaxScaler()
df5[minmax_columns] = minmax_scaler.fit_transform(df5[minmax_columns])

# Standardisation (zero mean, unit variance)
standard_columns = ['height', 'weight', 'ap_hi', 'ap_lo', 'pulse_pression']
standard_scaler = StandardScaler()
df5[standard_columns] = standard_scaler.fit_transform(df5[standard_columns])

# Persist the artefacts.
# Note that the first two files contain data frames (the fully pre-processed
# frame and its four binary columns), not encoder objects; only the two scaler
# files hold fitted transformers.
with open('one_hot_encoding.pkl', 'wb') as f:
    pickle.dump(df5, f)

with open('binary_encoding.pkl', 'wb') as f:
    pickle.dump(df5[['smoke', 'alco', 'active', 'gender']], f)

with open('minmax_scaler.pkl', 'wb') as f:
    pickle.dump(minmax_scaler, f)

with open('standard_scaler.pkl', 'wb') as f:
    pickle.dump(standard_scaler, f)

In [None]:
df5.head()

## 5.3 Correlation of Variables

In [None]:
# Pairwise correlation matrix of all pre-processed columns (pandas default method)
corr = df5.corr()

# Annotated heat map of the matrix
fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(corr, annot=True, ax=ax)
ax.set_title('Matriz de Correlação')
plt.show()

## 5.4 CORRELATION WITH TARGET

In [None]:
# Spearman rank correlation between every pair of columns
corr = df5.corr(method='spearman')

# Correlation of each feature with the target, largest first; the target's
# correlation with itself is dropped. The feature names end up in an 'index'
# column next to the 'cardio' values.
target_corr = (
    corr.loc['cardio']
    .sort_values(ascending=False)
    .drop('cardio')
    .to_frame()
)
target_corr['index'] = target_corr.index
target_corr = target_corr.reset_index(drop=True)

# Bar chart of the correlations
fig, ax = plt.subplots(figsize=(14, 6))
sns.barplot(x=target_corr['index'], y=target_corr['cardio'], ax=ax)
ax.set_title('Correlações de Spearman da variável alvo')
ax.set_ylabel('Correlação')
plt.xticks(rotation=90)

# Print the rounded value at each bar
for position, value in enumerate(target_corr['cardio']):
    ax.text(position, value, round(value, 2), ha='center', va='bottom')

plt.show()

# 6.0 MACHINE LEARNING

In [None]:
final_data = df5.copy()

In [None]:
final_data.columns

## 6.1 SPLITTING AND STANDARDIZING DATA

In [None]:
# Feature subset fed to the models, and the binary target
cols = ['ap_hi','age','model_score','bmi','cholesterol_normal']
X, y = df5[cols], df5['cardio']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
X_train

In [None]:
X_test

In [None]:
y_train

## 6.2 K-NEAREST NEIGHBORS

In [None]:
#start_time = time.time()

#error_rate = []
#for i in range(1, 31):
     #knn = KNeighborsClassifier(n_neighbors=i)
     #scores = cross_val_score(knn, X_train, y_train, cv=10)
     #error_rate.append(1 - np.mean(scores))

#plt.figure(figsize=(10,6))
#plt.plot(range(1, 31), error_rate, color='blue', linestyle='dashed', marker='o',
          #markerfacecolor='red', markersize=10)
#plt.title('Error Rate vs. K Value')
#plt.xlabel('K')
#plt.ylabel('Error Rate');

#best_k = np.argmin(error_rate) + 1
#print(f"Best K: {best_k}")

#end_time = time.time()  # Registra o tempo de término
#total_time = end_time - start_time  

#print("Time: {:.2f} segundos".format(total_time))

In [None]:
#best_k =29

In [None]:
#best_knn = KNeighborsClassifier(n_neighbors=best_k)
#best_knn.fit(X_train, y_train)

#y_pred = best_knn.predict(X_test)

#print(classification_report(y_test, y_pred))

In [None]:
#cm = confusion_matrix(y_test, y_pred)

## Plot
#fig, ax = plt.subplots(figsize=(10, 8))
#plot_confusion_matrix(best_knn, X_test, y_test, cmap=plt.cm.Blues, values_format='.0f', ax=ax)
#plt.show()

In [None]:
#with open('knn_model.pkl', 'wb') as f:
    #pickle.dump(best_knn, f)

## 6.3 RANDOM FOREST

In [None]:
# start_time = time.time()

# param_grid = {'max_depth': [80, 90],
#                'max_features': [2, 3, 4],
#                'min_samples_leaf': [3, 4],
#                'min_samples_split': [8, 10],
#                'n_estimators': [100, 150, 200]}


# rf = RandomForestClassifier(criterion='gini', bootstrap=True, oob_score=False)

# # GRID SEARCH
# grid = GridSearchCV(estimator=rf, param_grid=param_grid, cv=10, n_jobs=-1, verbose=1)
# grid.fit(X_train, y_train)


# best_model = grid.best_estimator_
# print("Melhores parâmetros: ", grid.best_params_)


# end_time = time.time()
# total_time = end_time - start_time


# print("Time: {:.2f} segundos".format(total_time))

In [None]:
#params = {
    # 'max_depth': 90,
    # 'max_features': 2,
   #  'min_samples_leaf': 4,
   #  'min_samples_split': 10,
   #  'n_estimators': 100
# }

#best_model = RandomForestClassifier(**params)

##predict

#best_model.fit(X_train, y_train)
#y_pred = best_model.predict(X_test)

## Classification Report
#report = classification_report(y_test, y_pred)
#print(report)

In [None]:
#cm = confusion_matrix(y_test, y_pred)

## plot
#plt.figure(figsize=(12, 6))
#sns.heatmap(cm, annot=True, cmap="Blues", fmt="d")
#plt.title("Confusion Matrix")
#plt.xlabel("Predicted label")
#plt.ylabel("True label")
#plt.show()

In [None]:
#filename = 'rf.pkl'

#pickle.dump(best_model, open(filename, 'wb'))

## 6.4 XGBOOST

In [None]:
# start_time = time.time() 

# # Hyperparams
# param_grid = {
#      'learning_rate': [0.1, 0.5],
#      'n_estimators': [100, 200],
#      'max_depth': [3, 5],
#      'min_child_weight': [1, 3],
#      'gamma': [00.1, 0.2]
#  }

# xgb_model = xgb.XGBClassifier()

# # GRIDSEARCH
# grid_search = GridSearchCV(xgb_model, param_grid, cv=10)
# grid_search.fit(X_train, y_train)

# # best hyperparameters
# print("Best hyperparameters: ", grid_search.best_params_)

# end_time = time.time()  
# total_time = end_time - start_time 

# print("Time: {:.2f} segundos".format(total_time))

In [None]:
# Use the %pip magic explicitly so the package is installed into the running
# kernel's environment and the cell does not rely on IPython's automagic.
%pip install xgboost

In [None]:
import xgboost as xgb

In [None]:
# Fixed hyper-parameters for the final model.
# NOTE(review): presumably the best combination found by the commented-out
# grid search above - confirm.
params = dict(
    gamma=0.2,
    learning_rate=0.1,
    max_depth=5,
    min_child_weight=1,
    n_estimators=200,
)

best_model = xgb.XGBClassifier(**params)

# Train on the training split, then predict the held-out split
best_model.fit(X_train, y_train)
y_pred = best_model.predict(X_test)

# Per-class precision / recall / F1 on the held-out split
report = classification_report(y_test, y_pred)
print(report)

In [None]:
# Confusion matrix of the held-out predictions, rendered as a plot
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(cm)
disp.plot()

# 7.0 DEPLOY

In [None]:
import pickle

# Save the trained model to a file. The context manager closes the file handle
# (the original left it open), so the data is fully written before the
# serving code below reads the file back.
filename = 'xgboost_model.pkl'
with open(filename, 'wb') as f:
    pickle.dump(best_model, f)

In [None]:
#from flask import Flask, render_template, request
#import numpy as np
#import xgboost as xgb

#app = Flask(__name__)

#@app.route('/')
#def index():
  #  return render_template('form.html')

#@app.route('/submit', methods=['POST'])
#def submit():
    #ap_hi = request.form['ap_hi']
    #age = request.form['age']
    #model_score = request.form['model_score']
    #bmi = request.form['bmi']
    #cholesterol_normal = request.form['cholesterol_normal']
    ## Faça algo com os dados, como salvá-los em um banco de dados ou calculá-los
    #return 'Dados recebidos com sucesso!'

#if __name__ == '__main__':
    #app.run(debug=False)

In [None]:
from flask import Flask, render_template, request
import pickle
import numpy as np
import pandas as pd

app = Flask(__name__)

# NOTE: pickle.load can execute arbitrary code contained in the file. Only load
# artefacts that were produced by this notebook.

# Load the trained model
with open('xgboost_model.pkl', 'rb') as file:
    model = pickle.load(file)

# Load the fitted scalers
with open('minmax_scaler.pkl', 'rb') as file:
    minmax_scaler = pickle.load(file)

with open('standard_scaler.pkl', 'rb') as file:
    standard_scaler = pickle.load(file)

# Column order the model was trained with
MODEL_FEATURES = ['ap_hi', 'age', 'model_score', 'bmi', 'cholesterol_normal']
# Columns (in fitting order) each scaler was fitted on in the pre-processing cell
MINMAX_COLUMNS = ['age', 'bmi', 'model_score']
STANDARD_COLUMNS = ['height', 'weight', 'ap_hi', 'ap_lo', 'pulse_pression']


def fazer_previsao(ap_hi, age, model_score, bmi, cholesterol_normal):
    """Scale one set of form values like the training data and return the prediction as text.

    Parameters
    ----------
    ap_hi, age, model_score, bmi, cholesterol_normal : float
        Raw (unscaled) values of the five model features.
        NOTE(review): the scalers were fitted on age in years while
        model_score was derived from age in days; the caller must supply
        values in the same units that were used for training - confirm.

    Returns
    -------
    str
        'Doença cardiovascular' when the model predicts class 1,
        otherwise 'Sem doença cardiovascular'.
    """
    # A DataFrame (not a plain dict) is required to select several columns at once
    dados = pd.DataFrame({'ap_hi': [ap_hi],
                          'age': [age],
                          'model_score': [model_score],
                          'bmi': [bmi],
                          'cholesterol_normal': [cholesterol_normal]})

    # Min-max scale the three columns this scaler was fitted on, in fitting order
    dados[MINMAX_COLUMNS] = minmax_scaler.transform(dados[MINMAX_COLUMNS])

    # The standard scaler was fitted on five columns, so it cannot transform
    # ap_hi alone; apply that column's fitted mean and scale directly instead.
    ap_hi_pos = STANDARD_COLUMNS.index('ap_hi')
    dados['ap_hi'] = (dados['ap_hi'] - standard_scaler.mean_[ap_hi_pos]) / standard_scaler.scale_[ap_hi_pos]

    # Hand the features to the model in the exact training order
    dados_array = dados[MODEL_FEATURES].to_numpy(dtype=float)
    previsao = model.predict(dados_array)

    # Return the prediction as a string
    return 'Doença cardiovascular' if int(previsao[0]) == 1 else 'Sem doença cardiovascular'


@app.route('/')
def home():
    """Serve the input form."""
    return render_template('form.html')


@app.route('/submit', methods=['POST'])
def submit():
    """Read the five form fields, run the prediction and return it as the response body."""
    try:
        ap_hi = float(request.form['ap_hi'])
        age = float(request.form['age'])
        model_score = float(request.form['model_score'])
        bmi = float(request.form['bmi'])
        cholesterol_normal = float(request.form['cholesterol_normal'])
    except ValueError:
        # Non-numeric input: answer with a client error instead of a server crash
        return 'Valores inválidos no formulário', 400

    # The original redirected to an endpoint named 'resultado' that is not
    # defined anywhere (and redirect/url_for were never imported); the
    # prediction text is returned directly instead.
    return fazer_previsao(ap_hi, age, model_score, bmi, cholesterol_normal)


if __name__ == '__main__':
    app.run(debug=False)


