In [47]:
#Import das Libs
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import cross_validate, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from pickle import dump, load

In [48]:
# Load the vital-signs dataset. Column names are supplied explicitly, so pandas
# treats every row of the file (including the first) as data.
colunas = [
    'id',
    'pSist',
    'pDiast',
    'qPA',
    'pulso',
    'fResp',
    'grav',
    'class',
]
data = pd.read_csv(
    '/content/env_vital_signals dados.txt',
    names=colunas,
    sep=',',
)

In [49]:
# Split the frame into the target column and the remaining feature columns.
# NOTE(review): 'id' and 'grav' remain in the feature set — confirm this is intended.
data_classes = data['class']
data_attributes = data.drop("class", axis=1)

In [50]:
# Scale every feature with MinMaxScaler, then balance the classes with SMOTE.
normalizer = MinMaxScaler()
data_attributes_normalized = normalizer.fit_transform(data_attributes)

# Persist the fitted scaler: the inference cell loads 'data_norm.pkl', and a new
# patient must be scaled with exactly the statistics learned here.
with open("data_norm.pkl", "wb") as normalizer_out:
    dump(normalizer, normalizer_out)

# Fixed seed so the synthetic minority samples are the same on every run.
balancer = SMOTE(random_state=42)
data_attributes_balanced, data_classes_balanced = balancer.fit_resample(
    data_attributes_normalized, data_classes
)
# NOTE(review): scaling and oversampling are applied before cross-validation, so
# validation folds are influenced by training rows; scores downstream are optimistic.

In [51]:
# Baseline: 10-fold cross-validation of a decision tree with default hyperparameters.
# The seed makes tie-breaking between equally good splits reproducible.
dt = DecisionTreeClassifier(random_state=42)
cv_results_dt = cross_validate(dt, data_attributes_balanced, data_classes_balanced, cv=10)

# Show the result instead of discarding it; 'test_score' holds one score per fold.
print(f"Baseline mean accuracy (10-fold): {cv_results_dt['test_score'].mean():.4f}")

In [52]:
# Hyperparameter search space for the decision tree (4 x 3 x 3 = 36 combinations).
param_grid_dt = dict(
    max_depth=[3, 5, 7, 10],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

In [53]:
# Exhaustive search over param_grid_dt using GridSearchCV's default cross-validation.
# refit=True retrains the best candidate on the whole balanced set afterwards.
dt_grid = GridSearchCV(
    estimator=dt,
    param_grid=param_grid_dt,
    verbose=2,
    refit=True,
)
dt_grid.fit(data_attributes_balanced, y=data_classes_balanced)

Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV] END max_depth=3, min_samples_leaf=1, min_samples_split=2; total time=   0.0s
[CV] END max_depth=3, min_samples_leaf=1, min_samples_split=2; total time=   0.0s
[CV] END max_depth=3, min_samples_leaf=1, min_samples_split=2; total time=   0.0s
[CV] END max_depth=3, min_samples_leaf=1, min_samples_split=2; total time=   0.0s
[CV] END max_depth=3, min_samples_leaf=1, min_samples_split=2; total time=   0.0s
[CV] END max_depth=3, min_samples_leaf=1, min_samples_split=5; total time=   0.0s
[CV] END max_depth=3, min_samples_leaf=1, min_samples_split=5; total time=   0.0s
[CV] END max_depth=3, min_samples_leaf=1, min_samples_split=5; total time=   0.0s
[CV] END max_depth=3, min_samples_leaf=1, min_samples_split=5; total time=   0.0s
[CV] END max_depth=3, min_samples_leaf=1, min_samples_split=5; total time=   0.0s
[CV] END max_depth=3, min_samples_leaf=1, min_samples_split=10; total time=   0.0s
[CV] END max_depth=3, min_samples_l

In [56]:
# Take the model selected by the grid search. Because the search ran with
# refit=True, best_estimator_ is already fitted on the full balanced set, so no
# second fit is needed. (The name is kept for compatibility; it holds an
# estimator, not a parameter dict.)
dt_best_params = dt_grid.best_estimator_

# The cross-validated score is the more honest estimate of the two shown here.
print("Best hyperparameters:", dt_grid.best_params_)
print(f"Mean cross-validated accuracy: {dt_grid.best_score_:.4f}")

# Report on the original (unbalanced) samples. These rows were part of the data
# the model was trained on, so this report is optimistic and only a sanity check.
classes_predict_dt = dt_best_params.predict(data_attributes_normalized)
report_dt = classification_report(data_classes, classes_predict_dt)
print(report_dt)

# Save the trained model; the context manager guarantees the file is flushed and closed.
with open("predict_decision_tree.pkl", "wb") as model_out:
    dump(dt_best_params, model_out)


              precision    recall  f1-score   support

           1       1.00      1.00      1.00       620
           2       1.00      1.00      1.00      2332
           3       1.00      1.00      1.00       989
           4       1.00      1.00      1.00        59

    accuracy                           1.00      4000
   macro avg       1.00      1.00      1.00      4000
weighted avg       1.00      1.00      1.00      4000



In [57]:
# Save the fitted GridSearchCV object. This overwrites the 'predict_decision_tree.pkl'
# written by the evaluation cell; predict() on the loaded object delegates to the
# refitted best estimator. The context manager guarantees the file is closed.
with open("predict_decision_tree.pkl", "wb") as model_out:
    dump(dt_grid, model_out)

In [59]:
# Normalize the new patient with the scaler saved during training.
pacient = [[27,13.967106,11.146987,6.221129,185.367029,6.557369,14.446909]]

# Only unpickle files produced by this project: loading a pickle can execute arbitrary code.
with open('data_norm.pkl', 'rb') as normalizer_in:
    normalizer_file = load(normalizer_in)

# If the scaler recorded the training column names, present the row under the same
# names so scikit-learn does not warn about missing feature names.
feature_names = getattr(normalizer_file, "feature_names_in_", None)
if feature_names is None:
    patient_input = pacient
else:
    patient_input = pd.DataFrame(pacient, columns=feature_names)

new_patient_normalized = normalizer_file.transform(patient_input)
print(new_patient_normalized)

[[0.00675169 0.52768777 0.74319897 0.85617152 0.92734103 0.29801708
  0.01659305]]


In [61]:
# Classify the new patient with the persisted model.
# Only unpickle files produced by this project: loading a pickle can execute arbitrary code.
with open('predict_decision_tree.pkl', 'rb') as model_in:
    predict_file = load(model_in)

predict_pacient = predict_file.predict(new_patient_normalized)
print(predict_pacient)

[1]
