##### IMPORTANDO BIBLIOTECAS/MÓDULOS

In [87]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pycaret.datasets import get_data
import joblib
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

import warnings

##### ANALISANDO DATASET

In [None]:
# Silence every warning for the rest of the session, then load the 'blood'
# dataset through pycaret's get_data helper.
warnings.filterwarnings('ignore')
df = get_data('blood')

# Draw one box plot per column, side by side in a single row.
fig = plt.figure(figsize=(20, 4))
n_colunas = len(df.columns)

for posicao, nome in enumerate(df.columns, start=1):
    eixo = fig.add_subplot(1, n_colunas, posicao)
    eixo.boxplot(df[nome])
    eixo.set_title(nome)

plt.show()

##### REMOVENDO OUTLIERS

In [None]:
# Keep only the rows whose 'Recency' and 'Frequency' are both below 30.
dentro_do_limite = (df['Recency'] < 30) & (df['Frequency'] < 30)
df = df.loc[dentro_do_limite]

# Re-draw the per-column box plots for the filtered data.
plt.figure(figsize=(20, 4))
total = len(df.columns)

for indice, nome in enumerate(df.columns, start=1):
    plt.subplot(1, total, indice)
    plt.boxplot(df[nome])
    plt.title(nome)

plt.show()

##### DEFININDO BASES DE TREINO E DE TESTE

In [None]:
# Scaler for the feature columns; later cells persist it under the name `numeric`.
numeric = MinMaxScaler()

# Features are every column except the 'Class' target.
features = df.drop('Class', axis=1)
y = df['Class']

# Split BEFORE scaling so the scaler never sees the test rows. Fitting it on
# the full dataset would leak the test-set min/max into preprocessing.
features_train, features_test, y_train, y_test = train_test_split(
    features, y, test_size=0.20, random_state=42)

# Learn the min/max from the training rows only, then apply that same
# transformation to the test rows (test values may fall outside [0, 1]).
X_train = numeric.fit_transform(features_train)
X_test = numeric.transform(features_test)

# Full scaled feature matrix, kept under its original name for any code that
# still refers to `X`; it uses the scaler fitted on the training rows.
X = numeric.transform(features)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

##### AVALIANDO MODELOS

In [None]:
# Candidate classifiers; each one is fitted on the training split and scored
# on the test split in the loop below.
models = [RandomForestClassifier(max_depth=5, n_estimators=500),
          ExtraTreesClassifier(max_depth=5, n_estimators=500),
          MLPClassifier(solver='lbfgs'),
          MLPClassifier(solver='sgd'),
          KNeighborsClassifier(n_neighbors=5),
          GaussianNB(),
          SVC(kernel='sigmoid'),
          AdaBoostClassifier(),
          GradientBoostingClassifier(),
          HistGradientBoostingClassifier(),
          DecisionTreeClassifier(max_depth=5)]

# (estimator, accuracy in %) pairs used to build the ranking table at the end.
lista_modelos = []

for model in models:
    model.fit(X_train, y_train)
    teste = model.predict(X_test)

    # Test-set accuracy as a percentage rounded to two decimals.
    acuracia = np.round(accuracy_score(y_test, teste)*100, 2)

    lista_modelos.append((model, acuracia))

    print(model)
    print(classification_report(y_test, teste))

    # confusion_matrix takes (y_true, y_pred): rows are the true classes and
    # columns the predicted ones. The heatmap puts rows on the y axis and
    # columns on the x axis, so this order matches the axis labels below.
    sns.heatmap(confusion_matrix(y_test, teste), annot=True, fmt='.0f')
    plt.xlabel('Predição')
    plt.ylabel('Real')
    plt.title(str(model) + ' - Teste')

    plt.show()

# Rank the evaluated models from highest to lowest test accuracy.
result = pd.DataFrame(lista_modelos, columns=['Modelo', 'Acuracia'])
result = result.sort_values('Acuracia', ascending=False)
display(result)

##### SALVANDO MODELOS

In [None]:
# Persist the estimator with the highest test accuracy. After the evaluation
# loop, `model` is simply the last estimator in the candidate list, so dumping
# it would ignore the ranking. `lista_modelos` holds (estimator, accuracy)
# pairs; on ties, max() keeps the first one evaluated.
best_model, _ = max(lista_modelos, key=lambda par: par[1])
joblib.dump(best_model, r'model.pkl')

# Persist the fitted scaler too, so new data can be transformed the same way.
joblib.dump(numeric, r'numeric.joblib')

##### CARREGANDO MODELOS

In [None]:
# Restore the persisted estimator and scaler from the working directory.
# NOTE: joblib files are pickle-based; only load files you created yourself.
model = joblib.load(filename=r'model.pkl')
numeric = joblib.load(filename=r'numeric.joblib')