#### Criação e Avaliação de Modelos de Classificação

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler, RobustScaler, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier


In [2]:
# Load the dataset from a CSV file; the path is relative to the working directory.
df = pd.read_csv("../Data/dataset_codificado.csv")

#### Pré-Processamento

In [3]:
# Target is the 'Qualidade_Ar' column; the features are every remaining column.
y = df['Qualidade_Ar']
X = df.drop(columns='Qualidade_Ar')

In [4]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=101,
)

In [5]:
# One default-configured instance of each scaler, keyed by its class name.
scalers = {
    scaler_cls.__name__: scaler_cls()
    for scaler_cls in (MaxAbsScaler, MinMaxScaler, RobustScaler, StandardScaler)
}

In [6]:
# Scaled copies of the train/test features, keyed by scaler name.
scaled_data = {}

for name, scaler in scalers.items():
    # Fit on the training split only, then reuse the fitted parameters on the
    # test split.
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    scaled_data[name] = dict(X_train=X_train_scaled, X_test=X_test_scaled)

In [11]:
def treinar_modelo(modelo, resultados=None):
    """Fit ``modelo`` on each scaled variant of the data and score it on the test split.

    For every entry of the module-level ``scaled_data`` mapping, the estimator
    is fitted on that entry's ``'X_train'`` together with ``y_train`` and then
    evaluated on its ``'X_test'`` against ``y_test``.

    Args:
        modelo: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            refit in place on every iteration, so after the call it holds the
            fit from the last entry of ``scaled_data``.
        resultados: Optional dict that is filled in place, keyed by the
            ``scaled_data`` key. A new dict is created when omitted.

    Returns:
        The ``resultados`` dict, mapping each ``scaled_data`` key to a dict
        with the keys ``'Accuracy'``, ``'Precision'``, ``'Recall'`` and
        ``'F1-Score'``.
    """
    if resultados is None:
        resultados = {}

    for nome, scaled in scaled_data.items():
        modelo.fit(scaled['X_train'], y_train)
        y_pred = modelo.predict(scaled['X_test'])

        # Precision, recall and F1 use average='weighted' so the per-class
        # scores are combined into a single number per metric.
        resultados[nome] = {
            'Accuracy': accuracy_score(y_test, y_pred),
            'Precision': precision_score(y_test, y_pred, average='weighted'),
            'Recall': recall_score(y_test, y_pred, average='weighted'),
            'F1-Score': f1_score(y_test, y_pred, average='weighted'),
        }

    # Returned as well as mutated so callers are not forced to pre-build a dict.
    return resultados

In [12]:
def apresentar_resultados(modelo):
    """Return a DataFrame of evaluation metrics for ``modelo``.

    The metrics are collected by ``treinar_modelo`` into a dict keyed by
    scaler name; the resulting frame is transposed so that each row is one
    scaler and each column is one metric.
    """
    metricas_por_scaler = {}
    treinar_modelo(modelo, metricas_por_scaler)
    return pd.DataFrame(metricas_por_scaler).transpose()

#### LogisticRegression

In [13]:
# Logistic regression with default settings apart from a fixed random seed.
logistic_regression = LogisticRegression(random_state=101)

In [14]:
# Build the per-scaler metrics table for the logistic regression model.
apresentar_resultados(logistic_regression)

Unnamed: 0,Accuracy,Precision,Recall,F1-Score
MaxAbsScaler,0.917,0.917193,0.917,0.914847
MinMaxScaler,0.919,0.918883,0.919,0.917094
RobustScaler,0.936,0.935396,0.936,0.93513
StandardScaler,0.935,0.934379,0.935,0.934223


#### DecisionTree

In [15]:
# Decision tree with default settings apart from a fixed random seed.
decision_tree = DecisionTreeClassifier(random_state=101)

In [16]:
# Build the per-scaler metrics table for the decision tree model.
apresentar_resultados(decision_tree)

Unnamed: 0,Accuracy,Precision,Recall,F1-Score
MaxAbsScaler,0.917,0.916615,0.917,0.916704
MinMaxScaler,0.917,0.916615,0.917,0.916704
RobustScaler,0.917,0.916615,0.917,0.916704
StandardScaler,0.915,0.91476,0.915,0.914823
