In [1]:
import numpy as np
import polars as pl
import pandas as pd
import matplotlib.pyplot as plt 
%matplotlib inline
import seaborn as sns
sns.set_style('whitegrid')

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler #escala das features

from sklearn.compose import make_column_transformer #pipeline com colunas de tipos diferentes
from sklearn.preprocessing import OneHotEncoder, TargetEncoder
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, mutual_info_classif, f_regression #selecao de features
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.metrics import precision_score, recall_score, f1_score


from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier,RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import classification_report, roc_auc_score, roc_curve, auc
from sklearn.model_selection import cross_val_score

In [3]:
import lightgbm as lgb
from lightgbm import LGBMRegressor
# Modelo
lgb_model = LGBMRegressor()

In [4]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
mushroom = fetch_ucirepo(id=73) 
  
# data (as pandas dataframes) 
X = mushroom.data.features 
y = mushroom.data.targets 


In [5]:
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.ensemble import (
    RandomForestClassifier,
    AdaBoostClassifier,
    GradientBoostingClassifier,)

In [6]:
y = mushroom.data.targets['poisonous'].map({'e': 0, 'p': 1})
y

0       1
1       0
2       0
3       1
4       0
       ..
8119    0
8120    0
8121    0
8122    1
8123    0
Name: poisonous, Length: 8124, dtype: int64

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Definação das colunas categóricas
categorical_columns = ['cap-shape', 'cap-surface', 'cap-color', 'bruises', 'odor',
       'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color',
       'stalk-shape', 'stalk-root', 'stalk-surface-above-ring',
       'stalk-surface-below-ring', 'stalk-color-above-ring',
       'stalk-color-below-ring', 'veil-type', 'veil-color', 'ring-number',
       'ring-type', 'spore-print-color', 'population', 'habitat'] 


classifiers_list = [
    KNeighborsClassifier(3),
    SVC(kernel="rbf", C=0.025, probability=True),
    SVC(),
    LogisticRegression(),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    ]


for classifier in classifiers_list:
    # Transformador categórico
    categorical_transformer = Pipeline(steps=[
    ("encoder", OneHotEncoder(handle_unknown="ignore"))
    ])

    # Pré-processador
    preprocessor = ColumnTransformer(
    transformers=[
        ("cat", categorical_transformer, categorical_columns)
    ])


    pipe = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", classifier)])

    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    
    print(classifier)
    

KNeighborsClassifier(n_neighbors=3)
SVC(C=0.025, probability=True)
SVC()
LogisticRegression()
DecisionTreeClassifier()
RandomForestClassifier()




AdaBoostClassifier()
GradientBoostingClassifier()


In [12]:
results = []

# Loop pelos classificadores
for classifier in classifiers_list:
    
    pipe = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", classifier)])
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    
    # Alterar para o tipo de média desejado para problemas multiclasse
    precision = precision_score(y_test, y_pred, average='binary')
    recall = recall_score(y_test, y_pred, average='binary')
    f1 = f1_score(y_test, y_pred, average='binary')
    
    results.append({
        "Model": classifier.__class__.__name__,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1-Score": f1
    })

result = pd.DataFrame(results)

print(result)




                        Model  Accuracy  Precision   Recall  F1-Score
0        KNeighborsClassifier  1.000000   1.000000  1.00000  1.000000
1                         SVC  0.987692   0.994805  0.97954  0.987113
2                         SVC  1.000000   1.000000  1.00000  1.000000
3          LogisticRegression  1.000000   1.000000  1.00000  1.000000
4      DecisionTreeClassifier  1.000000   1.000000  1.00000  1.000000
5      RandomForestClassifier  1.000000   1.000000  1.00000  1.000000
6          AdaBoostClassifier  1.000000   1.000000  1.00000  1.000000
7  GradientBoostingClassifier  1.000000   1.000000  1.00000  1.000000
