In [None]:
import pandas as pd


# Show every column when a DataFrame is displayed (None removes the column cap).
pd.options.display.max_columns = None

In [None]:
# Relative path to the parquet file holding the air-quality data
# (presumably already cleaned upstream, judging by the "treated" suffix).
treated_air_quality_dataset = "data/air_quality_dataset_treated.parquet"

# Read the full parquet file into a DataFrame.
df = pd.read_parquet(path=treated_air_quality_dataset)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [None]:
# 'status' is the prediction target; every other column is used as a feature.
y = df['status']
X = df.drop(columns='status')

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Model pipeline: standardise the features, then fit a multi-layer perceptron.
# Keeping the scaler inside the pipeline means that, when the pipeline is
# cross-validated, the scaler is fit only on each training fold.
pipeline = Pipeline(
    [
        ('scaler', StandardScaler()),
        # random_state pins weight initialisation and batch shuffling so that
        # repeated runs of the notebook produce the same model.
        ('mlp', MLPClassifier(max_iter=500, random_state=42))
    ]
)

# Hyper-parameter grid for the 'mlp' step (the 'mlp__' prefix routes each key
# to that pipeline step). 4 * 2 * 2 * 3 * 2 = 96 candidate combinations.
param_grid = {
    'mlp__hidden_layer_sizes': [(10,), (50,), (10,10), (50,50)],
    'mlp__activation': ['tanh', 'relu'],
    'mlp__solver': ['adam', 'sgd'],
    'mlp__alpha': [0.0001, 0.001, 0.01],
    # scikit-learn accepts 'constant', 'invscaling' or 'adaptive' here; the
    # misspelling 'adaptative' is rejected at fit time. The schedule only has
    # an effect when solver='sgd'.
    'mlp__learning_rate': ['constant', 'adaptive']
}

In [None]:
# Exhaustive search over param_grid, scoring each candidate by accuracy with
# 3-fold cross-validation. n_jobs=1 runs the fits sequentially; verbose=2
# logs progress for each fit.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=1,
    verbose=2,
)

grid_search.fit(X_train, y_train)

# Report the best-scoring hyper-parameter combination.
print(f"Melhores parâmetros:\n {grid_search.best_params_}")

In [None]:
# Predict the held-out test set with the refitted best pipeline
# (this is the estimator GridSearchCV.predict delegates to).
y_pred = grid_search.best_estimator_.predict(X_test)

In [None]:
# Fraction of test rows whose predicted status matches the true status.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Confusion matrix for the held-out test set.
# Passing an explicit label order keeps the matrix rows/columns aligned with
# the tick labels drawn on the heatmap.
class_labels = grid_search.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    # Integer format: the default '.2g' abbreviates large counts (e.g. 1.2e+03).
    fmt="d",
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
# confusion_matrix puts true classes on rows and predicted classes on columns.
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
ax.set_title("Confusion matrix (test set)")
plt.show()

In [None]:
# Per-class precision, recall and F1 on the test set, printed as plain text.
print(classification_report(y_true=y_test, y_pred=y_pred))