In [None]:
# Instalujemy biblioteke aby połączyć sklearn i pytorch
!pip install skorch

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.inspection import permutation_importance
from sklearn.metrics import confusion_matrix, roc_curve, auc, classification_report, ConfusionMatrixDisplay
import torch
import torch.nn as nn
import torch.optim as optim
from skorch import NeuralNetClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
import warnings

warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Load the data from winequality-red.csv in the working directory
csv_path = './winequality-red.csv'
df = pd.read_csv(csv_path)

In [None]:
# Separate the target from the features: pop() removes the 'quality' column from df
# in place and returns it; to_frame() keeps the target as a one-column DataFrame
target = df.pop('quality').to_frame()

In [None]:
# Normalize the features to the [0, 1] range.
# The scaler is fitted on the feature frame `df`, not on `target`: the goal of this
# step is to scale the model inputs, and scaling the quality column would leave
# the features untouched.
# NOTE(review): X_normalized is not consumed by the cells visible in this notebook
# (the split uses df.values) — confirm whether training on unscaled features is intended.
scaler = MinMaxScaler()
X_normalized = scaler.fit_transform(df)

In [None]:
# Preview the first rows of the features and of the target
for frame in (df, target):
    print(frame.head())

In [None]:
# Data for the histograms: the features plus the target as a 'quality' column
df_hist = df.assign(quality=target)

# Grid layout: 3 panels per row, with the row count rounded up (ceiling division)
num_features = len(df_hist.columns)
nrows = -(-num_features // 3)

fig, ax = plt.subplots(nrows=nrows, ncols=3, figsize=(20, 15))
ax = ax.ravel()  # 1-D sequence of axes, so panels can be paired with columns

# One histogram per column, titled with the column name
for panel, col in zip(ax, df_hist.columns):
    panel.hist(df_hist[col], bins=20, color='orange', alpha=0.7)
    panel.set_title(col)

# Drop the panels left over when the grid has more slots than columns
for unused in ax[num_features:]:
    fig.delaxes(unused)
plt.tight_layout()
plt.show()

In [None]:
# Scatter-plot matrix of all columns with Seaborn; KDE curves on the diagonal
orange = {'color': 'orange'}
sns.pairplot(
    df_hist,
    diag_kind='kde',
    plot_kws=dict(orange),
    diag_kws=dict(orange),
)
plt.show()

In [None]:
# Split the data into training and test sets: 20% held out, fixed random_state
X_train, X_test, y_train, y_test = train_test_split(
    df.to_numpy(),
    target.to_numpy(),
    test_size=0.2,
    random_state=42,
)

In [None]:
# Convert the target into a binary class: 1 when the value is >= the median, else 0.
# .iloc[0] picks the single median value by position; a plain `[0]` would be treated
# as a label on this Series, which is indexed by column name, and that positional
# fallback is deprecated in pandas.
# NOTE(review): the name says "rent", but the value is the median of the target column.
median_rent = target.median().iloc[0]
# Cast to int64 explicitly: plain `int` can map to int32 on some platforms, while the
# cross-entropy loss used for training expects 64-bit integer class indices.
y_train_class = (y_train >= median_rent).astype(np.int64).ravel()
y_test_class = (y_test >= median_rent).astype(np.int64).ravel()

In [None]:
# Feed-forward network with a configurable stack of hidden layers
class WineQualityNN(nn.Module):
    """Fully connected network: Linear + ReLU per hidden layer, then a final Linear.

    Args:
        input_dim: Size of each input sample.
        hidden_dims: Iterable with the width of each hidden layer, in order.
        output_dim: Size of the output produced by the last Linear layer.
    """

    def __init__(self, input_dim, hidden_dims, output_dim):
        super().__init__()
        widths = [input_dim, *hidden_dims]
        blocks = []
        # Consecutive width pairs give the (in, out) sizes of each hidden layer
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            blocks += [nn.Linear(n_in, n_out), nn.ReLU()]
        # The output layer has no activation after it
        blocks.append(nn.Linear(widths[-1], output_dim))
        self.model = nn.Sequential(*blocks)

    def forward(self, x):
        """Pass x through the sequential stack and return its output."""
        return self.model(x)

In [None]:
# Input size = number of feature columns in X_train; output size = 2
input_dim, output_dim = X_train.shape[1], 2

In [None]:
# Wrap the PyTorch module in a skorch classifier so it can be used with scikit-learn.
# hidden_dims is not set here; it has to be supplied as module__hidden_dims
# (the grid-search parameter grid does so).
net = NeuralNetClassifier(
    # network definition and its constructor arguments
    module=WineQualityNN,
    module__input_dim=input_dim,
    module__output_dim=output_dim,
    # loss and optimizer
    criterion=nn.CrossEntropyLoss,
    optimizer=optim.Adam,
    lr=0.1,
    # training loop settings
    max_epochs=20,
    iterator_train__shuffle=True,
    verbose=0,
)

In [None]:
# Parameter grid for GridSearchCV: learning rate, epoch count,
# hidden-layer layout and the optimizer's weight decay
params = dict(
    lr=[0.01, 0.2, 0.4],
    max_epochs=[10, 20, 30],
    module__hidden_dims=[[32], [64], [32, 32]],
    optimizer__weight_decay=[0, 1e-4, 1e-2],
)

In [None]:
# Run the grid search: 3-fold cross-validation over `params`, scored by accuracy,
# with the best configuration refitted at the end (refit=True).
# Seed PyTorch first so that weight initialisation and batch shuffling — and therefore
# the reported best parameters — are repeatable between runs
# (assumes the search runs sequentially on CPU — TODO confirm).
torch.manual_seed(42)
gs = GridSearchCV(net, params, refit=True, cv=3, scoring='accuracy')
# The features are cast to float32 before fitting
gs.fit(X_train.astype(np.float32), y_train_class)

In [None]:
# Report the parameter combination with the best cross-validation score
for label, value in (
    ("Best parameters found: ", gs.best_params_),
    ("Best cross-validation accuracy: ", gs.best_score_),
):
    print(label, value)