### Exercise 7.2

In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [19]:
# Load the wine table: every column except the last is a feature, the last
# column carries the class label ('Medium' / 'Good' / 'Excellent').
data = pd.read_csv('../data/wine_quality.csv')
X = data.iloc[:, :-1].to_numpy()

# Integer code per class label; a label that is not listed here raises
# KeyError while the target vector is built.
labels = {name: code for code, name in enumerate(('Medium', 'Good', 'Excellent'))}
y = np.array([labels[name] for name in data.iloc[:, -1].to_numpy()])

# Sanity check: one target per feature row.
print(X.shape, y.shape, sep='\n')

(4898, 11)
(4898,)


### Standardize the features

In [8]:
# Standardize every feature column with scikit-learn's StandardScaler.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split further down, so test-set statistics influence the scaling.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

### Split into train and test sets

In [9]:
# Hold out 20% of the standardized samples as the test set; the fixed
# random_state keeps the partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Float32 tensor copies of both splits, used as the autoencoder's inputs.
X_train_tensor, X_test_tensor = (
    torch.tensor(split, dtype=torch.float32)
    for split in (X_train, X_test)
)

In [11]:
# Step 2: Dimensionality Reduction using PCA
# Keep 5 principal components, the same width as the autoencoder bottleneck
# defined below, so both reduced representations have equal dimensionality.
# The projection is fitted on the training split only and then applied
# unchanged to the test split.
pca = PCA(n_components=5)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

In [12]:
# Step 3: Autoencoder Implementation using PyTorch
class Autoencoder(nn.Module):
    """Fully connected autoencoder with one hidden layer on each side of the bottleneck.

    The encoder maps ``input_dim -> hidden_dim -> latent_dim`` and the decoder
    mirrors it back to ``input_dim``. A ReLU follows each hidden layer; the
    bottleneck and the reconstruction are plain linear outputs.

    Args:
        input_dim: Number of features in each input sample.
        hidden_dim: Width of the hidden layer in the encoder and the decoder.
            Defaults to 64.
        latent_dim: Width of the bottleneck, i.e. the size of the reduced
            representation returned by ``self.encoder``. Defaults to 5.
    """

    def __init__(self, input_dim, hidden_dim=64, latent_dim=5):
        super().__init__()
        # Encoder: compress the input down to the bottleneck representation.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim),  # bottleneck layer
        )
        # Decoder: reconstruct the input from the bottleneck representation.
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
        )

    def forward(self, x):
        """Encode ``x`` and return its reconstruction (same trailing size as ``x``)."""
        return self.decoder(self.encoder(x))

In [13]:
# Seed PyTorch's global RNG so the weight initialisation and the mini-batch
# shuffling below are repeatable from run to run (42 matches the random_state
# used for the train/test split and the classifier).
torch.manual_seed(42)

# Initialize autoencoder; its input width is the number of feature columns.
input_dim = X_train.shape[1]
autoencoder = Autoencoder(input_dim)

# Loss function and optimizer: mean squared reconstruction error, Adam updates.
criterion = nn.MSELoss()
optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)

# Create DataLoader for batching
train_dataset = TensorDataset(X_train_tensor, X_train_tensor)  # input = target for autoencoder
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [14]:
# Train the autoencoder
num_epochs = 50
for epoch in range(num_epochs):
    # Unpack each batch directly in the loop header: binding it to a variable
    # called `data` first would overwrite the DataFrame of that name loaded at
    # the top of the notebook.
    for inputs, targets in train_loader:
        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = autoencoder(inputs)
        loss = criterion(outputs, targets)  # reconstruction error for this batch
        loss.backward()
        optimizer.step()


In [15]:
# Run only the encoder half over both splits to obtain the bottleneck
# features as NumPy arrays; gradient tracking is disabled because no training
# happens here.
with torch.no_grad():
    X_train_autoencoder, X_test_autoencoder = (
        autoencoder.encoder(split).numpy()
        for split in (X_train_tensor, X_test_tensor)
    )

In [17]:
# Step 4: Neural Network Classifier using scikit-learn
def train_classifier(X_train, X_test, y_train, y_test):
    """Fit an MLP on the training split and score it on the test split.

    The network has two hidden layers (64 and 32 units), runs for at most
    500 iterations and uses a fixed random_state of 42.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets.

    Returns:
        A ``(accuracy, report)`` tuple: the test accuracy from
        ``accuracy_score`` and the text produced by ``classification_report``.
    """
    mlp = MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=500, random_state=42)
    predictions = mlp.fit(X_train, y_train).predict(X_test)
    return accuracy_score(y_test, predictions), classification_report(y_test, predictions)

# The same classifier is trained on three representations of the data. Each
# heading is printed before training starts so it precedes anything emitted
# while the model is being fitted.

# 1) Full standardized feature set
print("Original Data:")
accuracy_orig, report_orig = train_classifier(X_train, X_test, y_train, y_test)
print(f"Accuracy: {accuracy_orig}", report_orig, sep="\n")

# 2) PCA projection
print("PCA-Reduced Data:")
accuracy_pca, report_pca = train_classifier(X_train_pca, X_test_pca, y_train, y_test)
print(f"Accuracy: {accuracy_pca}", report_pca, sep="\n")

# 3) Autoencoder bottleneck features
print("Autoencoder-Reduced Data:")
accuracy_ae, report_ae = train_classifier(
    X_train_autoencoder, X_test_autoencoder, y_train, y_test
)
print(f"Accuracy: {accuracy_ae}", report_ae, sep="\n")

Original Data:




Accuracy: 0.6520408163265307
              precision    recall  f1-score   support

           0       0.67      0.66      0.66       321
           1       0.63      0.66      0.64       432
           2       0.67      0.63      0.65       227

    accuracy                           0.65       980
   macro avg       0.66      0.65      0.65       980
weighted avg       0.65      0.65      0.65       980

PCA-Reduced Data:
Accuracy: 0.5673469387755102
              precision    recall  f1-score   support

           0       0.58      0.63      0.61       321
           1       0.54      0.59      0.57       432
           2       0.61      0.42      0.50       227

    accuracy                           0.57       980
   macro avg       0.58      0.55      0.56       980
weighted avg       0.57      0.57      0.56       980

Autoencoder-Reduced Data:
Accuracy: 0.6
              precision    recall  f1-score   support

           0       0.67      0.54      0.60       321
           1 