In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

# Set random seed for reproducibility (covers PyTorch weight initialisation)
torch.manual_seed(42)

# Load the red Wine Quality dataset (semicolon-separated CSV).
# The raw-content URL is required here: a github.com/<user>/<repo>/blob/... address
# serves the HTML file-viewer page rather than the CSV bytes, so pandas would not
# receive the actual table.
data = pd.read_csv(
    'https://raw.githubusercontent.com/Shuhrat-git/ML_assignment_1/main/winequality-red.csv',
    sep=';',
)

# Fail early with a clear message if the file was not parsed into the expected table
assert 'quality' in data.columns, "expected a 'quality' column - check the URL and separator"

### Data Preprocessing
Split the dataset into training and test sets, then standardize the features using statistics computed on the training set only.

In [None]:
# Separate the predictors from the regression target
target_column = 'quality'
X = data.drop(columns=[target_column]).to_numpy()
y = data[target_column].to_numpy().reshape(-1, 1)  # column vector: one row per sample

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features: the scaler is fitted on the training split only,
# and the same fitted transform is then applied to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

### Convert to PyTorch tensors

In [None]:
# Convert the NumPy splits to float32 tensors.
# torch.tensor(..., dtype=...) is the documented way to build a tensor from existing
# data; it replaces the legacy torch.FloatTensor type constructor and makes the
# target dtype explicit (the integer quality labels become floats for MSE loss).
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

### Define the neural network model for regression

In [None]:
class WineQualityModel(nn.Module):
    """Small feed-forward regressor: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the single hidden layer (default 16).
        output_size: Number of values produced per sample (default 1).
    """

    def __init__(self, input_size, hidden_size=16, output_size=1):
        super().__init__()
        # Attribute names are kept as-is because they become the state_dict keys.
        self.layer1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Run the input through the hidden layer, the ReLU, then the output layer."""
        hidden = self.relu(self.layer1(x))
        return self.layer2(hidden)

# Model dimensions derived from the prepared data
output_size = 1  # regression: one predicted value per sample
input_size = X_train.shape[1]  # one input unit per feature column

### Model Training

In [None]:
# Initialize the model
model = WineQualityModel(input_size=input_size, output_size=output_size)

# Define loss function
loss_function = nn.MSELoss()  # Mean Squared Error loss

# Define optimizer (Adam over all model parameters)
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training parameters
num_epochs = 100
batch_size = 32
train_losses = []  # one entry per epoch: sample-weighted mean of the mini-batch losses

num_samples = X_train_tensor.shape[0]

# Training loop: mini-batch gradient descent.
# batch_size was previously declared but never used, so every epoch performed a
# single full-batch update; the loop below actually honours it.
for epoch in range(num_epochs):
    # Set model to training mode
    model.train()

    # Visit the training rows in a fresh random order each epoch
    permutation = torch.randperm(num_samples)
    running_loss = 0.0

    for start in range(0, num_samples, batch_size):
        batch_idx = permutation[start:start + batch_size]
        X_batch = X_train_tensor[batch_idx]
        y_batch = y_train_tensor[batch_idx]

        # Forward pass and loss on the current mini-batch
        outputs = model(X_batch)
        loss = loss_function(outputs, y_batch)

        # Backward pass and parameter update
        optimizer.zero_grad()  # Clear previous gradients
        loss.backward()        # Compute gradients
        optimizer.step()       # Apply the Adam update

        # Weight by batch length so a shorter final batch does not skew the epoch mean
        running_loss += loss.item() * batch_idx.shape[0]

    # Record exactly one loss value per epoch (the loss plot expects num_epochs points)
    epoch_loss = running_loss / num_samples
    train_losses.append(epoch_loss)

    # Print progress every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

### Prediction step

In [None]:
# Inference: evaluation mode, with gradient tracking disabled for the forward pass
model.eval()
with torch.no_grad():
    y_pred_tensor = model(X_test_tensor)

# NumPy versions of predictions and targets for scikit-learn and matplotlib
y_pred = y_pred_tensor.numpy()
y_test_np = y_test_tensor.numpy()

# Mean squared error on the held-out test split
test_mse = mean_squared_error(y_test_np, y_pred)
print(f"Test MSE: {test_mse:.4f}")

### Plot actual vs predicted values

In [None]:
# Histogram overlay: distribution of actual vs. predicted quality on the test set
actual_values = y_test_np.ravel()
predicted_values = y_pred.ravel()

# Both histograms must share the same bin edges; letting each call pick its own
# edges (bins=10 over different value ranges) leaves the overlaid bars misaligned.
value_range = (
    float(min(actual_values.min(), predicted_values.min())),
    float(max(actual_values.max(), predicted_values.max())),
)

plt.figure(figsize=(10, 6))
plt.hist(actual_values, bins=10, range=value_range, alpha=0.5,
         label="Actual Quality", color="navy", edgecolor="black")
plt.hist(predicted_values, bins=10, range=value_range, alpha=0.5,
         label="Predicted Quality", color="aqua", edgecolor="black")
plt.xlabel("Wine Quality")
plt.ylabel("Frequency")
plt.title("Histogram of Actual vs. Predicted Wine Quality")
plt.legend()
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.show()

### Plot loss vs epochs

In [None]:
# Training curve: recorded loss per epoch, also written to disk as a PNG
fig, ax = plt.subplots(figsize=(10, 6))
epoch_numbers = range(1, num_epochs + 1)
ax.plot(epoch_numbers, train_losses)
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss (MSE)')
ax.set_title('Training Loss over Epochs')
ax.grid(True)
fig.savefig('loss_vs_epochs.png')  # save before show so the file contains the drawn figure
plt.show()