# Programming Assignment 2 - PRML
### UBIT: haudipud  
### UB ID: 50599797


## Section 1: Binary Classification (30 marks)
- Dataset: `abalone.csv`
- Task: Predict if abalone is older than 10 years (rings > 8.5)
- Model: Logistic Regression (from `prml.linear`)


In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression


df = pd.read_csv('abalone.csv')
df['age'] = df['rings'] + 1.5
df['binary_label'] = (df['age'] > 10).astype(int)


X = df.drop(columns=['rings', 'age', 'binary_label'])
y = df['binary_label']


if X['class'].dtype == object:
    X = pd.get_dummies(X, columns=['class'], drop_first=True)


scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [4]:

from prml.linear import LogisticRegression

model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.classify(X_test)

acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {acc:.4f}\n")
print("Confusion Matrix:\n", cm)
print("\nClassification Report:\n", report)

Accuracy: 0.8002

Confusion Matrix:
 [[234  45]
 [122 435]]

Classification Report:
               precision    recall  f1-score   support

           0       0.66      0.84      0.74       279
           1       0.91      0.78      0.84       557

    accuracy                           0.80       836
   macro avg       0.78      0.81      0.79       836
weighted avg       0.82      0.80      0.80       836



In [5]:
%pip install torch torchvision torchaudio


Note: you may need to restart the kernel to use updated packages.


## Section 2: Multi-class Classification (30 marks)
- Dataset: `abalone.csv`
- Task: Predict age group: young (<=8), adult (9–11), old (>=12)
- Model: Softmax Regression (from `prml.linear`)


In [7]:

df['age_group'] = pd.cut(df['rings'], bins=[0, 8, 11, df['rings'].max()], labels=[0,1,2]).astype(int)
X_multi = df.drop(columns=['rings', 'age', 'binary_label', 'age_group'])
y_multi = df['age_group']


if X_multi['class'].dtype == object:
    X_multi = pd.get_dummies(X_multi, columns=['class'], drop_first=True)


X_multi_scaled = scaler.fit_transform(X_multi)


X_train_m, X_test_m, y_train_m, y_test_m = train_test_split(X_multi_scaled, y_multi, test_size=0.2, random_state=42)


In [8]:
from prml.linear import SoftmaxRegression

model_multi = SoftmaxRegression()
model_multi.fit(X_train_m, y_train_m)
y_pred_m = model_multi.classify(X_test_m)

acc_m = accuracy_score(y_test_m, y_pred_m)
cm_m = confusion_matrix(y_test_m, y_pred_m)
report_m = classification_report(y_test_m, y_pred_m)

print(f"Accuracy: {acc_m:.4f}\n")
print("Confusion Matrix:\n", cm_m)
print("\nClassification Report:\n", report_m)

Accuracy: 0.5096

Confusion Matrix:
 [[230  22  27]
 [ 91  48 235]
 [ 20  15 148]]

Classification Report:
               precision    recall  f1-score   support

           0       0.67      0.82      0.74       279
           1       0.56      0.13      0.21       374
           2       0.36      0.81      0.50       183

    accuracy                           0.51       836
   macro avg       0.53      0.59      0.48       836
weighted avg       0.56      0.51      0.45       836



## Section 3: Neural Networks (20 marks)
- Dataset: `bonus.csv`
- Task: Predict wine `quality` using a neural network
- Model: Feedforward Neural Network (PyTorch)


In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import pandas as pd


bonus_df = pd.read_csv('bonus.csv')
X = bonus_df.drop(columns=['quality'])
y = bonus_df['quality']


scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.long)
y_test = torch.tensor(y_test.values, dtype=torch.long)


class FeedforwardNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x):
        return self.net(x)


input_dim = X_train.shape[1]
hidden_dim = 32
output_dim = y.max() + 1


model = FeedforwardNN(input_dim, hidden_dim, output_dim)


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
epochs = 30

for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()
    if (epoch+1) % 5 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")


model.eval()
with torch.no_grad():
    y_pred = model(X_test).argmax(dim=1)
    acc = accuracy_score(y_test, y_pred)
    print(f"\nAccuracy: {acc:.4f}")
    print("\nClassification Report:\n")
    print(classification_report(y_test, y_pred))

Epoch 5/30, Loss: 1.9269
Epoch 10/30, Loss: 1.5156
Epoch 15/30, Loss: 1.3046
Epoch 20/30, Loss: 1.2432
Epoch 25/30, Loss: 1.1866
Epoch 30/30, Loss: 1.1375

Accuracy: 0.5449

Classification Report:

              precision    recall  f1-score   support

           3       0.00      0.00      0.00         5
           4       1.00      0.04      0.08        25
           5       0.58      0.65      0.61       291
           6       0.53      0.67      0.59       432
           7       0.52      0.30      0.38       192
           8       0.00      0.00      0.00        35

    accuracy                           0.54       980
   macro avg       0.44      0.27      0.28       980
weighted avg       0.53      0.54      0.52       980



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Section 4: Convolutional Neural Networks (20 marks)
- Dataset: Image folders (`train/` and `test/`)
- Classes: `adidas`, `converse`, `nike`
- Task: Classify images using a CNN
- Model: PyTorch CNN with Conv2D + ReLU + MaxPool + FC layers


In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report


transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor()
])

train_dataset = datasets.ImageFolder(root='cnn_data/train', transform=transform)
test_dataset = datasets.ImageFolder(root='cnn_data/test', transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)



class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * 16 * 16, 100),
            nn.ReLU(),
            nn.Linear(100, 3)
        )

    def forward(self, x):
        x = self.conv(x)
        x = self.fc(x)
        return x


model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(5):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss:.4f}")


model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.numpy())
        all_labels.extend(labels.numpy())

print("\nClassification Report:")
print(classification_report(all_labels, all_preds, target_names=train_dataset.classes))

Epoch 1, Loss: 25.7398
Epoch 2, Loss: 24.9600
Epoch 3, Loss: 23.9473
Epoch 4, Loss: 23.0536
Epoch 5, Loss: 21.8075

Classification Report:
              precision    recall  f1-score   support

      adidas       0.46      0.76      0.57        38
    converse       0.71      0.13      0.22        38
        nike       0.52      0.61      0.56        38

    accuracy                           0.50       114
   macro avg       0.57      0.50      0.45       114
weighted avg       0.57      0.50      0.45       114



## Section 5: Bonus - Multi-class Classification using Sklearn (10 marks)
- Dataset: `bonus.csv`
- Task: Predict wine `quality` using any `sklearn` classifier
- Model: RandomForestClassifier


In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


bonus_df = pd.read_csv('bonus.csv')
X = bonus_df.drop(columns=['quality'])
y = bonus_df['quality']


scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)


y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"Accuracy: {acc:.4f}\n")
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.6939

Confusion Matrix:
 [[  0   0   3   2   0   0]
 [  0   5  12   8   0   0]
 [  0   4 203  82   2   0]
 [  0   0  63 344  25   0]
 [  0   0   3  73 112   4]
 [  0   0   1  12   6  16]]

Classification Report:
               precision    recall  f1-score   support

           3       0.00      0.00      0.00         5
           4       0.56      0.20      0.29        25
           5       0.71      0.70      0.70       291
           6       0.66      0.80      0.72       432
           7       0.77      0.58      0.66       192
           8       0.80      0.46      0.58        35

    accuracy                           0.69       980
   macro avg       0.58      0.46      0.49       980
weighted avg       0.70      0.69      0.69       980



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
