In [77]:
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

In [78]:
# Location of the training data. The default is the original author's local
# folder; set the CV_TASK1_DATA_ROOT environment variable to run the notebook
# on another machine without editing this cell.
DATA_ROOT = os.environ.get(
    'CV_TASK1_DATA_ROOT',
    '/Users/shreyasmishra/Documents/MLCode/CV/Project Task 1',
)
image_dir = os.path.join(DATA_ROOT, 'train_data')          # directory of .jpg images
label_file = os.path.join(DATA_ROOT, 'train_labels.txt')   # one integer label per line

In [79]:
class ImageDataset(Dataset):
    """Dataset pairing the ``.jpg`` files of a directory with integer labels.

    Images are taken in lexicographically sorted filename order, and the i-th
    non-blank line of ``label_file`` is used as the label of the i-th image.
    Labels in the file are treated as 1-based: ``__getitem__`` subtracts 1
    before one-hot encoding.

    NOTE(review): lexicographic sorting puts ``10.jpg`` before ``2.jpg``. If
    the filenames are not zero-padded, confirm this order matches the row
    order of the label file. Only the lowercase ``.jpg`` suffix is matched.

    Args:
        image_dir: Directory containing the images.
        label_file: Text file with one integer label per line.
        num_classes: Length of the one-hot label vectors.
        transform: Optional callable applied to each RGB PIL image.

    Raises:
        ValueError: If the number of labels differs from the number of images,
            which would otherwise pair images with the wrong labels or cut
            iteration short without any error.
    """

    def __init__(self, image_dir, label_file, num_classes = 60, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        self.num_classes = num_classes
        self.image_filenames = [f for f in sorted(os.listdir(image_dir)) if f.endswith('.jpg')]
        self.labels = self.load_labels(label_file)

        # Fail fast: images and labels are matched purely by position.
        if len(self.labels) != len(self.image_filenames):
            raise ValueError(
                f"Found {len(self.image_filenames)} .jpg images in {image_dir!r} "
                f"but {len(self.labels)} labels in {label_file!r}"
            )

    def load_labels(self, label_file):
        """Return the integer labels stored one per line in ``label_file``.

        Blank or whitespace-only lines (e.g. trailing newlines) are skipped.
        """
        with open(label_file, 'r') as f:
            stripped_lines = (line.strip() for line in f)
            return [int(text) for text in stripped_lines if text]

    def one_hot_encode(self, label):
        """Return a one-hot tensor of length ``num_classes`` for a 0-based label."""
        return torch.nn.functional.one_hot(torch.tensor(label), num_classes=self.num_classes)

    def __len__(self):
        """Number of images found in ``image_dir``."""
        return len(self.image_filenames)

    def __getitem__(self, idx):
        """Return ``(image, one_hot_label)`` for the sample at position ``idx``."""
        img_path = os.path.join(self.image_dir, self.image_filenames[idx])
        label = self.labels[idx]

        # Image.open is lazy and keeps the file handle open; convert() loads
        # the pixels into a new image, so the handle can be released here.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        if self.transform:
            image = self.transform(image)

        # Labels on disk are 1-based; one_hot expects 0-based class indices.
        one_hot_label = self.one_hot_encode(label - 1)

        return image, one_hot_label

In [80]:
# Full labelled training set; every image goes through torchvision's ToTensor.
dataset = ImageDataset(
    image_dir=image_dir,
    label_file=label_file,
    num_classes=60,
    transform=transforms.ToTensor(),
)


In [81]:
# Single pass over the dataset: keep one flat pixel vector per image and
# recover the integer class index from its one-hot label.
all_images, all_labels = [], []
for img, label in dataset:
    all_images.append(torch.flatten(img).numpy())
    all_labels.append(int(label.argmax()))

# Stack into (n_samples, n_pixels) features and (n_samples,) targets.
all_images = np.array(all_images)
all_labels = np.array(all_labels)

# Hold out 20% of the samples for validation (fixed seed for a stable split).
X_train, X_val, y_train, y_val = train_test_split(
    all_images,
    all_labels,
    test_size=0.2,
    random_state=42,
)


In [82]:
# Reduce the flattened pixels to 100 principal components.
# With far more features than components, scikit-learn's automatic solver
# choice can pick the randomized SVD solver, so the seed is pinned to make
# the projection (and everything trained on it) reproducible across runs.
pca = PCA(n_components=100, random_state=42)
X_train_pca = pca.fit_transform(X_train)  # fit on the training split only
X_val_pca = pca.transform(X_val)          # reuse the training projection

# Convert to PyTorch tensors: float32 features, int64 class indices
# (the target dtype nn.CrossEntropyLoss expects for class-index labels).
X_train_pca = torch.tensor(X_train_pca, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val_pca = torch.tensor(X_val_pca, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.long)

In [83]:
# Wrap the PCA features and class indices so they can be batched together.
train_dataset, val_dataset = (
    torch.utils.data.TensorDataset(X_train_pca, y_train),
    torch.utils.data.TensorDataset(X_val_pca, y_val),
)

# Mini-batches of 8; only the training batches are reshuffled every epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=8, shuffle=False)

In [84]:
class ANN(nn.Module):
    """Fully connected classifier: ``input_dim -> 128 -> 64 -> output_dim``.

    ReLU follows each of the two hidden layers. The output layer has no
    activation, so ``forward`` returns raw scores.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_dim, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of feature vectors to one score per output unit."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [85]:
# Seed torch's global RNG so weight initialisation (and DataLoader shuffling,
# which draws from the same generator when none is passed) is repeatable on
# a fresh Restart & Run All.
torch.manual_seed(42)

# Input width follows the PCA output instead of repeating the magic number.
input_dim = X_train_pca.shape[1]
# Size the output layer by the largest 0-based class index rather than the
# number of distinct labels: if any class id is missing from the data, the
# distinct count is too small and CrossEntropyLoss rejects the higher targets.
output_dim = int(all_labels.max()) + 1
model = ANN(input_dim, output_dim)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()  # multi-class classification on raw scores
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum = 0.9)


In [86]:
def train(model, train_loader, val_loader, criterion, optimizer, epochs=10):
    """Train ``model`` and print loss / validation accuracy after each epoch.

    Args:
        model: Callable module mapping a batch of inputs to per-class scores.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer holding the model's parameters.
        epochs: Number of passes over ``train_loader``.

    Returns:
        None. One summary line per epoch is printed. The mean training loss
        or the validation accuracy is reported as ``nan`` when the
        corresponding loader yields no batches, instead of raising
        ``ZeroDivisionError``.
    """
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0

        # Training
        for inputs, labels in train_loader:
            optimizer.zero_grad()  # Clear gradients from the previous step

            outputs = model(inputs)  # Forward pass
            loss = criterion(outputs, labels)  # Compute loss
            loss.backward()  # Backpropagation
            optimizer.step()  # Update weights

            running_loss += loss.item()
            num_batches += 1

        # Validation
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)  # index of the top score
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Guard both averages against empty loaders.
        mean_loss = running_loss / num_batches if num_batches else float('nan')
        accuracy = 100 * correct / total if total else float('nan')
        print(f'Epoch {epoch+1}/{epochs}, Loss: {mean_loss:.4f}, Validation Accuracy: {accuracy:.2f}%')

# Run 10 epochs of training with per-epoch validation.
train(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=10,
)

Epoch 1/10, Loss: 3.7385, Validation Accuracy: 17.33%
Epoch 2/10, Loss: 2.8902, Validation Accuracy: 25.17%
Epoch 3/10, Loss: 2.4390, Validation Accuracy: 29.33%
Epoch 4/10, Loss: 2.1562, Validation Accuracy: 29.17%
Epoch 5/10, Loss: 1.8659, Validation Accuracy: 30.17%
Epoch 6/10, Loss: 1.6573, Validation Accuracy: 30.83%
Epoch 7/10, Loss: 1.4441, Validation Accuracy: 32.67%
Epoch 8/10, Loss: 1.2696, Validation Accuracy: 33.33%
Epoch 9/10, Loss: 1.0842, Validation Accuracy: 33.00%
Epoch 10/10, Loss: 0.9031, Validation Accuracy: 34.17%
