In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import os

In [2]:
# Print current directory and contents
print("Current working directory:", os.getcwd())
print("Contents of current directory:", os.listdir())

Current working directory: c:\Users\Dell\Desktop\Task2Code\INM702
Contents of current directory: ['.git', 'coursework_task_1.ipynb', 'engine_data.csv', 'main.ipynb', 'README.md', 'Task2codefile.ipynb', 'updated_pollution_dataset.csv']


In [3]:
base_dir = r'c:\Users\Dell\Desktop\Task2Code'

In [4]:
# Create dataset paths 
train_path = os.path.join('Train', 'Train')
valid_path = os.path.join('Validation', 'Validation')
test_path = os.path.join('Test', 'Test')

In [5]:
# Define transforms
transform = transforms.Compose([
    transforms.Resize((32, 32)),  # Smaller size for baseline model
    transforms.ToTensor()
])

In [6]:
# Create dataset paths
train_path = os.path.join(base_dir, 'Train', 'Train')
valid_path = os.path.join(base_dir, 'Validation', 'Validation')
test_path = os.path.join(base_dir, 'Test', 'Test')

In [7]:

# Create datasets
train_dataset = datasets.ImageFolder(train_path, transform=transform)
valid_dataset = datasets.ImageFolder(valid_path, transform=transform)
test_dataset = datasets.ImageFolder(test_path, transform=transform)


In [8]:
# Print dataset information
print(f'Number of training images: {len(train_dataset)}')
print(f'Number of validation images: {len(valid_dataset)}')
print(f'Number of test images: {len(test_dataset)}')
print(f'Classes: {train_dataset.classes}')


Number of training images: 1322
Number of validation images: 60
Number of test images: 150
Classes: ['Healthy', 'Powdery', 'Rust']


In [9]:
# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

BaseLine Model

In [11]:
class BaselineCNN(nn.Module):
    def __init__(self):
        super(BaselineCNN, self).__init__()
        # Just one conv layer
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        
        # Two fully connected layers
        self.fc1 = nn.Linear(16 * 16 * 16, 64)
        self.fc2 = nn.Linear(64, 3)  # 3 classes
        
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = x.view(-1, 16 * 16 * 16)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [12]:
# Initialize model and training components
model = BaselineCNN()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

Train the baseline model

In [20]:
# Set random seed for reproducibility
torch.manual_seed(42)

# Simple training components
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

print("Starting training...")
num_epochs = 5 

for epoch in range(num_epochs):
    # Training phase
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
    
    train_acc = 100. * correct / total
    
    # Validation phase
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    
    val_acc = 100. * correct / total
    
    print(f'Epoch {epoch+1}/{num_epochs}:')
    print(f'Loss: {running_loss/len(train_loader):.3f}')
    print(f'Train Accuracy: {train_acc:.2f}%')
    print(f'Validation Accuracy: {val_acc:.2f}%\n')

# Simple evaluation
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

print(f'Test Accuracy: {100.*correct/total:.2f}%')

Starting training...
Epoch 1/5:
Loss: 0.646
Train Accuracy: 70.80%
Validation Accuracy: 46.67%

Epoch 2/5:
Loss: 0.605
Train Accuracy: 75.64%
Validation Accuracy: 60.00%

Epoch 3/5:
Loss: 0.613
Train Accuracy: 75.79%
Validation Accuracy: 48.33%

Epoch 4/5:
Loss: 0.583
Train Accuracy: 75.19%
Validation Accuracy: 41.67%

Epoch 5/5:
Loss: 0.584
Train Accuracy: 75.87%
Validation Accuracy: 56.67%

Test Accuracy: 58.67%


In [21]:
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

test_accuracy = 100. * correct / total 
print(f'Test Accuracy: {test_accuracy:.2f}%')

Test Accuracy: 58.67%


In [22]:
baseline_results = {
    'test_accuracy': test_accuracy,  
    'model_state': model.state_dict().copy()
}


print(f"Baseline Test Accuracy: {float(baseline_results['test_accuracy']):.2f}%")



baseline_transform = transform  
baseline_model = model  

Baseline Test Accuracy: 58.67%
