In [1]:
import os

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Define CNN model from scratch
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        # First convolutional layer (input: 1 channel grayscale image)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        
        # Fully connected layers (assuming input image size of 400x300)
        self.fc1 = nn.Linear(128 * 50 * 37, 512)  # Adjust based on image size
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 5)  # Output layer for 5 classes

        # Pooling layer and activation function
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.relu = nn.ReLU()

    def forward(self, x):
        # Convolutional layers with ReLU and pooling
        x = self.pool(self.relu(self.conv1(x)))  # Output: 32 x 200 x 150
        x = self.pool(self.relu(self.conv2(x)))  # Output: 64 x 100 x 75
        x = self.pool(self.relu(self.conv3(x)))  # Output: 128 x 50 x 37

        # Flatten the output from the convolutional layers
        x = x.view(-1, 128 * 50 * 37)  # Flattened to 128 * 50 * 37

        # Fully connected layers with ReLU
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))

        # Output layer (no activation, logits)
        x = self.fc3(x)
        return x

In [3]:
# Define transformation for grayscale images
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Ensure the images are in grayscale (1 channel)
    transforms.Resize((300, 400)),  # Resize images to 300x400
    transforms.ToTensor(),  # Convert images to tensors
    transforms.Normalize(mean=[0.5], std=[0.5])  # Normalize grayscale images
])

In [7]:
# Load the datasets
train_dataset = datasets.ImageFolder(root='./dataset/train', transform=transform)
val_dataset = datasets.ImageFolder(root='./dataset/val', transform=transform)

In [9]:
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

In [10]:
# Initialize model, loss function, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNNModel().to(device)

# class_counts = [239, 289, 585, 731, 168]
# total_samples = sum(class_counts)
# class_weights = [total_samples / (len(class_counts) * count) for count in class_counts]
# class_weights = torch.FloatTensor(class_weights).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)

In [11]:
# Training parameters
num_epochs = 50
train_loss_history = []
val_loss_history = []
train_acc_history = []
val_acc_history = []

In [12]:
# Training and validation loop
for epoch in range(num_epochs):
    model.train()
    running_train_loss = 0.0
    correct_train = 0
    total_train = 0

    # Training loop
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Track training loss and accuracy
        running_train_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    train_loss = running_train_loss / len(train_loader)
    train_acc = 100 * correct_train / total_train
    train_loss_history.append(train_loss)
    train_acc_history.append(train_acc)

    # Validation loop
    model.eval()
    running_val_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Track validation loss and accuracy
            running_val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_loss = running_val_loss / len(val_loader)
    val_acc = 100 * correct_val / total_val
    val_loss_history.append(val_loss)
    val_acc_history.append(val_acc)

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%, Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.2f}%')


Epoch [1/50], Train Loss: 1.5089, Train Accuracy: 34.64%, Val Loss: 1.4602, Val Accuracy: 39.09%
Epoch [2/50], Train Loss: 1.4184, Train Accuracy: 37.92%, Val Loss: 1.4624, Val Accuracy: 38.29%
Epoch [3/50], Train Loss: 1.3677, Train Accuracy: 41.05%, Val Loss: 1.4002, Val Accuracy: 41.27%
Epoch [4/50], Train Loss: 1.2984, Train Accuracy: 44.58%, Val Loss: 1.4532, Val Accuracy: 42.86%
Epoch [5/50], Train Loss: 1.1278, Train Accuracy: 53.38%, Val Loss: 1.4429, Val Accuracy: 40.08%
Epoch [6/50], Train Loss: 0.6310, Train Accuracy: 76.79%, Val Loss: 2.0643, Val Accuracy: 34.33%
Epoch [7/50], Train Loss: 0.1890, Train Accuracy: 94.23%, Val Loss: 2.6149, Val Accuracy: 38.49%
Epoch [8/50], Train Loss: 0.0432, Train Accuracy: 98.61%, Val Loss: 3.6711, Val Accuracy: 36.90%
Epoch [9/50], Train Loss: 0.0131, Train Accuracy: 99.80%, Val Loss: 4.0880, Val Accuracy: 37.70%
Epoch [10/50], Train Loss: 0.0024, Train Accuracy: 99.95%, Val Loss: 4.4705, Val Accuracy: 38.10%
Epoch [11/50], Train Loss: 0.

KeyboardInterrupt: 

In [None]:
# Plot loss and accuracy
epochs_range = range(1, num_epochs + 1)

plt.figure(figsize=(12, 5))

# Plot training and validation loss
plt.subplot(1, 2, 1)
plt.plot(epochs_range, train_loss_history, label='Training Loss')
plt.plot(epochs_range, val_loss_history, label='Validation Loss')
plt.title('Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Plot training and validation accuracy
plt.subplot(1, 2, 2)
plt.plot(epochs_range, train_acc_history, label='Training Accuracy')
plt.plot(epochs_range, val_acc_history, label='Validation Accuracy')
plt.title('Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy (%)')
plt.legend()

plt.show()

In [25]:
# Save the trained model
model_save_path = './weights/custom-aug1.pth'
torch.save(model.state_dict(), model_save_path)


# Test code

In [26]:
import os
import pandas as pd
from PIL import Image
from torchvision import transforms

# Load the trained model (make sure it's in evaluation mode)
model = CNNModel().to(device)
model.load_state_dict(torch.load('./weights/custom-aug1.pth'))
model.eval()

# Define transformation for test images
test_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Ensure the images are in grayscale (1 channel)
    transforms.Resize((300, 400)),  # Resize images to 300x400
    transforms.ToTensor(),  # Convert images to tensors
    transforms.Normalize(mean=[0.5], std=[0.5])  # Normalize grayscale images
])

# Directory containing test images
test_dir = './data/dataset/test'
test_images = [f for f in os.listdir(test_dir) if f.endswith('.jpg')]

# Sort filenames to ensure correct order
test_images.sort(key=lambda x: int(os.path.splitext(x)[0]))  # Assuming filenames are numeric

# Initialize lists to store filenames and predictions
filenames = []
predictions = []

# Process each image in the test directory
for image_name in test_images:
    image_path = os.path.join(test_dir, image_name)
    image = Image.open(image_path)
    image = test_transform(image)
    image = image.unsqueeze(0).to(device)  # Add batch dimension and move to device

    # Predict the class
    with torch.no_grad():
        outputs = model(image)
        _, predicted = torch.max(outputs, 1)
        class_id = predicted.item()

    # Save results
    filenames.append(os.path.splitext(image_name)[0])  # Remove .jpg from filename
    predictions.append(class_id + 1)  # Map class_id to 1-based index

# Save results to a CSV file
results_df = pd.DataFrame({'ID': filenames, 'Predictions': predictions})
results_df.to_csv('predictions.csv', index=False)
