In [315]:
import os
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms
import torch.nn.functional as F
import torch.optim as optim

# Step 1: Define the Custom Dataset Class
class CustomImageDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        img_id = self.annotations.iloc[idx, 0]
        img_path = os.path.join(self.img_dir, img_id + '.jpg')
        image = Image.open(img_path).convert("RGB")
        
        if self.transform:
            image = self.transform(image)
        
        labels = self.annotations.iloc[idx, 1:].astype('float32').values  # Load all labels
        return image, torch.tensor(labels)
    
# Step 2: Define Transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to a standard size
    transforms.ToTensor(),  # Convert to tensor
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize with mean and std
])

# Step 3: Initialize the Dataset and DataLoader
csv_file = '../data/train/train.csv'  # Path to the CSV file
img_dir = '../data/train/train_images'  # Path to the image directory

dataset = CustomImageDataset(csv_file, img_dir, transform)

# # Limit the Dataset size
subset_indices = list(range(1000))
subset_dataset = Subset(dataset, subset_indices)

# Split dataset into training, validation, and test sets
train_size = int(0.8 * len(subset_dataset))
val_size = len(subset_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(subset_dataset, [train_size, val_size])

# Define relevant variables for the ML task (Hyperparameters)
batch_size = 8
learning_rate = 0.00001
num_epochs = 10

# Define dataloaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [316]:
# Step 4: Create custom CNN Feature Extractor
class CustomFeatureExtractor(nn.Module):
    def __init__(self):
        super(CustomFeatureExtractor, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)  # Additional layer
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(128 * 14 * 14, 512)  # Adjust for new feature size
        
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = F.relu(self.fc1(x))
        return x

In [317]:
# Step 5: Define a Classifier
class MultiLabelClassifier(nn.Module):
    def __init__(self, input_dim, num_labels):
        super(MultiLabelClassifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.dropout = nn.Dropout(0.5)  # Dropout layer to prevent overfitting
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_labels)  # Modify the number of output labels
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x

In [318]:
# Step 6: Initialize the Model, Loss, and Optimizer

num_labels = 13  # Number of labels to predict
input_dim = 512

device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')

feature_extractor = CustomFeatureExtractor().to(device)
classifier = MultiLabelClassifier(input_dim, num_labels).to(device)

criterion = nn.BCELoss()  # Binary Cross-Entropy Loss
optimizer = optim.Adam(classifier.parameters(), lr=learning_rate)

In [319]:
# Step 7: Training Loop with Validation

# Function to calculate accuracy
def calculate_accuracy(predictions, labels, threshold=0.5):
    preds = (predictions > threshold).float()
    correct = (preds == labels).float().sum()
    accuracy = correct / (labels.size(0) * labels.size(1))
    return accuracy

# Training loop with accuracy calculation
for epoch in range(num_epochs):
    classifier.train()
    train_loss = 0.0
    train_accuracy = 0.0
    
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        features = feature_extractor(images)
        outputs = classifier(features)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * images.size(0)
        train_accuracy += calculate_accuracy(outputs, labels).item() * images.size(0)

    train_loss /= len(train_loader.dataset)
    train_accuracy /= len(train_loader.dataset)

    # Validation
    classifier.eval()
    val_loss = 0.0
    val_accuracy = 0.0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)

            features = feature_extractor(images)
            outputs = classifier(features)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)
            val_accuracy += calculate_accuracy(outputs, labels).item() * images.size(0)

    val_loss /= len(val_loader.dataset)
    val_accuracy /= len(val_loader.dataset)

    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}")

Epoch [1/10], Train Loss: 0.8785, Train Acc: 0.6894, Val Loss: 0.8527, Val Acc: 0.7004
Epoch [2/10], Train Loss: 0.8340, Train Acc: 0.6894, Val Loss: 0.8086, Val Acc: 0.7004
Epoch [3/10], Train Loss: 0.7902, Train Acc: 0.6894, Val Loss: 0.7664, Val Acc: 0.7004
Epoch [4/10], Train Loss: 0.7457, Train Acc: 0.6894, Val Loss: 0.7201, Val Acc: 0.7004
Epoch [5/10], Train Loss: 0.6955, Train Acc: 0.6894, Val Loss: 0.6678, Val Acc: 0.7004
Epoch [6/10], Train Loss: 0.6403, Train Acc: 0.6894, Val Loss: 0.6111, Val Acc: 0.7004
Epoch [7/10], Train Loss: 0.5820, Train Acc: 0.6894, Val Loss: 0.5534, Val Acc: 0.7004
Epoch [8/10], Train Loss: 0.5232, Train Acc: 0.6917, Val Loss: 0.4941, Val Acc: 0.7342
Epoch [9/10], Train Loss: 0.4607, Train Acc: 0.7595, Val Loss: 0.4310, Val Acc: 0.7773
Epoch [10/10], Train Loss: 0.3943, Train Acc: 0.7644, Val Loss: 0.3633, Val Acc: 0.7773


In [150]:
torch.save(feature_extractor.state_dict(), '../data/train/find_features.pth')

In [157]:
# Load and preprocess the image
def load_image(image_path, transform):
    image = Image.open(image_path).convert("RGB")
    image = transform(image).unsqueeze(0)  # Add batch dimension
    return image

# Load the pre-trained weights
feature_extractor.load_state_dict(torch.load('../data/train/find_features.pth'))

image_path = '../data/train/train_images/0f93d1482d7fb9f2b6a5085ee729142b.jpg'  # Replace with the path to your image

# Load and preprocess the image
image = load_image(image_path, transform).to(device)

with torch.no_grad():
    features = feature_extractor(image)
    output = classifier(features).squeeze()
    predictions = (output > 0.5).float()

labels = ["Subject Focus", "Eyes", "Face", "Near", "Action", "Accessory", "Group", "Collage", "Human", "Occlusion", "Info", "Blur"]
predictions_dict = {label: predictions[i].item() for i, label in enumerate(labels)}

print("Predictions:", predictions_dict)


Predictions: {'Subject Focus': 0.0, 'Eyes': 1.0, 'Face': 1.0, 'Near': 0.0, 'Action': 0.0, 'Accessory': 0.0, 'Group': 0.0, 'Collage': 0.0, 'Human': 0.0, 'Occlusion': 0.0, 'Info': 0.0, 'Blur': 0.0}
