In [1]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Dataset
from PIL import Image



In [3]:
# Load classification labels
classification_labels = pd.read_csv('Data/input/classification_labels.csv')
name_to_ancestors = pd.read_csv('Data/input/name_to_ancestors.csv')

# Custom dataset class
class InsectDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.labels_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.labels_frame)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.labels_frame.iloc[idx, 0])
        image = Image.open(img_name)
        label = self.labels_frame.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        return image, label

In [4]:
# Data transformations
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Create datasets
train_dataset = InsectDataset(csv_file='Data/input/classification_labels.csv', 
                              root_dir='Data/input/images_resized', 
                              transform=data_transforms)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [6]:
# Model selection
model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(set(classification_labels['label'])))
model = model.to('cuda')

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 25
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to('cuda'), labels.to('cuda')

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    print(f'Epoch {epoch}/{num_epochs} Loss: {epoch_loss:.4f}')

KeyError: 'label'

In [None]:
# Save model
torch.save(model.state_dict(), 'insect_model.pth')

# Prediction and saving results to predictions.csv
def predict_and_save(model, dataset, output_file):
    model.eval()
    predictions = []
    with torch.no_grad():
        for inputs, _ in dataset:
            inputs = inputs.to('cuda')
            outputs = model(inputs.unsqueeze(0))
            _, preds = torch.max(outputs, 1)
            predictions.append(preds.item())
    
    dataset.labels_frame['predictions'] = predictions
    dataset.labels_frame.to_csv(output_file, index=False)

In [None]:
test_dataset = InsectDataset(csv_file='Data/output/predictions.csv', 
                             root_dir='Data/input/images_resized', 
                             transform=data_transforms)

predict_and_save(model, test_dataset, 'Data/output/predictions.csv')