In [1]:
# Imports: PyTorch, torchvision (ResNet-18 model and weights), scikit-learn, pandas and helpers
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import io
from torchvision.models import resnet18, ResNet18_Weights
from sklearn.model_selection import train_test_split
import numpy as np
import os
import csv
import pandas as pd
import pickle
import matplotlib.pyplot as plt
from time import perf_counter


In [None]:
def read_image(image_path):
    """Load an image file and apply the ResNet-18 default preprocessing.

    Args:
        image_path: Path of the image file to decode.

    Returns:
        The result of applying ``ResNet18_Weights.DEFAULT.transforms()`` to
        the decoded image tensor.
    """
    # Decode straight to 3-channel RGB. The ResNet-18 preset normalises with
    # three per-channel statistics, so a grayscale or RGBA file decoded with
    # its native channel count would fail inside the transform.
    img = io.read_image(image_path, mode=io.ImageReadMode.RGB)

    weights = ResNet18_Weights.DEFAULT
    transform = weights.transforms()
    return transform(img)

def train(model, train_loader, optimizer, criterion):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable of ``(inputs, labels)`` batches that also
            supports ``len()`` and exposes a ``dataset`` attribute.
        optimizer: Optimizer whose gradients are cleared and which is
            stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.

    Returns:
        A ``(train_loss, train_accuracy)`` tuple: the mean of the per-batch
        loss values, and the number of correct predictions divided by
        ``len(train_loader.dataset)``.
    """
    model.train()
    running_loss = 0
    num_correct = 0

    for batch_inputs, batch_labels in train_loader:
        optimizer.zero_grad()
        logits = model(batch_inputs)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        # Predicted class = index of the largest score along dim 1.
        predicted = logits.argmax(dim=1)
        num_correct += (predicted == batch_labels).sum().item()

    train_accuracy = num_correct / len(train_loader.dataset)
    train_loss = running_loss / len(train_loader)

    return train_loss, train_accuracy

def test(model, test_loader, criterion):
    model.eval()
    test_loss, correct = 0, 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            test_loss += loss.item()
            _, preds = outputs.max(1)
            correct += preds.eq(labels).sum().item()

    test_accuracy = correct / len(test_loader.dataset)
    test_loss /= len(test_loader)

    return test_loss, test_accuracy

class CustomDataset(Dataset):
    """Image/continent dataset driven by a CSV index file.

    The CSV is loaded with pandas and is expected to provide an
    ``image_name`` column (joined onto ``root_dir`` to locate each file) and
    a ``continent`` column. Every distinct continent value receives an
    integer label, numbered in order of first appearance in the file.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """Read the CSV and attach a numeric ``continent_label`` column.

        Args:
            csv_file: Path of the CSV file to read.
            root_dir: Directory that the ``image_name`` values are joined to.
            transform: Optional callable applied to each decoded image.
        """
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        # Number the continents in the order pandas reports them as unique.
        continents = self.data['continent'].unique()
        self.continent_mapping = dict(zip(continents, range(len(continents))))
        self.data['continent_label'] = self.data['continent'].map(self.continent_mapping)

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair for row ``idx``.

        The image is decoded from ``root_dir/image_name`` and passed through
        ``transform`` when one was supplied; the label is the row's
        ``continent_label`` value.
        """
        row = self.data.iloc[idx]
        image = io.read_image(os.path.join(self.root_dir, row['image_name']))
        if self.transform:
            image = self.transform(image)
        return image, row['continent_label']

def train_loop(csv_path, root_dir, num_epochs=15, batch_size=32, learning_rate=0.001,
               weight_decay=0.0001, num_classes=6):
    """Train a new classification head on a frozen, pretrained ResNet-18.

    The dataset is split 80/20 into train and test subsets with a fixed
    random state. All pretrained parameters are frozen and only the
    replacement ``fc`` layer is optimised with Adam.

    Files written to the current working directory:
        train_idx.txt, test_idx.txt: dataset indices of each split, one per line.
        metrics.csv: per-epoch loss/accuracy table, rewritten after every epoch.
        model.pth: model ``state_dict``, overwritten after every epoch.

    Args:
        csv_path: CSV file passed to ``CustomDataset``.
        root_dir: Directory containing the image files.
        num_epochs: Number of train/evaluate cycles to run.
        batch_size: Batch size for both data loaders.
        learning_rate: Adam learning rate.
        weight_decay: Adam weight decay.
        num_classes: Number of outputs of the new final layer (default 6,
            one per continent).

    Raises:
        ValueError: If the CSV contains more distinct continent labels than
            ``num_classes``.
    """
    # Read CSV and set up transformations
    print("Reading CSV and setting up transformations...")
    weights = ResNet18_Weights.DEFAULT
    transform = weights.transforms()

    print("Setting up dataset and data loaders...")
    dataset = CustomDataset(csv_file=csv_path, root_dir=root_dir, transform=transform)

    # Fail before training starts rather than with an out-of-range target
    # error in the loss once a batch happens to contain the extra label.
    num_labels = len(dataset.continent_mapping)
    if num_labels > num_classes:
        raise ValueError(
            f"Dataset has {num_labels} distinct continent labels but the model "
            f"only has {num_classes} outputs"
        )

    # Train-test split
    print("Splitting data into training and testing sets...")
    train_idx, test_idx = train_test_split(range(len(dataset)), test_size=0.2, random_state=42)
    train_set = torch.utils.data.Subset(dataset, train_idx)
    test_set = torch.utils.data.Subset(dataset, test_idx)

    # Record which dataset rows went into each split.
    with open('train_idx.txt', 'w') as f:
        f.writelines(f"{item}\n" for item in train_idx)

    with open('test_idx.txt', 'w') as f:
        f.writelines(f"{item}\n" for item in test_idx)

    # Data loaders
    print("Setting up data loaders...")
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=4)

    # Load ResNet18 and modify the final layer
    print("Loading ResNet18 and modifying the final layer...")
    resnet = resnet18(weights=weights)
    for param in resnet.parameters():
        param.requires_grad = False
    # The replacement layer is created after the freeze, so it is the only
    # part of the network with trainable parameters.
    num_features = resnet.fc.in_features
    resnet.fc = nn.Linear(num_features, num_classes)

    # Criterion and optimizer
    print("Setting up criterion and optimizer...")
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(resnet.fc.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Track metrics as a list of records; DataFrame.append was removed in
    # pandas 2.0, and growing a frame row by row is quadratic anyway.
    metric_columns = ['epoch', 'train_loss', 'train_accuracy', 'test_loss', 'test_accuracy']
    history = []

    print("Training model...")
    for epoch in range(num_epochs):
        train_loss, train_accuracy = train(resnet, train_loader, optimizer, criterion)
        test_loss, test_accuracy = test(resnet, test_loader, criterion)

        # Save model and metrics after every epoch so an interrupted run
        # still leaves usable artefacts on disk.
        history.append({'epoch': epoch, 'train_loss': train_loss, 'train_accuracy': train_accuracy,
                        'test_loss': test_loss, 'test_accuracy': test_accuracy})
        pd.DataFrame(history, columns=metric_columns).to_csv('metrics.csv', index=False)
        torch.save(resnet.state_dict(), 'model.pth')

        print(f"Epoch {epoch + 1}/{num_epochs}: "
              f"Train Loss={train_loss:.4f}, Train Acc={train_accuracy:.4f}, "
              f"Test Loss={test_loss:.4f}, Test Acc={test_accuracy:.4f}")

    print("Training complete!")

# Example usage: train for 15 epochs on the samples listed in
# 'coords_processed.csv', with image files looked up under 'data/archives'.
train_loop(
    csv_path='coords_processed.csv',
    root_dir='data/archives',
    num_epochs=15,
)


Reading CSV and setting up transformations...
Setting up dataset and data loaders...
Splitting data into training and testing sets...
Setting up data loaders...
Loading ResNet18 and modifying the final layer...
Setting up criterion and optimizer...
Training model...
