In [3]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import io
from torchvision.models import resnet18, ResNet18_Weights
from tqdm import tqdm  # For progress bars


In [None]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import io
from torchvision.models import resnet18, ResNet18_Weights
from tqdm import tqdm  # For progress bars

# Custom dataset class with debug prints
class CustomDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None, limit=None):
        self.data = pd.read_csv(csv_file)
        if limit:  # Limit the dataset to a small number of observations
            self.data = self.data.head(limit)
        self.root_dir = root_dir
        self.transform = transform
        self.continent_mapping = {continent: idx for idx, continent in enumerate(self.data['continent'].unique())}
        self.data['continent_label'] = self.data['continent'].map(self.continent_mapping)

        print(f"Dataset initialized with {len(self.data)} samples.")
        print(f"Continents mapped: {self.continent_mapping}")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path = os.path.join(self.root_dir, self.data.iloc[idx]['image_name'])
        print(img_path)
        image = io.read_image(img_path)
        if self.transform:
            image = self.transform(image)
        label = self.data.iloc[idx]['continent_label']

        if idx == 0:  # Show one sample for debugging
            print(f"Sample image shape: {image.shape}, Label: {label}")

        return image, label

# Training function with progress display
def train(model, train_loader, optimizer, criterion, device):
    model.train()
    train_loss, correct = 0, 0
    with tqdm(train_loader, desc="Training", unit="batch") as pbar:
        for inputs, labels in pbar:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, preds = outputs.max(1)
            correct += preds.eq(labels).sum().item()

            # Update progress bar with loss
            pbar.set_postfix(loss=loss.item())

    train_accuracy = correct / len(train_loader.dataset)
    train_loss /= len(train_loader)
    return train_loss, train_accuracy

# Testing function with progress display
def test(model, test_loader, criterion, device):
    model.eval()
    test_loss, correct = 0, 0
    with tqdm(test_loader, desc="Testing", unit="batch") as pbar:
        for inputs, labels in pbar:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            test_loss += loss.item()
            _, preds = outputs.max(1)
            correct += preds.eq(labels).sum().item()

            # Update progress bar with loss
            pbar.set_postfix(loss=loss.item())

    test_accuracy = correct / len(test_loader.dataset)
    test_loss /= len(test_loader)
    return test_loss, test_accuracy

# Main training loop
def train_loop(csv_path, root_dir, num_epochs=2, batch_size=2, learning_rate=0.001, weight_decay=0.0001, limit=10):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Data transformations
    weights = ResNet18_Weights.DEFAULT
    transform = weights.transforms()

    # Dataset and DataLoaders (limited to `limit` samples for testing)
    dataset = CustomDataset(csv_file=csv_path, root_dir=root_dir, transform=transform, limit=limit)
    train_idx, test_idx = train_test_split(range(len(dataset)), test_size=0.2, random_state=42)
    train_set = torch.utils.data.Subset(dataset, train_idx)
    test_set = torch.utils.data.Subset(dataset, test_idx)

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

    print(f"Training dataset size: {len(train_set)}")
    print(f"Testing dataset size: {len(test_set)}")

    # Load ResNet18 and modify the final layer
    resnet = resnet18(weights=ResNet18_Weights.DEFAULT)
    for param in resnet.parameters():
        param.requires_grad = False  # Freeze all layers except the last
    num_features = resnet.fc.in_features
    resnet.fc = nn.Linear(num_features, 6)  # 6 continents
    resnet.to(device)

    # Loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(resnet.fc.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Training and evaluation
    metrics = pd.DataFrame(columns=['epoch', 'train_loss', 'train_accuracy', 'test_loss', 'test_accuracy'])

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs}")
        train_loss, train_accuracy = train(resnet, train_loader, optimizer, criterion, device)
        test_loss, test_accuracy = test(resnet, test_loader, criterion, device)

        # Save metrics and model
        metrics = metrics.append({'epoch': epoch, 'train_loss': train_loss, 'train_accuracy': train_accuracy,
                                  'test_loss': test_loss, 'test_accuracy': test_accuracy}, ignore_index=True)
        metrics.to_csv('metrics.csv', index=False)
        torch.save(resnet.state_dict(), 'model.pth')

        print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")
        print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

    print("\nTraining complete! Model saved to 'model.pth' and metrics to 'metrics.csv'.")

# Example usage
# Ensure 'coords_processed.csv' and 'dataset/' are properly set up
train_loop(csv_path='coords_processed.csv', root_dir='dataset/', num_epochs=15, batch_size=32, limit=None)


Using device: cuda
Dataset initialized with 10 samples.
Continents mapped: {'North America': 0, 'South America': 1, 'Oceania': 2, 'Asia': 3, 'Africa': 4}
Training dataset size: 8
Testing dataset size: 2

Epoch 1/2


Training: 100%|██████████| 4/4 [00:00<00:00, 22.66batch/s, loss=2.49]


dataset/3.png
dataset/5.png
dataset/10.png
dataset/8.png
dataset/7.png
dataset/6.png
dataset/1.png
Sample image shape: torch.Size([3, 224, 224]), Label: 0
dataset/4.png


Testing:   0%|          | 0/1 [00:00<?, ?batch/s]

dataset/9.png
dataset/2.png


Testing: 100%|██████████| 1/1 [00:00<00:00, 33.89batch/s, loss=0.843]
  metrics = metrics.append({'epoch': epoch, 'train_loss': train_loss, 'train_accuracy': train_accuracy,


Train Loss: 2.0757, Train Accuracy: 0.2500
Test Loss: 0.8429, Test Accuracy: 1.0000

Epoch 2/2


Training:   0%|          | 0/4 [00:00<?, ?batch/s, loss=1.81]

dataset/10.png
dataset/5.png
dataset/4.png
dataset/8.png
dataset/6.png
dataset/7.png


Training: 100%|██████████| 4/4 [00:00<00:00, 30.07batch/s, loss=1.8]


dataset/3.png
dataset/1.png
Sample image shape: torch.Size([3, 224, 224]), Label: 0


Testing:   0%|          | 0/1 [00:00<?, ?batch/s]

dataset/9.png
dataset/2.png


Testing: 100%|██████████| 1/1 [00:00<00:00, 13.51batch/s, loss=0.784]
  metrics = metrics.append({'epoch': epoch, 'train_loss': train_loss, 'train_accuracy': train_accuracy,


Train Loss: 1.6564, Train Accuracy: 0.3750
Test Loss: 0.7843, Test Accuracy: 1.0000

Training complete! Model saved to 'quick_test_model.pth' and metrics to 'metrics.csv'.


In [None]:
def save_predictions(csv_path, root_dir, model_path, output_path):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Load the model
    weights = ResNet18_Weights.DEFAULT
    transform = weights.transforms()
    resnet = resnet18(weights=ResNet18_Weights.DEFAULT)
    num_features = resnet.fc.in_features
    resnet.fc = nn.Linear(num_features, 6)  # 6 continents
    resnet.load_state_dict(torch.load(model_path, map_location=device))
    resnet.to(device)
    resnet.eval()

    # Load the test dataset
    dataset = CustomDataset(csv_file=csv_path, root_dir=root_dir, transform=transform)
    test_idx = train_test_split(range(len(dataset)), test_size=0.2, random_state=42)[1]
    test_set = torch.utils.data.Subset(dataset, test_idx)
    test_loader = DataLoader(test_set, batch_size=1, shuffle=False, num_workers=0)

    # Create a DataFrame to store predictions
    predictions = []
    true_labels = []

    print("Generating predictions...")
    with torch.no_grad():
        for idx, (inputs, labels) in enumerate(test_loader):
            inputs = inputs.to(device)
            outputs = resnet(inputs)
            _, preds = outputs.max(1)

            predictions.append(preds.item())
            true_labels.append(labels.item())

    # Map numeric labels back to continents
    full_dataset = pd.read_csv(csv_path)
    continent_mapping = {continent: idx for idx, continent in enumerate(full_dataset['continent'].unique())}
    inverse_mapping = {v: k for k, v in continent_mapping.items()}

    test_data = full_dataset.iloc[test_idx].copy()
    test_data['model_prediction'] = [inverse_mapping[pred] for pred in predictions]

    # Save the test dataset with predictions
    test_data.to_csv(output_path, index=False)
    print(f"Predictions saved to {output_path}")


save_predictions(
    csv_path='coords_processed.csv',    # Input CSV with all data
    root_dir='dataset/',               # Path to the dataset folder
    model_path='model.pth',            # Path to the saved trained model
    output_path='test_predictions.csv' # Output CSV with predictions
)

Using device: cuda


  resnet.load_state_dict(torch.load(model_path, map_location=device))


Dataset initialized with 10000 samples.
Continents mapped: {'North America': 0, 'South America': 1, 'Oceania': 2, 'Asia': 3, 'Africa': 4, 'Europe': 5}
Generating predictions...
dataset/6253.png
dataset/4685.png
dataset/1732.png
dataset/4743.png
dataset/4522.png
dataset/6341.png
dataset/577.png
dataset/5203.png
dataset/6364.png
dataset/440.png
dataset/2751.png
dataset/7488.png
dataset/5273.png
dataset/5654.png
dataset/4000.png
dataset/6034.png
dataset/583.png
dataset/9931.png
dataset/7052.png
dataset/8159.png
dataset/9897.png
dataset/2250.png
dataset/4641.png
dataset/9486.png
dataset/4948.png
dataset/9921.png
dataset/1964.png
dataset/8244.png
dataset/6591.png
dataset/8848.png
dataset/322.png
dataset/2679.png
dataset/4626.png
dataset/4950.png
dataset/8329.png
dataset/3338.png
dataset/5590.png
dataset/252.png
dataset/3974.png
dataset/6631.png
dataset/5548.png
dataset/36.png
dataset/8363.png
dataset/1514.png
dataset/9318.png
dataset/40.png
dataset/4820.png
dataset/3466.png
dataset/1761.png