In [96]:
import random
import json
import os
import sys

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image

from resnet_pytorch import ResNet

# load .env file
from dotenv import load_dotenv
load_dotenv()

sys.path.insert(0, '../')
from data_loader import get_data_to_load, split_json_and_image_files, load_json_files, load_image_files, load_json_file, load_image_file

### Loading data

In [86]:
# set number of files to load
NUMBER_OF_FILES = 10000

# get list with local data and file paths
list_files = get_data_to_load(loading_file='../3_data_preparation/04_data_cleaning/updated_data_list', file_location='../3_data_preparation/01_enriching/.data', image_file_location='../1_data_collection/.data', allow_new_file_creation=False, from_remote_only=True, download_link='env', limit=NUMBER_OF_FILES, shuffle_seed=42, allow_file_location_env=True, allow_json_file_location_env=True, allow_image_file_location_env=True)

json_files, image_files = split_json_and_image_files(list_files)
paired_files = list(zip(json_files, image_files))

All local files: 642825
Relevant files: 642825
Limited files: 30000


### example linus

In [71]:
class CustomImageNameDataset(Dataset):
    def __init__(self, image_paths, json_paths, transform=None):
        self.image_paths = image_paths
        self.json_paths = json_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        return self.image_paths[idx], self.json_paths[idx]

# Define transformations
transform = transforms.Compose([
    transforms.Resize((100, 100)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [72]:
input_images = image_files
labels = json_files
assert len(input_images) == len(labels), "Mismatch in number of images and labels"

file_name_dataset = CustomImageNameDataset(input_images, labels, transform=transform)
file_name_loader = DataLoader(file_name_dataset, batch_size=64, shuffle=True, num_workers=0)

In [73]:
file_name_loader.dataset.image_paths[0]

'/Users/lukasstoeckli/GitLabProjects/DSPRO2/dspro2/dspro2/.data/geoguessr_location_singleplayer_2EiE6ZCag3IWcwNR_3.png'

In [76]:
countries = []
coordinates = []
transformed_images = []

for image_files, label_files in file_name_loader:
    images = load_image_files(image_files)
    labels = load_json_files(label_files)
    countries.extend([item['country_name'] for item in labels])
    coordinates.extend([item['coordinates'] for item in labels])
    #transformed_images.extend(transform(images))
    #print("Images batch shape:", images.shape)
    #print("Labels batch shape:", labels.shape)
    break  # After printing the first batch, exit the loop

### example lukas

In [107]:
class CustomImageDataset(Dataset):
    def __init__(self, image_paths, json_paths, transform=None):
        self.image_paths = image_paths
        self.json_paths = json_paths
        self.transform = transform
        self.country_to_index = {}
        self.create_country_mapping()

    # map countries to indices, maybe not best approach
    def create_country_mapping(self):
        unique_countries = set()
        for path in self.json_paths:
            with open(path, 'r') as file:
                data = json.load(file)
                country = data.get('country_name', 'Unknown')
                unique_countries.add(country)
        self.country_to_index = {country: idx for idx, country in enumerate(unique_countries)}

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx]).convert('RGB')
        with open(self.json_paths[idx], 'r') as file:
            data = json.load(file)
            country = data.get('country_name', 'Unknown')
        
        country_index = self.country_to_index[country]
        coordinates = torch.tensor(data.get('coordinates', [0, 0]), dtype=torch.float32)
        
        if self.transform:
            image = self.transform(image)

        return image, coordinates, country_index

class ImageDataHandler:
    def __init__(self, image_paths, json_paths, batch_size=256, train_ratio=0.7, val_ratio=0.2, test_ratio=0.1):
        self.image_paths = image_paths
        self.json_paths = json_paths
        self.batch_size = batch_size

        # Transformations for training and testing
        self.transform_train = transforms.Compose([
            transforms.Resize((50, 50)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

        self.transform_test = transforms.Compose([
            transforms.Resize((50, 50)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

        # Initialize datasets and loaders
        self.train_loader, self.val_loader, self.test_loader = self.create_loaders(train_ratio, val_ratio, test_ratio)

    def create_loaders(self, train_ratio, val_ratio, test_ratio):
        combined = list(zip(self.image_paths, self.json_paths))
        random.shuffle(combined)
        total_count = len(combined)
        train_end = int(train_ratio * total_count)
        val_end = train_end + int(val_ratio * total_count)

        train_data = combined[:train_end]
        val_data = combined[train_end:val_end]
        test_data = combined[val_end:]

        # Extract separate lists for images and JSON paths
        train_images, train_jsons = zip(*train_data)
        val_images, val_jsons = zip(*val_data)
        test_images, test_jsons = zip(*test_data)

        # Create train, val- and test datasets
        train_dataset = CustomImageDataset(train_images, train_jsons, self.transform_train)
        val_dataset = CustomImageDataset(val_images, val_jsons, self.transform_test)
        test_dataset = CustomImageDataset(test_images, test_jsons, self.transform_test)

        # Create train, val- and test dataloaders
        train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=self.batch_size, shuffle=False)

        return train_loader, val_loader, test_loader

In [115]:
# Creating Dataloasders with the classes
data_handler = ImageDataHandler(image_files, json_files)
train_dataloader = data_handler.train_loader
val_dataloader = data_handler.val_loader
test_dataloader = data_handler.test_loader

PRINT_FIRST = True

# Print forst batch as an example, to see the structure
# 7000 images need 59 sec for processing as information
for images, coordinates, country_indices in train_dataloader:
    if PRINT_FIRST:
        print("Images batch shape:", images.shape)
        print("Coordinates batch shape:", coordinates.shape)
        print("Country indices:", country_indices.shape)
        PRINT_FIRST = False

Images batch shape: torch.Size([256, 3, 50, 50])
Coordinates batch shape: torch.Size([256, 2])
Country indices: torch.Size([256])


## Model

In [116]:
# Load the pretrained model
model = ResNet.from_pretrained('resnet18', num_classes=2)

Loaded pretrained weights for resnet18.


In [117]:
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

## Training

In [118]:
EPOCHS = 2

criterion = nn.CrossEntropyLoss()
model = ResNet.from_pretrained('resnet18', num_classes=2)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor = 0.1, patience=5)

Loaded pretrained weights for resnet18.


In [128]:
for epoch in range(EPOCHS):
    train_loss = 0.0
    model.train()

    for images, coordinates, country_indices in train_dataloader:
        #print("Images shape:", images.shape)
        #print("Coordinates shape:", coordinates.shape)
        #images, coordinates = images.to('cuda'), coordinates.to('cuda')
        optimizer.zero_grad()
        output = model(images)
        print("Output shape:", output.shape)
        loss = criterion(output, coordinates)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * images.size(0)

    train_loss /= len(train_dataloader.dataset)

    test_loss = 0.0
    model.eval()

    with torch.no_grad():
        for images, coordinates, country_indices in val_dataloader:
            #images, coordinates = images.to('cuda'), coordinates.to('cuda')
            output = model(images)
            loss = criterion(output, coordinates)

            test_loss += loss.item() * images.size(0)

    test_loss /= len(val_dataloader.dataset)

    print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

Output shape: torch.Size([256, 2])
tensor([-4.7878e+08,  4.7878e+08], grad_fn=<SelectBackward0>)
tensor([ 35.8800, 139.5729])


KeyboardInterrupt: 