In [1]:
import random
import json
import os
import sys

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

from resnet_pytorch import ResNet
from datasets import load_dataset, load_metric, Dataset, DatasetDict

# load .env file
from dotenv import load_dotenv
load_dotenv()

sys.path.insert(0, '../')
from data_loader import get_data_to_load, split_json_and_image_files, load_json_files, load_image_files, load_json_file, load_image_file

### Loading data

In [2]:
# set number of files to load
NUMBER_OF_FILES = 1000

# get list with local data and file paths
list_files = get_data_to_load(loading_file='../3_data_preparation/04_data_cleaning/updated_data_list', file_location='../3_data_preparation/01_enriching/.data', image_file_location='../1_data_collection/.data', allow_new_file_creation=False, from_remote_only=True, download_link='env', limit=NUMBER_OF_FILES, shuffle_seed=42, allow_file_location_env=True, allow_json_file_location_env=True, allow_image_file_location_env=True)

json_files, image_files = split_json_and_image_files(list_files)
paired_files = list(zip(json_files, image_files))

All local files: 642825
Relevant files: 642825
Limited files: 3000


### example linus

In [3]:
class CustomImageNameDataset(Dataset):
    def __init__(self, image_paths, json_paths, transform=None):
        self.image_paths = image_paths
        self.json_paths = json_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        return self.image_paths[idx], self.json_paths[idx]

# Define transformations
transform = transforms.Compose([
    transforms.Resize((100, 100)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [4]:
input_images = image_files
labels = json_files
assert len(input_images) == len(labels), "Mismatch in number of images and labels"

file_name_dataset = CustomImageNameDataset(input_images, labels, transform=transform)
file_name_loader = DataLoader(file_name_dataset, batch_size=64, shuffle=True)

In [5]:
countries = []
coordinates = []
transformed_images = []

for image_files, label_files in file_name_loader:
    images = load_image_files(image_files)
    labels = load_json_files(label_files)
    countries.extend([item['country_name'] for item in labels])
    coordinates.extend([item['coordinates'] for item in labels])
    transformed_images.extend(transform(images))
    print("Images batch shape:", images.shape)
    print("Labels batch shape:", labels.shape)
    break  # After printing the first batch, exit the loop

TypeError: list indices must be integers or slices, not list

### example lukas

In [None]:
class CustomImageDataset(Dataset):
    def __init__(self, image_paths, json_paths, transform=None):
        self.image_paths = image_paths
        self.json_paths = json_paths
        self.transform = transform
        self.country_to_index = {}
        self.create_country_mapping()

    def create_country_mapping(self):
        unique_countries = set()
        for path in self.json_paths:
            with open(path, 'r') as file:
                data = json.load(file)
                country = data.get('country_name', 'Unknown')
                unique_countries.add(country)
        self.country_to_index = {country: idx for idx, country in enumerate(unique_countries)}

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        # change here with the faster code with batches
        print(self.image_paths[idx])
        image = Image.open(self.image_paths[idx]).convert('RGB')
        with open(self.json_paths[idx], 'r') as file:
            data = json.load(file)
            country = data.get('country_name', 'Unknown')
        
        country_index = self.country_to_index[country]
        coordinates = torch.tensor(data.get('coordinates', [0, 0]), dtype=torch.float32)
        
        if self.transform:
            image = self.transform(image)

        return image, coordinates, country_index

class ImageDataHandler:
    def __init__(self, image_paths, json_paths, batch_size=10, train_ratio=0.7, val_ratio=0.2, test_ratio=0.1):
        self.image_paths = image_paths
        self.json_paths = json_paths
        self.batch_size = batch_size

        # Define transformations
        self.transform_train = transforms.Compose([
            transforms.Resize((50, 50)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

        self.transform_test = transforms.Compose([
            transforms.Resize((50, 50)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

        # Initialize datasets and loaders
        self.train_loader, self.val_loader, self.test_loader = self.create_loaders(train_ratio, val_ratio, test_ratio)

    def create_loaders(self, train_ratio, val_ratio, test_ratio):
        combined = list(zip(self.image_paths, self.json_paths))
        random.shuffle(combined)
        total_count = len(combined)
        train_end = int(train_ratio * total_count)
        val_end = train_end + int(val_ratio * total_count)

        train_data = combined[:train_end]
        val_data = combined[train_end:val_end]
        test_data = combined[val_end:]

        # Extract separate lists for images and JSON paths
        train_images, train_jsons = zip(*train_data)
        val_images, val_jsons = zip(*val_data)
        test_images, test_jsons = zip(*test_data)

        # Create train, val- and test datasets
        train_dataset = CustomImageDataset(train_images, train_jsons, self.transform_train)
        val_dataset = CustomImageDataset(val_images, val_jsons, self.transform_test)
        test_dataset = CustomImageDataset(test_images, test_jsons, self.transform_test)

        # Create train, val- and test loaders
        train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=self.batch_size, shuffle=False)

        return train_loader, val_loader, test_loader

In [None]:
# Creating Dataloasders with the classes
data_handler = ImageDataHandler(image_files, json_files)
train_loader = data_handler.train_loader
val_loader = data_handler.val_loader
test_loader = data_handler.test_loader

PRINT_FIRST = True

for images, coordinates, country_indices in train_loader:
    if PRINT_FIRST:
        print("Images batch shape:", images.shape)
        print("Coordinates batch shape:", coordinates.shape)
        print("Country indices:", country_indices.shape)
        PRINT_FIRST = False

TypeError: tuple indices must be integers or slices, not list

## Model

In [None]:
# 1. First try to predict the coordinates of the image
# 2. Then try to predict the country of the image with nodes

model = ResNet.from_pretrained('resnet18', num_classes=2)

In [None]:
#print(model)

## Training

In [None]:
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

In [None]:
net = ResNet50(10).to('cuda')

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor = 0.1, patience=5)

In [None]:
EPOCHS = 200
for epoch in range(EPOCHS):
    losses = []
    running_loss = 0
    for i, inp in enumerate(trainloader):
        inputs, labels = inp
        inputs, labels = inputs.to('cuda'), labels.to('cuda')
        optimizer.zero_grad()
    
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        losses.append(loss.item())

        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        if i%100 == 0 and i > 0:
            print(f'Loss [{epoch+1}, {i}](epoch, minibatch): ', running_loss / 100)
            running_loss = 0.0

    avg_loss = sum(losses)/len(losses)
    scheduler.step(avg_loss)
            
print('Training Done')