In [196]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

In [193]:
import zipfile

zip_path = 'country_dataset.zip'
extract_dir = 'country_dataset'

# Unpack the archive only when the target path does not exist yet, so
# re-running this cell does not extract everything again.
already_extracted = os.path.exists(extract_dir)
if not already_extracted:
    with zipfile.ZipFile(zip_path, mode='r') as archive:
        archive.extractall(path=extract_dir)

In [168]:
# Dataset
class CountryDataset(Dataset):
    """Image dataset that yields ``(image, class_index)`` pairs from a DataFrame.

    The frame must provide a ``country`` column and a ``local_path`` column
    holding the path of the image file for each row.

    Class indices:
        * If the frame has a ``label`` column, those integer ids are used as
          the class indices. This keeps the targets aligned with whatever
          produced that column (in this notebook: the ``LabelEncoder`` whose
          ids also index the class-weight tensor and size the model output).
        * Otherwise the indices are assigned 0..K-1 over the alphabetically
          sorted countries present in the frame.

    Args:
        df: DataFrame with one row per image.
        transform: Optional callable applied to the loaded RGB PIL image.

    Raises:
        ValueError: If a ``label`` column is present and some country is
            paired with more than one label value.
    """

    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

        if 'label' in self.df.columns:
            # Reuse the externally assigned ids instead of re-numbering the
            # classes per split.
            pairs = self.df[['country', 'label']].drop_duplicates()
            if not pairs['country'].is_unique:
                raise ValueError("each country must map to exactly one label")
            self.country_to_idx = {
                country: int(label)
                for country, label in zip(pairs['country'], pairs['label'])
            }
        else:
            self.country_to_idx = {
                country: idx
                for idx, country in enumerate(sorted(self.df['country'].unique()))
            }
        self.idx_to_country = {v: k for k, v in self.country_to_idx.items()}

    def __len__(self):
        """Return the number of rows (images) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at positional index ``idx`` and return ``(image, label)``."""
        row = self.df.iloc[idx]
        # The context manager closes the underlying file as soon as the pixel
        # data has been converted into a new RGB image.
        with Image.open(row['local_path']) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)
        label = self.country_to_idx[row['country']]
        return image, label

In [169]:
# Transforms
# Per-channel mean / std passed to Normalize; shared by both pipelines.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: random crop / flip / colour jitter, then tensor + normalise.
_train_steps = [
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
]
train_transform = transforms.Compose(_train_steps)

# Validation / test pipeline: fixed resize only, no random augmentation.
_eval_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
]
val_test_transform = transforms.Compose(_eval_steps)

In [194]:
BASE_FOLDER = "country_dataset"

# The CSV has no header row; column names are supplied here.
df = pd.read_csv(
    'country_dataset.csv',
    header=None,
    names=['country', 'lat', 'lon', 'local_path'],
)

# Turn backslashes into forward slashes, then prefix every path with the
# dataset folder.
forward_slash_paths = df['local_path'].str.replace("\\", "/", regex=False)
df['local_path'] = forward_slash_paths.apply(lambda rel: os.path.join(BASE_FOLDER, rel))

def fix_path(path):
    """Strip trailing dots from the second component of a path.

    The path is split into components; if the component at index 1 ends with
    one or more '.' characters they are removed. All other components are left
    untouched. Presumably this matches folder names that lost their trailing
    dot when the dataset was written to disk (TODO confirm).

    Both '/' and '\\' are treated as separators, because ``os.path.join``
    inserts a backslash on Windows and a '/'-only split would then look at the
    wrong component.

    Args:
        path: Path string using '/' and/or '\\' separators.

    Returns:
        The path joined with '/' separators, with the fix applied.
    """
    parts = path.replace('\\', '/').split('/')
    if len(parts) > 1 and parts[1].endswith('.'):
        parts[1] = parts[1].rstrip('.')
    return '/'.join(parts)

# Normalise the folder component of every path, then keep only rows whose
# image file exists on disk.
df['local_path'] = df['local_path'].apply(fix_path)
# .copy() makes the filtered frame independent of the original, so adding
# columns to it later does not raise SettingWithCopyWarning.
df = df[df['local_path'].apply(os.path.isfile)].copy()

In [171]:
# Fit the encoder on every country currently in the frame and store the
# resulting integer id per row. Note that le.classes_ is fixed at this point,
# i.e. before the rare-country filters below are applied.
le = LabelEncoder()
le.fit(df['country'])
df['label'] = le.transform(df['country'])

In [172]:
# Keep only countries that occur more than once.
counts = df['country'].value_counts()
repeated_countries = counts.index[(counts > 1).to_numpy()]
df = df[df['country'].isin(repeated_countries)]

In [174]:
# Keep only labels that have at least this many rows.
MIN_IMAGES_PER_CLASS = 10

label_counts = df['label'].value_counts()
valid_labels = label_counts.index[(label_counts >= MIN_IMAGES_PER_CLASS).to_numpy()]
df = df[df['label'].isin(valid_labels)]

In [177]:
# First hold out 10% of the rows as the test set, then split the remainder
# into train / validation (0.1667 of the remaining 90% is roughly 15% of all
# rows). Both splits are stratified on the label and use the same seed.
trainval_df, test_df = train_test_split(
    df,
    test_size=0.1,
    random_state=42,
    stratify=df['label'],
)
train_df, val_df = train_test_split(
    trainval_df,
    test_size=0.1667,
    random_state=42,
    stratify=trainval_df['label'],
)

In [178]:
batch_size = 64

# One dataset per split; only the training split uses the augmenting transform.
train_dataset = CountryDataset(train_df, train_transform)
val_dataset = CountryDataset(val_df, val_test_transform)
test_dataset = CountryDataset(test_df, val_test_transform)

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [195]:
# Print a heading followed by the number of rows per country.
class_distribution = df['country'].value_counts()
print("Rozkład klas:", class_distribution, sep="\n")

Rozkład klas:
country
Vietnam      100
Palestine    100
Vanuatu      100
Taiwan       100
Denmark      100
            ... 
Iran           5
Argentina      4
Syria          1
Ukraine        1
S. Sudan       1
Name: count, Length: 158, dtype: int64


In [180]:
class GeoGuessCountryClassifier(nn.Module):
    """ResNet-18 backbone with a dropout + linear classification head.

    The backbone is initialised with ImageNet weights and every parameter is
    left trainable (full fine-tuning). The original ``fc`` layer is replaced
    by ``Dropout -> Linear(in_features, num_classes)``.

    Args:
        num_classes: Number of output logits.
        dropout: Dropout probability applied before the final linear layer.
    """

    def __init__(self, num_classes, dropout=0.4):
        super().__init__()
        # `pretrained=True` is deprecated in newer torchvision releases in
        # favour of the `weights=` argument; IMAGENET1K_V1 is the checkpoint
        # that `pretrained=True` used to load. Fall back to the legacy flag on
        # older torchvision versions that lack the weights enum.
        if hasattr(models, 'ResNet18_Weights'):
            self.model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        else:
            self.model = models.resnet18(pretrained=True)

        # Explicitly mark the whole backbone as trainable.
        for param in self.model.parameters():
            param.requires_grad = True

        self.model.fc = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(self.model.fc.in_features, num_classes)
        )

    def forward(self, x):
        """Return the class logits produced by the wrapped network for ``x``."""
        return self.model(x)

In [181]:
# Use the GPU when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# One output logit per class known to the label encoder.
model = GeoGuessCountryClassifier(num_classes=len(le.classes_))
model = model.to(device)



In [182]:
# "Balanced" class weights computed from the training split.
# The split cell names this frame `train_df`; the former `df_train` was never
# defined and raised NameError on a fresh kernel.
train_labels = train_df['label']
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_labels),
    y=train_labels
)
# One slot per encoder class, initialised to zero; slots for classes present
# in training are meant to be filled in afterwards.
weights = torch.zeros(len(le.classes_), dtype=torch.float)

In [183]:
# Copy each computed weight into the slot of its label id.
# Uses `train_df` (the name produced by the split cell); `df_train` was
# undefined.
for cls, w in zip(np.unique(train_df['label']), class_weights):
    weights[cls] = w

# Move the weights to the training device once and reuse that tensor.
weights = weights.to(device)
criterion = nn.CrossEntropyLoss(weight=weights)
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
# Halve the learning rate every 3 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)

In [184]:
# Training
def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader``.

    Args:
        model: Network to train; switched to training mode.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(mean_loss, accuracy)`` averaged over all samples seen.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for batch_inputs, batch_labels in tqdm(loader, desc="Training"):
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        logits = model(batch_inputs)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so the final value is a
        # per-sample mean even when the last batch is smaller.
        loss_sum += batch_loss.item() * batch_inputs.size(0)
        hits = logits.argmax(dim=1) == batch_labels
        n_correct += hits.sum().item()
        n_seen += batch_labels.size(0)

    return loss_sum / n_seen, n_correct / n_seen

In [185]:
def eval_epoch(model, loader, criterion, device):
    """Evaluate ``model`` on every batch of ``loader`` without gradient tracking.

    Args:
        model: Network to evaluate; switched to eval mode.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(mean_loss, accuracy)`` averaged over all samples seen.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for batch_inputs, batch_labels in loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_inputs)
            batch_loss = criterion(logits, batch_labels)

            # Weight by batch size so the result is a per-sample mean.
            loss_sum += batch_loss.item() * batch_inputs.size(0)
            hits = logits.argmax(dim=1) == batch_labels
            n_correct += hits.sum().item()
            n_seen += batch_labels.size(0)

    return loss_sum / n_seen, n_correct / n_seen

In [189]:
# Total number of epochs; also used in the progress message so the log
# matches the loop (it previously printed "/10" while running 30 epochs).
NUM_EPOCHS = 30

best_val_acc = 0.0
for epoch in range(NUM_EPOCHS):
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = eval_epoch(model, val_loader, criterion, device)
    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

    print(f"Epoch {epoch+1}/{NUM_EPOCHS}: Train loss={train_loss:.4f}, acc={train_acc:.4f} | Val loss={val_loss:.4f}, acc={val_acc:.4f}")

    # Save the best model so far (highest validation accuracy).
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_model.pt")

Training: 100%|██████████| 145/145 [01:27<00:00,  1.66it/s]


Epoch 1/10: Train loss=0.9413, acc=0.7071 | Val loss=1.7622, acc=0.5291


Training: 100%|██████████| 145/145 [01:27<00:00,  1.67it/s]


Epoch 2/10: Train loss=0.8984, acc=0.7182 | Val loss=1.7609, acc=0.5286


Training: 100%|██████████| 145/145 [01:28<00:00,  1.65it/s]


Epoch 3/10: Train loss=0.8637, acc=0.7293 | Val loss=1.7474, acc=0.5302


Training: 100%|██████████| 145/145 [01:26<00:00,  1.67it/s]


Epoch 4/10: Train loss=0.8473, acc=0.7339 | Val loss=1.7380, acc=0.5334


Training: 100%|██████████| 145/145 [01:27<00:00,  1.65it/s]


Epoch 5/10: Train loss=0.8212, acc=0.7399 | Val loss=1.7389, acc=0.5366


Training: 100%|██████████| 145/145 [01:27<00:00,  1.67it/s]


Epoch 6/10: Train loss=0.8086, acc=0.7461 | Val loss=1.7254, acc=0.5361


Training: 100%|██████████| 145/145 [01:26<00:00,  1.67it/s]


Epoch 7/10: Train loss=0.7954, acc=0.7464 | Val loss=1.7329, acc=0.5377


Training: 100%|██████████| 145/145 [01:27<00:00,  1.66it/s]


Epoch 8/10: Train loss=0.7907, acc=0.7494 | Val loss=1.7328, acc=0.5350


Training: 100%|██████████| 145/145 [01:26<00:00,  1.67it/s]


Epoch 9/10: Train loss=0.7784, acc=0.7505 | Val loss=1.7208, acc=0.5388


Training: 100%|██████████| 145/145 [01:27<00:00,  1.65it/s]


Epoch 10/10: Train loss=0.7736, acc=0.7549 | Val loss=1.7182, acc=0.5415


Training: 100%|██████████| 145/145 [01:27<00:00,  1.66it/s]


Epoch 11/10: Train loss=0.7695, acc=0.7565 | Val loss=1.7248, acc=0.5388


Training: 100%|██████████| 145/145 [01:27<00:00,  1.65it/s]


Epoch 12/10: Train loss=0.7678, acc=0.7553 | Val loss=1.7169, acc=0.5409


Training: 100%|██████████| 145/145 [01:27<00:00,  1.67it/s]


Epoch 13/10: Train loss=0.7643, acc=0.7547 | Val loss=1.7250, acc=0.5415


Training: 100%|██████████| 145/145 [01:27<00:00,  1.66it/s]


Epoch 14/10: Train loss=0.7599, acc=0.7566 | Val loss=1.7218, acc=0.5409


Training: 100%|██████████| 145/145 [01:27<00:00,  1.66it/s]


Epoch 15/10: Train loss=0.7584, acc=0.7582 | Val loss=1.7198, acc=0.5393


Training: 100%|██████████| 145/145 [01:26<00:00,  1.67it/s]


Epoch 16/10: Train loss=0.7612, acc=0.7566 | Val loss=1.7233, acc=0.5420


Training: 100%|██████████| 145/145 [01:27<00:00,  1.66it/s]


Epoch 17/10: Train loss=0.7604, acc=0.7598 | Val loss=1.7201, acc=0.5404


Training: 100%|██████████| 145/145 [01:27<00:00,  1.65it/s]


Epoch 18/10: Train loss=0.7554, acc=0.7637 | Val loss=1.7150, acc=0.5426


Training: 100%|██████████| 145/145 [01:27<00:00,  1.66it/s]


Epoch 19/10: Train loss=0.7519, acc=0.7632 | Val loss=1.7258, acc=0.5463


Training: 100%|██████████| 145/145 [01:27<00:00,  1.65it/s]


Epoch 20/10: Train loss=0.7508, acc=0.7595 | Val loss=1.7225, acc=0.5399


Training: 100%|██████████| 145/145 [01:27<00:00,  1.66it/s]


Epoch 21/10: Train loss=0.7503, acc=0.7629 | Val loss=1.7242, acc=0.5377


Training: 100%|██████████| 145/145 [01:27<00:00,  1.66it/s]


Epoch 22/10: Train loss=0.7531, acc=0.7608 | Val loss=1.7155, acc=0.5426


Training: 100%|██████████| 145/145 [01:26<00:00,  1.68it/s]


Epoch 23/10: Train loss=0.7552, acc=0.7610 | Val loss=1.7219, acc=0.5404


Training: 100%|██████████| 145/145 [01:27<00:00,  1.66it/s]


Epoch 24/10: Train loss=0.7508, acc=0.7607 | Val loss=1.7168, acc=0.5393


Training: 100%|██████████| 145/145 [01:27<00:00,  1.65it/s]


Epoch 25/10: Train loss=0.7566, acc=0.7598 | Val loss=1.7199, acc=0.5404


Training: 100%|██████████| 145/145 [01:26<00:00,  1.67it/s]


Epoch 26/10: Train loss=0.7616, acc=0.7572 | Val loss=1.7222, acc=0.5426


Training: 100%|██████████| 145/145 [01:27<00:00,  1.67it/s]


Epoch 27/10: Train loss=0.7524, acc=0.7615 | Val loss=1.7119, acc=0.5409


Training: 100%|██████████| 145/145 [01:27<00:00,  1.66it/s]


Epoch 28/10: Train loss=0.7520, acc=0.7631 | Val loss=1.7154, acc=0.5399


Training: 100%|██████████| 145/145 [01:27<00:00,  1.65it/s]


Epoch 29/10: Train loss=0.7457, acc=0.7654 | Val loss=1.7167, acc=0.5404


Training: 100%|██████████| 145/145 [01:26<00:00,  1.67it/s]


Epoch 30/10: Train loss=0.7559, acc=0.7604 | Val loss=1.7124, acc=0.5361


In [197]:
# Evaluation
# map_location remaps the checkpoint tensors onto the current device, so a
# checkpoint saved on a GPU can also be loaded on a CPU-only machine.
model.load_state_dict(torch.load("best_model.pt", map_location=device))
model.eval()
# Bind the loss to a real name instead of `_`, which IPython uses for the
# last cell output.
test_loss, test_acc = eval_epoch(model, test_loader, criterion, device)
print(f"\nAccuracy: {test_acc * 100:.2f}%")


Accuracy: 56.10%
