In [1]:
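# Only needed on GPUHub: uncomment the next line to install/upgrade the project requirements.
# (`%pip` installs into the environment of the running kernel.)
# %pip install -r ../../requirements.txt --upgrade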

## Importing Libraries and tokens

In [2]:
import wandb
import os
import torch
import sys
from torchvision import transforms

# load .env file
from dotenv import load_dotenv
from geo_model_deployer import GeoModelDeployer
from image_data_handler_deploy import DeployImageDataHandler

sys.path.insert(0, "../5_evaluation")
from wandb_downloader import WandbDownloader

os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [3]:
# Resolve the Weights & Biases API token: an already-exported environment
# variable wins; otherwise fall back to the .env file two directories up,
# if that file exists.
env_path = "../../.env"
WANDB_TOKEN = os.environ.get("WANDB_TOKEN")
if not WANDB_TOKEN:
    if os.path.exists(env_path):
        load_dotenv(env_path)
        WANDB_TOKEN = os.environ.get("WANDB_TOKEN")

In [4]:
# Report whether CUDA is usable and, if so, print a short summary of device 0.
if not torch.cuda.is_available():
    print("No GPU found.")
else:
    print("GPU is available.")
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")

    # One property lookup serves both the memory and the capability lines.
    device_properties = torch.cuda.get_device_properties(0)

    # All memory figures are converted from bytes to decimal gigabytes (1e9 bytes).
    total_memory = device_properties.total_memory / 1e9
    print(f"Total Memory: {total_memory:.2f} GB")

    allocated_memory = torch.cuda.memory_allocated(0) / 1e9
    print(f"Allocated Memory: {allocated_memory:.2f} GB")

    cached_memory = torch.cuda.memory_reserved(0) / 1e9
    print(f"Cached Memory: {cached_memory:.2f} GB")

    print(f"CUDA Capability: {device_properties.major}.{device_properties.minor}")
    print(f"Multi-Processor Count: {device_properties.multi_processor_count}")

No GPU found.


## Loading files from wandb

In [5]:
# Log in with the explicit token when one was found; an empty/missing token is
# passed as None so wandb falls back to its default login flow.
wandb.login(key=WANDB_TOKEN or None)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkillusions[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [6]:
import pprint

# --- Run selection settings -------------------------------------------------
entity = "nlp_ls"
predict_coordinates = False
predict_regions = False

# The project depends on the prediction task: the region flag is checked first,
# then the coordinate flag; country prediction is the fallback.
if predict_regions:
    project = "dspro2-predicting-region"
elif predict_coordinates:
    project = "dspro2-predicting-coordinates"
else:
    project = "dspro2-predicting-country"

# Metric name handed to the downloader when collecting the best runs.
if predict_coordinates:
    metric_name = "Best Validation Distance (km)"
else:
    metric_name = "Best Validation Accuracy Top 1"

data_augmentation = "full_augmentation_v2"  # or "base_augmentation"
datasize = 332786  # Replace with the desired datasize
image_size = [80, 130]
file_names_to_download = [".pth", ".json"]

# --- Fetch the matching runs from Weights & Biases ---------------------------
downloader = WandbDownloader(entity, project, data_augmentation, datasize, image_size)
run_data = downloader.get_and_collect_best_runs(metric_name, file_names_to_download)

# run_data is what the following cells read (files, metrics, parameters per run).
pprint.pprint(run_data)

dspro2-predicting-country: Found 4 matching runs for datasize 332786 and full_augmentation_v2.
{'Best Run 1': {'files': {'best_model': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-country/bc69qzqh/best_model_checkpointmodel_efficientnet_b1_lr_0.01_opt_adamW_weightDecay_0.01_imgSize_[80, '
                                        '130]_predict_coordinates_False.pth',
                          'country_to_index.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-country/bc69qzqh/run-20240627_133143-bc69qzqh/country_to_index.json',
                          'region_index_to_country_index.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-country/bc69qzqh/run-20240627_133143-bc69qzqh/region_index_to_country_index.json',
                          'region_index_to_middle_point.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-country/bc69qzqh/run-20240627_133143-bc69qzqh/region_index_to_middle_point.json',
                          'region_to_index.json': 'h

In [7]:
# List the run labels ("Best Run N") that the downloader returned.
run_data.keys()

dict_keys(['Best Run 1', 'Best Run 2', 'Best Run 3', 'Best Run 4'])

In [8]:
# Show the headline validation metric(s) of "Best Run 1": the distance for
# coordinate prediction, otherwise the top-1/3/5 accuracies.
best_run_metrics = run_data["Best Run 1"]["metrics"]

if predict_coordinates:
    print("Best Validation Distance (km): ", best_run_metrics["Best Validation Distance (km)"])
else:
    for top_k in (1, 3, 5):
        metric_key = f"Best Validation Accuracy Top {top_k}"
        print(f"{metric_key}: ", best_run_metrics[metric_key])

Best Validation Accuracy Top 1:  0.6489625433838665
Best Validation Accuracy Top 3:  0.8262391634238322
Best Validation Accuracy Top 5:  0.8834382559310066


In [9]:
# Display the training parameters recorded for "Best Run 1".
run_data["Best Run 1"]["parameters"]

{'seed': 42,
 'epochs': 25,
 'optimizer': 'adamW',
 'batch_size': 200,
 'model_name': 'efficientnet_b1',
 'mapped_data': False,
 'dataset_size': 332786,
 'weight_decay': 0.01,
 'learning_rate': 0.01,
 'predict_regions': False,
 'input_image_size': [80, 130],
 'data_augmentation': 'full_augmentation_v2',
 'different_regions': 4596,
 'dataset_identifier': '22a493044dbe99c1d431b9ee4656792efbb09ece4182274670ba5faec505d9cf',
 'different_countries': 138,
 'predict_coordinates': False}

In [10]:
# Display the file-name -> URL mapping collected for "Best Run 1".
run_data["Best Run 1"]["files"]

{'wandb_manifest.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-country/bc69qzqh/artifact/931421173/wandb_manifest.json',
 'best_model': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-country/bc69qzqh/best_model_checkpointmodel_efficientnet_b1_lr_0.01_opt_adamW_weightDecay_0.01_imgSize_[80, 130]_predict_coordinates_False.pth',
 'country_to_index.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-country/bc69qzqh/run-20240627_133143-bc69qzqh/country_to_index.json',
 'region_index_to_country_index.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-country/bc69qzqh/run-20240627_133143-bc69qzqh/region_index_to_country_index.json',
 'region_index_to_middle_point.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-country/bc69qzqh/run-20240627_133143-bc69qzqh/region_index_to_middle_point.json',
 'region_to_index.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-country/bc69qzqh/run-20240627_133143-bc69qzqh/region_to_index.json',
 'test_d

## Loading data and creating data loader

In [11]:
# Pick the best-ranked run that actually has a model checkpoint ("best_model").
#
# The number of candidates comes from run_data itself instead of a hard-coded
# count: with fewer runs than the old fixed limit, a missing "Best Run N" key
# raised KeyError before the explicit "no run found" error below was reached.
run = None

for rank in range(1, len(run_data) + 1):
    candidate = run_data.get(f"Best Run {rank}")
    if candidate is not None and candidate["files"].get("best_model"):
        run = candidate
        break
    print(f"Run {rank} does not contain the necessary files. Trying the next run...")

if run is None:
    raise Exception("No run with the necessary files found.")

# Preprocessing for inference: resize to image_size, convert to a tensor and
# normalise every channel with mean 0.5 / std 0.5.
augmented_transform = None  # Never used for test data
base_transform = transforms.Compose(
    [
        transforms.Resize((image_size[0], image_size[1])),
        transforms.ToTensor(),
        transforms.Normalize((0.5,) * 3, (0.5,) * 3),
    ]
)

# Look up the mapping files of the selected run (None when a file is absent)
# and hand them to the data handler.
files = run["files"]
country_to_index = files.get("country_to_index.json")
region_to_index = files.get("region_to_index.json")
region_index_to_middle_point = files.get("region_index_to_middle_point.json")
region_index_to_country_index = files.get("region_index_to_country_index.json")

data_handler = DeployImageDataHandler(
    country_to_index,
    region_to_index,
    region_index_to_middle_point,
    region_index_to_country_index,
    base_transform,
)

# From here on the names refer to the handler's attributes rather than to the
# values read from `files` above.
country_to_index = data_handler.country_to_index
region_to_index = data_handler.region_to_index
region_index_to_middle_point = data_handler.region_index_to_middle_point
region_index_to_country_index = data_handler.region_index_to_country_index

num_countries = data_handler.num_countries
num_regions = data_handler.num_regions

Loaded 138 countries.
Loaded 4596 regions.
Loaded 4596 region middle points.
Loaded 3595 region to country index mappings.


## Evaluating the model

In [12]:
# Size of the model's output layer for the selected task.
if predict_coordinates:
    num_classes = 3  # NOTE(review): presumably one output per cartesian component - confirm
elif predict_regions:
    num_classes = num_regions
else:
    num_classes = num_countries

if num_classes == 0:
    raise ValueError("No classes detected. Please check the data.")

# predict_regions is only forwarded for classification; it is None when
# coordinates are predicted.
geo_model = GeoModelDeployer(
    num_classes=num_classes,
    predict_coordinates=predict_coordinates,
    country_to_index=country_to_index,
    region_to_index=region_to_index,
    region_index_to_middle_point=region_index_to_middle_point,
    region_index_to_country_index=region_index_to_country_index,
    predict_regions=None if predict_coordinates else predict_regions,
)

In [13]:
# Prepare the deployer with the selected run's architecture name and its
# "best_model" checkpoint entry.
pretrained_weights = run["files"]["best_model"]
model_name = run["parameters"]["model_name"]

geo_model.prepare(model_path=pretrained_weights, model_type=model_name)

In [17]:
test_file_names = ["./test.png", "./test2.jpg", "./test3.jpg"]

for test_file_name in test_file_names:
    # Blank line followed by the file name as a header for this image
    print(f"\n{test_file_name}")

    # Loading and predicting are the same for every task; only the shape of
    # the returned tuple differs, so it is unpacked per branch below.
    image = data_handler.load_single_image(test_file_name)
    prediction = geo_model.predict_single(image, top_n=5)

    if predict_coordinates:
        coordinates, cartesian = prediction

        print()
        print(f"Predicted Coordinates: {coordinates} (Cartesian: {cartesian})")
        continue

    if predict_regions:
        (
            regions,
            region_indices,
            region_probabilities,
            countries,
            country_indices,
            country_probabilities,
            corresponding_countries,
            corresponding_country_indices,
        ) = prediction

        print()

        # Top regions, each with the country it belongs to
        region_rows = zip(regions, region_indices, region_probabilities, corresponding_countries, corresponding_country_indices)
        for rank, (region, region_index, region_probability, corresponding_country, corresponding_country_index) in enumerate(region_rows, start=1):
            print(f"Region {rank}: {region} ({region_index}) with Probability: {region_probability:.3f}, in Country: {corresponding_country} ({corresponding_country_index})")
    else:
        countries, country_indices, country_probabilities = prediction

    print()

    # Top countries (printed for both the region and the country task)
    country_rows = zip(countries, country_indices, country_probabilities)
    for rank, (country, country_index, country_probability) in enumerate(country_rows, start=1):
        print(f"Country {rank}: {country} ({country_index}) with Probability: {country_probability:.2f}")

# Multiple would be like this, every value is returned as a list, zip them to iterate over samples
# geo_model.predict(data_handler.load_images(['./test1.png', './test2.png']), top_n=5)


./test.png
Cropping image to 16:9 aspect ratio.

Country 1: Russian Federation (103) with Probability: 0.58
Country 2: Estonia (34) with Probability: 0.21
Country 3: Ukraine (127) with Probability: 0.19
Country 4: Türkiye (125) with Probability: 0.02
Country 5: Albania (0) with Probability: 0.00

./test2.jpg
Cropping image to 16:9 aspect ratio.

Country 1: United Kingdom (129) with Probability: 0.78
Country 2: Netherlands (86) with Probability: 0.07
Country 3: Taiwan, Province of China (119) with Probability: 0.06
Country 4: Italy (57) with Probability: 0.02
Country 5: Norway (90) with Probability: 0.01

./test3.jpg
Cropping image to 16:9 aspect ratio.

Country 1: France (38) with Probability: 0.29
Country 2: United States (130) with Probability: 0.21
Country 3: Japan (58) with Probability: 0.16
Country 4: Mexico (79) with Probability: 0.15
Country 5: United Kingdom (129) with Probability: 0.08



Aborted!


: 