In [1]:
# important for gpuhub
# !pip install -r ../../requirements.txt --upgrade

## Importing Libraries and tokens

In [2]:
import wandb
import os
import torch

# load .env file
from dotenv import load_dotenv
from wandb_downloader import WandbDownloader
from geo_model_tester import GeoModelTester
from image_data_handler_test import TestImageDataHandler
from best_run_loader import BestRunLoader

os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [3]:
WANDB_TOKEN = os.getenv("WANDB_TOKEN")
# Define where to run
env_path = "../../.env"
if not WANDB_TOKEN and os.path.exists(env_path):
    load_dotenv(env_path)
    WANDB_TOKEN = os.getenv("WANDB_TOKEN")

In [4]:
# Check if GPU is available
if torch.cuda.is_available():
    print("GPU is available.")

    # Print the name of the GPU
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")

    # Print the total and available memory
    total_memory = torch.cuda.get_device_properties(0).total_memory / 1e9  # Convert bytes to GB
    print(f"Total Memory: {total_memory:.2f} GB")

    allocated_memory = torch.cuda.memory_allocated(0) / 1e9  # Convert bytes to GB
    print(f"Allocated Memory: {allocated_memory:.2f} GB")

    cached_memory = torch.cuda.memory_reserved(0) / 1e9  # Convert bytes to GB
    print(f"Cached Memory: {cached_memory:.2f} GB")

    # Print other properties
    device_properties = torch.cuda.get_device_properties(0)
    print(f"CUDA Capability: {device_properties.major}.{device_properties.minor}")
    print(f"Multi-Processor Count: {device_properties.multi_processor_count}")
else:
    print("No GPU found.")

No GPU found.


## Loading files from wandb

In [5]:
wandb.login(key=WANDB_TOKEN) if WANDB_TOKEN else wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkillusions[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [6]:
# Example usage:
entity = "nlp_ls"
predict_coordinates = False
predict_regions = False
project = "dspro2-predicting-region" if predict_regions else ("dspro2-predicting-coordinates" if predict_coordinates else "dspro2-predicting-country")
metric_name = "Best Validation Accuracy Top 1" if not predict_coordinates else "Best Validation Distance (km)"
metric_ascending = False if not predict_coordinates else True
data_augmentation = "full_augmentation_v2"  # or "base_augmentation"
datasize = 332786  # Replace with the desired datasize
file_names_to_download = [".pth", ".json"]
image_size = [80, 130]

downloader = WandbDownloader(entity, project, data_augmentation, datasize, image_size)
run_data = downloader.get_and_collect_best_runs(metric_name, file_names_to_download, metric_ascending=metric_ascending)

dspro2-predicting-country: Found 4 matching runs for datasize 332786 and full_augmentation_v2.


In [7]:
# Print the validation accuracy for the top 1, 3, and 5 predictions
for j in range(1, min(len(run_data), 6)):
    print(f"\nBest Run {j}: {run_data[f'Best Run {j}']['id']}")
    if predict_coordinates:
        print(f"Best Validation Distance (km): ", run_data[f"Best Run {j}"]["metrics"]["Best Validation Distance (km)"])
    else:
        for i in [1, 3, 5]:
            print(f"Best Validation Accuracy Top {i}: ", run_data[f"Best Run {j}"]["metrics"][f"Best Validation Accuracy Top {i}"])


Best Run 1: bc69qzqh
Best Validation Accuracy Top 1:  0.6489625433838665
Best Validation Accuracy Top 3:  0.8262391634238322
Best Validation Accuracy Top 5:  0.8834382559310066

Best Run 2: kth52fnv
Best Validation Accuracy Top 1:  0.6238261940892769
Best Validation Accuracy Top 3:  0.8143996874859143
Best Validation Accuracy Top 5:  0.8779091605691363

Best Run 3: 8pqe6jmh
Best Validation Accuracy Top 1:  0.5845816367925237
Best Validation Accuracy Top 3:  0.7875054464594258
Best Validation Accuracy Top 5:  0.8600147242213441


In [8]:
run_data["Best Run 1"]["parameters"]

{'seed': 42,
 'epochs': 25,
 'optimizer': 'adamW',
 'batch_size': 200,
 'model_name': 'efficientnet_b1',
 'mapped_data': False,
 'dataset_size': 332786,
 'weight_decay': 0.01,
 'learning_rate': 0.01,
 'predict_regions': False,
 'input_image_size': [80, 130],
 'data_augmentation': 'full_augmentation_v2',
 'different_regions': 4596,
 'dataset_identifier': '22a493044dbe99c1d431b9ee4656792efbb09ece4182274670ba5faec505d9cf',
 'different_countries': 138,
 'predict_coordinates': False}

In [9]:
run_data["Best Run 1"]["files"]

{'wandb_manifest.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-country/bc69qzqh/artifact/931421173/wandb_manifest.json',
 'best_model': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-country/bc69qzqh/best_model_checkpointmodel_efficientnet_b1_lr_0.01_opt_adamW_weightDecay_0.01_imgSize_[80, 130]_predict_coordinates_False.pth',
 'country_to_index.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-country/bc69qzqh/run-20240627_133143-bc69qzqh/country_to_index.json',
 'region_index_to_country_index.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-country/bc69qzqh/run-20240627_133143-bc69qzqh/region_index_to_country_index.json',
 'region_index_to_middle_point.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-country/bc69qzqh/run-20240627_133143-bc69qzqh/region_index_to_middle_point.json',
 'region_to_index.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-country/bc69qzqh/run-20240627_133143-bc69qzqh/region_to_index.json',
 'test_d

## Loading metrics for best runs for each datasize, with and without augmentation

In [10]:
# Example usage:
entity = "nlp_ls"
metric_name = "Best Validation Accuracy Top 1"
project_names = ["coordinates", "region", "country"]
file_names_to_download = [".pth", ".json"]

loader = BestRunLoader(entity, metric_name, project_names, file_names_to_download)

# See the keys of the results
print("\n", loader.results.keys(), "\n")

# See the keys of the results
print(loader.results["dspro2-predicting-region_base_augmentation_81505_(80, 130)"].keys())

dspro2-predicting-coordinates: No matching runs found for datasize 81505 and base_augmentation.
dspro2-predicting-coordinates: No matching runs found for datasize 81505 and full_augmentation_v2.
dspro2-predicting-coordinates: Found 5 matching runs for datasize 332786 and base_augmentation.
dspro2-predicting-coordinates: No matching runs found for datasize 332786 and full_augmentation_v2.
dspro2-predicting-coordinates: Found 5 matching runs for datasize 79000 and base_augmentation.
dspro2-predicting-region: Found 5 matching runs for datasize 81505 and base_augmentation.
dspro2-predicting-region: Found 2 matching runs for datasize 81505 and full_augmentation_v2.
dspro2-predicting-region: Found 5 matching runs for datasize 332786 and base_augmentation.
Could not find test_data.pth for run 9wly2kt2, looks like run l47ryx7t was deleted or is part of a different project.
dspro2-predicting-region: Found 4 matching runs for datasize 332786 and full_augmentation_v2.
dspro2-predicting-region: Fo

In [11]:
# Count runs per project
project_run_counts = loader.count_runs_per_project()
print(f"Found runs for each project", project_run_counts)

Found runs for each project {'dspro2-predicting-coordinates': 10, 'dspro2-predicting-region': 21, 'dspro2-predicting-country': 22}


In [12]:
# Print the validation accuracy for the top 1, 3, and 5 predictions
coordinates_print = False

for j in range(1, min(len(loader.results["dspro2-predicting-coordinates_base_augmentation_332786_(80, 130)"]), 6)):
    print(f"\nBest Run {j}")
    if coordinates_print:
        print(loader.results["dspro2-predicting-coordinates_base_augmentation_332786_(80, 130)"][f"Best Run {j}"]["metrics"][f"Validation Accuracy Top {i}"])
    else:
        for i in [1, 3, 5]:
            try:
                # print(loader.results["dspro2-predicting-country_base_augmentation_332786_(80, 130)"][f"Best Run {j}"]["metrics"][f"Validation Accuracy Top {i}"])
                print(loader.results["dspro2-predicting-country_full_augmentation_v2_332786_(80, 130)"][f"Best Run {j}"]["metrics"][f"Validation Accuracy Top {i}"])
            except:
                print("No run found")


Best Run 1
0.6398575656955692
0.8168336914223898
0.8735670177441892

Best Run 2
0.6238261940892769
0.8143996874859143
0.8779091605691363

Best Run 3
0.5754616343885692
0.7823369442733297
0.8547710984569618

Best Run 4
0.5953543579187763
0.7978875249785898
0.8674970326186577


## Loading data and creating data loader

In [13]:
cache = True

run = None

for i in range(min(len(run_data), 5)):
    run = run_data[f"Best Run {i+1}"]
    if run["files"].get("test_data", None) and run["files"].get("best_model", None):
        break
    else:
        run = None
        print(f"Run {i+1} does not contain the necessary files. Trying the next run...")

if run is None:
    raise Exception("No run with the necessary files found.")

# Creating Dataloaders with the classes
test_dataset = run["files"]["test_data"]
files = run["files"]
country_to_index = files.get("country_to_index.json", None)
region_to_index = files.get("region_to_index.json", None)
region_index_to_middle_point = files.get("region_index_to_middle_point.json", None)
region_index_to_country_index = files.get("region_index_to_country_index.json", None)

data_handler = TestImageDataHandler(test_dataset, country_to_index, region_to_index, region_index_to_middle_point, region_index_to_country_index, cache=cache)
test_dataloader = data_handler.test_loader
country_to_index = data_handler.country_to_index
region_to_index = data_handler.region_to_index
region_index_to_middle_point = data_handler.region_index_to_middle_point
region_index_to_country_index = data_handler.region_index_to_country_index

num_regions = data_handler.num_regions
num_countries = data_handler.num_countries

Loaded 138 countries.
Loaded 4596 regions.
Loaded 4596 region middle points.
Loaded 3595 region to country index mappings.
Loading test data from test_data.pth


In [None]:
import wandb
import numpy as np

# Log in to WandB
wandb.login()

# Replace with your entity, project, and run ID
entity = "nlp_ls"
projects = ["dspro2-predicting-country", "dspro2-predicting-region", "dspro2-predicting-coordinates"]
run_ids = "*" # or []

# Value to add to the summary
summary_key = None # "test_data_run_id"
summary_value = None # "w1098m89"

# Push best Validation Accuracy * and Validation Distance and Validation Loss as Logs
push_best = True

# Access the run
api = wandb.Api()
for project in projects:
    project_run_ids = run_ids
    if project_run_ids == '*':
        # Get all runs of the project
        project_run_ids = []
        print(f"Getting run id's for project {project}")
        for run in api.runs(f"{entity}/{project}"):
            project_run_ids.append(run.id)
        print(f"Found {len(project_run_ids)} runs")
    for run_id in project_run_ids:
        print(f"Checking run {run_id}")
        if summary_key is not None and summary_value is not None:
            run = api.run(f"{entity}/{project}/{run_id}")

            # Update the summary
            run.summary[summary_key] = summary_value
            run.summary.update()

            print(f"Successfully updated summary of run {run_id} with {summary_key}: {summary_value}")
        
        # Push best Validation Accuracy * and Validation Distance and Validation Loss to summary
        # Technically, this is different from pushing it after training, because there it will use the values of the best epoch, however, this is a good approximation
        if push_best:
            run = api.run(f"{entity}/{project}/{run_id}")
            # Get all metrics in summary that start with "Validation"
            metrics = run.summary.keys()
            validation_metrics = [k for k in metrics if k.lower().startswith("validation")]
            best_validation_metrics = {}
            # For every validation metric, push the best value from the history
            for metric in validation_metrics:
                best_key = f"Best {metric}"
                if best_key in metrics:
                    continue
                all_values = run.history()[metric] # numpy array
                all_values_without_nan = all_values[~np.isnan(all_values)]
                if len(all_values_without_nan) > 0:
                    best_value = np.max(all_values_without_nan) if "accuracy" in metric.lower() or "correct" in metric.lower() else np.min(all_values_without_nan)
                else:
                    best_value = np.nan
                best_validation_metrics[best_key] = best_value
                # Update the summary
                run.summary[best_key] = best_value
            if len(best_validation_metrics) > 0:
                run.summary.update()
                print(f"Successfully updated summary of run {run_id} with {best_validation_metrics}")
            else:
                print(f"Skipped updating summary of run {run_id}")

Getting run id's for project dspro2-predicting-region
Found 55 runs
Checking run 7f0wprn5
Skipped updating summary of run 7f0wprn5
Checking run hl40ix17
Skipped updating summary of run hl40ix17
Checking run xr2tkule
Skipped updating summary of run xr2tkule
Checking run udglvnth
Skipped updating summary of run udglvnth
Checking run 6518mmnp
Skipped updating summary of run 6518mmnp
Checking run ov5mhb9g
Skipped updating summary of run ov5mhb9g
Checking run cf8ecfwe
Skipped updating summary of run cf8ecfwe
Checking run j3zik6vp
Successfully updated summary of run j3zik6vp with {'Best Validation Accuracy Top 3 Country': 0.055, 'Best Validation Distance (km)': 4829.95891947686, 'Best Validation Accuracy Top 3': 0.018544303797468353, 'Best Validation Accuracy Top 1 Country': 0.02677215189873418, 'Best Validation Accuracy Top 1': 0.011265822784810127, 'Best Validation Accuracy Top 5': 0.023607594936708862, 'Best Validation Loss': 21.82173878633523, 'Best Validation Accuracy Top 5 Country': 0.

In [None]:
import wandb
import numpy as np

# Log in to WandB
wandb.login()

# Replace with your entity, project, and run ID
entity = "nlp_ls"
projects = ["dspro2-predicting-country", "dspro2-predicting-region", "dspro2-predicting-coordinates"]
run_ids = "*" # or []

# Value to add to the summary
summary_key = None # "test_data_run_id"
summary_value = None # "w1098m89"

# Push best Validation Accuracy * and Validation Distance and Validation Loss as Logs
push_best = True

# Access the run
api = wandb.Api()
for project in projects:
    project_run_ids = run_ids
    if project_run_ids == '*':
        # Get all runs of the project
        project_run_ids = []
        print(f"Getting run id's for project {project}")
        for run in api.runs(f"{entity}/{project}"):
            project_run_ids.append(run.id)
        print(f"Found {len(project_run_ids)} runs")
    for run_id in project_run_ids:
        print(f"Checking run {run_id}")
        if summary_key is not None and summary_value is not None:
            run = api.run(f"{entity}/{project}/{run_id}")

            # Update the summary
            run.summary[summary_key] = summary_value
            run.summary.update()

            print(f"Successfully updated summary of run {run_id} with {summary_key}: {summary_value}")
        
        # Push best Validation Accuracy * and Validation Distance and Validation Loss to summary
        # Technically, this is different from pushing it after training, because there it will use the values of the best epoch, however, this is a good approximation
        if push_best:
            run = api.run(f"{entity}/{project}/{run_id}")
            # Get all metrics in summary that start with "Validation"
            metrics = run.summary.keys()
            validation_metrics = [k for k in metrics if k.lower().startswith("validation")]
            best_validation_metrics = {}
            # For every validation metric, push the best value from the history
            for metric in validation_metrics:
                best_key = f"Best {metric}"
                if best_key in metrics:
                    continue
                all_values = run.history()[metric] # numpy array
                all_values_without_nan = all_values[~np.isnan(all_values)]
                if len(all_values_without_nan) > 0:
                    best_value = np.max(all_values_without_nan) if "accuracy" in metric.lower() or "correct" in metric.lower() else np.min(all_values_without_nan)
                else:
                    best_value = np.nan
                best_validation_metrics[best_key] = best_value
                # Update the summary
                run.summary[best_key] = best_value
            if len(best_validation_metrics) > 0:
                run.summary.update()
                print(f"Successfully updated summary of run {run_id} with {best_validation_metrics}")
            else:
                print(f"Skipped updating summary of run {run_id}")

Getting run id's for project dspro2-predicting-region
Found 55 runs
Checking run 7f0wprn5
Skipped updating summary of run 7f0wprn5
Checking run hl40ix17
Skipped updating summary of run hl40ix17
Checking run xr2tkule
Skipped updating summary of run xr2tkule
Checking run udglvnth
Skipped updating summary of run udglvnth
Checking run 6518mmnp
Skipped updating summary of run 6518mmnp
Checking run ov5mhb9g
Skipped updating summary of run ov5mhb9g
Checking run cf8ecfwe
Skipped updating summary of run cf8ecfwe
Checking run j3zik6vp
Successfully updated summary of run j3zik6vp with {'Best Validation Accuracy Top 3 Country': 0.055, 'Best Validation Distance (km)': 4829.95891947686, 'Best Validation Accuracy Top 3': 0.018544303797468353, 'Best Validation Accuracy Top 1 Country': 0.02677215189873418, 'Best Validation Accuracy Top 1': 0.011265822784810127, 'Best Validation Accuracy Top 5': 0.023607594936708862, 'Best Validation Loss': 21.82173878633523, 'Best Validation Accuracy Top 5 Country': 0.

## Evaluating the model

In [None]:
num_classes = 3 if predict_coordinates else (num_regions if predict_regions else num_countries)

if num_classes == 0:
    raise ValueError("No classes detected. Please check the data.")

geo_model_tester = GeoModelTester(test_dataloader=test_dataloader, num_classes=num_classes, predict_coordinates=predict_coordinates, country_to_index=country_to_index, region_to_index=region_to_index, region_index_to_middle_point=region_index_to_middle_point, region_index_to_country_index=region_index_to_country_index, predict_regions=predict_regions if not predict_coordinates else None)

  self.region_middle_points = torch.tensor([coordinates_to_cartesian(*coordinate) for coordinate in self.region_middle_points], dtype=torch.float32).to(self.device)


In [None]:
# TODO: Test the model from best runs
# TODO: Show the different models with the best results (also do it for different data sizes and mapped/non-mapped data)
model_name = run["parameters"]["model_name"]
pretrained_weights = run["files"]["best_model"]

# Countries from 81k more mapped dataset
countries_only = ["Albania", "Argentina", "Australia", "Austria", "Bangladesh", "Belgium", "Bolivia, Plurinational State of", "Botswana", "Brazil", "Bulgaria", "Cambodia", "Canada", "Chile", "Colombia", "Croatia", "Czechia", "Denmark", "Dominican Republic", "Ecuador", "Estonia", "Eswatini", "Finland", "France", "Germany", "Ghana", "Greece", "Guatemala", "Hungary", "India", "Indonesia", "Ireland", "Israel", "Italy", "Japan", "Kenya", "Korea, Republic of", "Kyrgyzstan", "Lao People's Democratic Republic", "Latvia", "Lesotho", "Lithuania", "Malaysia", "Malta", "Mexico", "Montenegro", "Netherlands", "New Zealand", "Nigeria", "North Macedonia", "Norway", "Peru", "Philippines", "Poland", "Portugal", "Romania", "Russian Federation", "Rwanda", "Senegal", "Serbia", "Singapore", "Slovakia", "Slovenia", "South Africa", "Spain", "Sri Lanka", "Sweden", "Switzerland", "Thailand", "T\u00fcrkiye", "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "United States", "Uruguay"]

geo_model_tester.test(model_type=model_name, model_path=pretrained_weights, balanced_on_countries_only=countries_only, accuracy_per_country=False)

if not predict_coordinates:
    # And over all countries
    geo_model_tester.test(model_type=model_name, model_path=pretrained_weights, balanced_on_countries_only=None, accuracy_per_country=True)



Test Loss: 43.8764, Test Distance: 1753.0771, Test Distance Median: 385.5583
Test Top 1 Accuracy: 0.2005, Test Top 3 Accuracy: 0.3405, Test Top 5 Accuracy: 0.4153
Test Top 1 Accuracy (Country): 0.6473, Test Top 3 Accuracy (Country): 0.7085, Test Top 5 Accuracy (Country): 0.7346
Test Top 1 Balanced Accuracy: 0.0845, Test Top 1 Balanced Accuracy (Country): 0.5469
Test Loss: 43.8764, Test Distance: 1753.0771, Test Distance Median: 385.5583
Test Top 1 Accuracy: 0.2005, Test Top 3 Accuracy: 0.3405, Test Top 5 Accuracy: 0.4153
Test Top 1 Accuracy (Country): 0.6473, Test Top 3 Accuracy (Country): 0.7085, Test Top 5 Accuracy (Country): 0.7346
Test Top 1 Balanced Accuracy: 0.0845, Test Top 1 Balanced Accuracy (Country): 0.4161
Accuracy per country:
Country Faroe Islands: 1.00000
Country Qatar: 1.00000
Country Rwanda: 0.94737
Country India: 0.93060
Country Japan: 0.87262
Country Senegal: 0.83824
Country Ghana: 0.82278
Country Lesotho: 0.82203
Country United States: 0.81454
Country Nigeria: 0.780

