In [1]:
# Needed when running on GPUHub: uncomment the line below to install/upgrade the project requirements
# !pip install -r ../../requirements.txt --upgrade

## Importing Libraries and tokens

In [2]:
import os
import pprint
import sys

import torch
import wandb
from torchvision import transforms

# load .env file
from dotenv import load_dotenv
from geo_model_deployer import GeoModelDeployer
from image_data_handler_deploy import DeployImageDataHandler
sys.path.insert(0, '../5_evaluation')
from wandb_downloader import WandbDownloader

os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [3]:
# Location of the optional project-level .env file (relative to this notebook)
env_path = '../../.env'

# A token already exported in the environment takes precedence; the .env file
# is only read when no token is set and the file actually exists.
WANDB_TOKEN = os.environ.get('WANDB_TOKEN')
if not WANDB_TOKEN and os.path.exists(env_path):
    load_dotenv(env_path)
    WANDB_TOKEN = os.environ.get('WANDB_TOKEN')

In [4]:
# Report whether a CUDA device is visible and, if so, summarize device 0.
if not torch.cuda.is_available():
    print("No GPU found.")
else:
    print("GPU is available.")

    # Name of the first CUDA device
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")

    # Static properties of device 0, queried once and reused below
    device_properties = torch.cuda.get_device_properties(0)

    # Memory figures are converted from bytes to GB (divided by 1e9)
    total_memory = device_properties.total_memory / 1e9
    print(f"Total Memory: {total_memory:.2f} GB")

    allocated_memory = torch.cuda.memory_allocated(0) / 1e9
    print(f"Allocated Memory: {allocated_memory:.2f} GB")

    cached_memory = torch.cuda.memory_reserved(0) / 1e9
    print(f"Cached Memory: {cached_memory:.2f} GB")

    # Compute capability and multiprocessor count
    print(f"CUDA Capability: {device_properties.major}.{device_properties.minor}")
    print(f"Multi-Processor Count: {device_properties.multi_processor_count}")

No GPU found.


## Loading files from wandb

In [5]:
# Log in with the explicit token when one was found; an empty/missing token
# becomes key=None, i.e. the same call as a bare wandb.login().
wandb.login(key=WANDB_TOKEN or None)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkillusions[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [6]:
# Selection criteria for the Weights & Biases runs this notebook deploys.
entity = "nlp_ls"
project = "dspro2-predicting-region"
predict_coordinates = False
predict_regions = True
metric_name = "Validation Accuracy Top 1"  # metric handed to the downloader to pick the best runs
data_augmentation = "base_augmentation"  # or "full_augmentation_v2"
datasize = 332786  # Replace with the desired datasize
file_names_to_download = [".pth", ".json"]
image_size = [80, 130]

# Collect the best matching runs together with the requested file entries.
downloader = WandbDownloader(entity, project, data_augmentation, datasize, image_size)
run_data = downloader.get_and_collect_best_runs(metric_name, file_names_to_download)

# run_data is keyed by run label ("Best Run 1", ...) and is used by the cells below.
pprint.pprint(run_data)

Found 5 matching runs.
{'Best Run 1': {'files': {'best_model': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-region/kwlp08u6/best_model_checkpointmodel_efficientnet_b1_lr_0.01_opt_adamW_weightDecay_0.01_imgSize_[80, '
                                        '130]_predict_coordinates_False.pth',
                          'country_to_index.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-region/kwlp08u6/run-20240623_193550-kwlp08u6/country_to_index.json',
                          'region_index_to_country_index.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-region/kwlp08u6/run-20240623_193550-kwlp08u6/region_index_to_country_index.json',
                          'region_index_to_middle_point.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-region/kwlp08u6/run-20240623_193550-kwlp08u6/region_index_to_middle_point.json',
                          'region_to_index.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-region/kwlp08u6/run-20240

In [7]:
# Show the labels under which the collected runs are stored in run_data.
run_data.keys()

dict_keys(['Best Run 1', 'Best Run 2', 'Best Run 3', 'Best Run 4', 'Best Run 5'])

In [8]:
# Report the logged top-1 / top-3 / top-5 validation accuracy of "Best Run 1".
best_run_metrics = run_data["Best Run 1"]["metrics"]
for top_k in (1, 3, 5):
    metric_key = f"Validation Accuracy Top {top_k}"
    print(f"{metric_key}: ", best_run_metrics[metric_key])

Validation Accuracy Top 1:  0.2021875985996965
Validation Accuracy Top 3:  0.34047508150908246
Validation Accuracy Top 5:  0.4164250191565125


In [9]:
# Show the parameters stored for "Best Run 1" (model name, input image size, ...).
run_data["Best Run 1"]["parameters"]

{'seed': 42,
 'epochs': 50,
 'optimizer': 'adamW',
 'model_name': 'efficientnet_b1',
 'mapped_data': False,
 'dataset_size': 332786,
 'weight_decay': 0.01,
 'learning_rate': 0.01,
 'predict_regions': True,
 'input_image_size': [80, 130],
 'data_augmentation': 'base_augmentation',
 'different_regions': 4596,
 'dataset_identifier': '22a493044dbe99c1d431b9ee4656792efbb09ece4182274670ba5faec505d9cf',
 'different_countries': 138,
 'predict_coordinates': False}

In [10]:
# Stage 1: resize to the input size recorded in the run's parameters.
input_image_size = run_data["Best Run 1"]["parameters"]["input_image_size"]
base_transform = transforms.Compose([transforms.Resize(input_image_size)])

# Stage 2: convert to a tensor, then normalize each of the three channels
# with mean 0.5 and std 0.5.
channel_mean = (0.5, 0.5, 0.5)
channel_std = (0.5, 0.5, 0.5)
final_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

In [11]:
# Show the file entries (name -> location) stored for "Best Run 1".
run_data["Best Run 1"]["files"]

{'wandb_manifest.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-region/kwlp08u6/artifact/918441477/wandb_manifest.json',
 'best_model': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-region/kwlp08u6/best_model_checkpointmodel_efficientnet_b1_lr_0.01_opt_adamW_weightDecay_0.01_imgSize_[80, 130]_predict_coordinates_False.pth',
 'country_to_index.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-region/kwlp08u6/run-20240623_193550-kwlp08u6/country_to_index.json',
 'region_index_to_country_index.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-region/kwlp08u6/run-20240623_193550-kwlp08u6/region_index_to_country_index.json',
 'region_index_to_middle_point.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-region/kwlp08u6/run-20240623_193550-kwlp08u6/region_index_to_middle_point.json',
 'region_to_index.json': 'https://api.wandb.ai/files/nlp_ls/dspro2-predicting-region/kwlp08u6/run-20240623_193550-kwlp08u6/region_to_index.json',
 'wandb-metada

## Loading data and creating data loader

In [12]:
# Pick the highest-ranked run that actually provides a model checkpoint.
# The number of candidates follows the size of run_data instead of assuming
# exactly five runs, so fewer matching runs no longer raise a KeyError.
run = None

for rank in range(1, len(run_data) + 1):
    candidate = run_data.get(f"Best Run {rank}")
    if candidate and candidate.get("files", {}).get("best_model"):
        run = candidate
        break
    print(f"Run {rank} does not contain the necessary files. Trying the next run...")

if run is None:
    # RuntimeError is still an Exception, so existing handlers keep working.
    raise RuntimeError("No run with the necessary files found.")

# Look up the entry of each index-mapping JSON in the selected run; an entry
# is None when the run has no file of that name.
files = run["files"]
mapping_sources = {
    file_name: files.get(file_name)
    for file_name in (
        'country_to_index.json',
        'region_to_index.json',
        'region_index_to_middle_point.json',
        'region_index_to_country_index.json',
    )
}

# The handler receives the four mapping sources plus both transform stages.
data_handler = DeployImageDataHandler(
    mapping_sources['country_to_index.json'],
    mapping_sources['region_to_index.json'],
    mapping_sources['region_index_to_middle_point.json'],
    mapping_sources['region_index_to_country_index.json'],
    base_transform,
    final_transform,
)

# Expose the handler's mappings and counts under the names later cells use.
country_to_index = data_handler.country_to_index
region_to_index = data_handler.region_to_index
region_index_to_middle_point = data_handler.region_index_to_middle_point
region_index_to_country_index = data_handler.region_index_to_country_index

num_regions = data_handler.num_regions
num_countries = data_handler.num_countries

Loaded 138 countries.
Loaded 4596 regions.
Loaded 4596 region middle points.
Loaded 3595 region to country index mappings.


## Evaluating the model

In [13]:
# Number of output classes: none when predicting coordinates, otherwise the
# region count or the country count depending on predict_regions.
if predict_coordinates:
    num_classes = None
elif predict_regions:
    num_classes = num_regions
else:
    num_classes = num_countries

# A class count of zero means the mappings came back empty.
if num_classes == 0:
    raise ValueError("No classes detected. Please check the data.")

# predict_regions is passed as None when predicting coordinates.
geo_model_tester = GeoModelDeployer(
    num_classes=num_classes,
    predict_coordinates=predict_coordinates,
    country_to_index=country_to_index,
    region_to_index=region_to_index,
    region_index_to_middle_point=region_index_to_middle_point,
    region_index_to_country_index=region_index_to_country_index,
    predict_regions=None if predict_coordinates else predict_regions,
)

  self.region_middle_points = torch.tensor([coordinates_to_cartesian(*coordinate) for coordinate in self.region_middle_points], dtype=torch.float32).to(self.device)


In [14]:
# Take the architecture name and checkpoint from `run`, the run chosen in the
# data-loading step, instead of a hard-coded "Best Run 1". This keeps the
# weights consistent with the index mappings given to the data handler and
# avoids a KeyError when "Best Run 1" has no checkpoint.
model_name = run["parameters"]["model_name"]
pretrained_weights = run["files"]["best_model"]

geo_model_tester.prepare(model_type=model_name, model_path=pretrained_weights)

In [15]:
# Load the sample image through the data handler, then run the prediction on it.
test_image = data_handler.load_single_image('./test.png')
geo_model_tester.predict(test_image, top_n=5)

([['Türkiye_Rize_TUR-2301',
   'Türkiye_Trabzon_TUR-2302',
   'Türkiye_Ordu_TUR-2293',
   'Türkiye_Bayburt_TUR-3032',
   'Türkiye_Gümüshane_TUR-3031',
   'Türkiye_Kocaeli_TUR-2266',
   'Türkiye_Giresun_TUR-2292',
   'Türkiye_Sakarya_TUR-2267',
   'Türkiye_Yalova_TUR-5518',
   'Türkiye_Düzce_TUR-5519',
   'Türkiye_Bilecik_TUR-2263',
   'Türkiye_Sivas_TUR-2295',
   'Türkiye_Erzincan_TUR-2300',
   'Türkiye_Tokat_TUR-2297',
   'Türkiye_Bursa_TUR-2264',
   'Türkiye_Bingöl_TUR-2308',
   'Türkiye_Samsun_TUR-2296',
   'Ukraine_Kharkiv_UKR-328',
   'Türkiye_Eskisehir_TUR-2269',
   'Türkiye_Erzurum_TUR-2299',
   'Türkiye_Bolu_TUR-2268',
   'Türkiye_Zinguldak_TUR-3010',
   'Türkiye_Istanbul_TUR-2265',
   'Türkiye_Tunceli_TUR-3045',
   'Türkiye_Batman_TUR-3042',
   "Ukraine_Dnipropetrovs'k_UKR-326",
   'Türkiye_Mus_TUR-2310',
   'Türkiye_Kütahya_TUR-2276',
   'Türkiye_Adana_TUR-3018',
   'Türkiye_Amasya_TUR-2290',
   'Türkiye_Kinkkale_TUR-3019',
   'Türkiye_Ankara_TUR-3008',
   'Türkiye_Afyonkarah