Reads the training configuration YAML files.

Generates a PAT embedding model in `assets/models/PAT/`, saving a `.pth` checkpoint every 5 epochs.

In [1]:
import os
import zipfile
import dotenv
import subprocess
import pandas as pd
from typing import Dict, Any, List
import json
import yaml
import time
import glob
import hashlib
import shutil
import logging
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from transformers import pipeline
import torch

# Training configuration files, one per stage of this notebook (paths are relative
# to the working directory).
config_train = "config/UrbanElementsReID_train.yml"
config_train_embedding_r = "config/UrbanElementsReID_train_embedding_r.yml"
config_train_plusplus = "config/UrbanElementsReID_train_patplusplus.yml"  # Needs the file from config_train_embedding_r
# Source dataset that is copied to build the dataset of the refinements model.
dataset_augmented: str = 'assets/datasets/urban-reid-challenge-augmented'

################ Probably nothing has to be modified from now on ################
# Show INFO-level messages of the root logger in the cell outputs.
logging.root.setLevel(logging.INFO)

# Only a warning: the notebook keeps running when no CUDA device is available.
if not torch.cuda.is_available():
    logging.warning("Where is your GPU dude?")

# Every path in this notebook is relative, so fail fast when the working directory
# does not contain `assets` or its path does not end with 'PAT'.
assert os.path.exists('assets'), 'are you are in the right folder?'
assert os.getcwd().endswith('PAT'), 'are you are in the right folder?'

# Record the GPU / driver status in the notebook output.
!nvidia-smi

Sun May 11 23:03:31 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.05              Driver Version: 560.35.05      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla V100-PCIE-16GB           Off |   00000000:18:00.0 Off |                    0 |
| N/A   42C    P0             27W /  250W |       4MiB /  16384MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

# Base embedding model

In [None]:
# Parse the base-model training config. BaseLoader leaves every scalar as a string,
# so numeric settings have to be converted explicitly where they are used.
with open(config_train, 'r') as config_file:
    hyperparams_train = yaml.load(config_file, Loader=yaml.BaseLoader)

# Output folder of the run: <LOG_ROOT>/<LOG_NAME>.
model_path = os.path.join(
    *[hyperparams_train[key] for key in ('LOG_ROOT', 'LOG_NAME')]
)

# Remove the output of a previous run before training again.
if os.path.exists(model_path):
    shutil.rmtree(model_path)

In [4]:
# Train the base embedding model; {config_train} is interpolated by IPython into the shell command.
!python train.py --config_file {config_train}

2025-04-30 12:09:57,454 PAT INFO: Saving model in the path :assets/models/PAT
2025-04-30 12:09:57,455 PAT INFO: Namespace(config_file='config/UrbanElementsReID_train.yml', opts=[], local_rank=0)
2025-04-30 12:09:57,455 PAT INFO: Loaded configuration file config/UrbanElementsReID_train.yml
2025-04-30 12:09:57,455 PAT INFO: 
MODEL:
  PRETRAIN_CHOICE: 'imagenet'
  #PRETRAIN_PATH: "../../.cache/torch/hub/checkpoints" # root of pretrain path
  PRETRAIN_PATH: "assets/models" 
  IF_LABELSMOOTH: 'on'
  IF_WITH_CENTER: 'no'
  NAME: 'part_attention_vit'
  NO_MARGIN: True
  DEVICE_ID: ('0')
  TRANSFORMER_TYPE: 'vit_base_patch16_224_TransReID'
  STRIDE_SIZE: [16, 16]

INPUT:
  SIZE_TRAIN: [256,128]
  SIZE_TEST: [256,128]
  REA:
    ENABLED: False
  PIXEL_MEAN: [0.5, 0.5, 0.5]
  PIXEL_STD: [0.5, 0.5, 0.5]
  LGT: # Local Grayscale Transfomation
    DO_LGT: True
    PROB: 0.5

DATASETS:
  TRAIN: ('UrbanElementsReID',)
  TEST: ('UrbanElementsReID',)
  #ROOT_DIR: ('../../data') # root of datasets
  #RO

In [14]:
# Verify that training ran to completion: the newest checkpoint in `model_path`
# must correspond to SOLVER.MAX_EPOCHS of the config.
files = glob.glob(os.path.join(model_path, "part_attention_vit_*.pth"))
# Fail with a readable message instead of max()'s "empty sequence" ValueError.
assert files, f'no part_attention_vit_*.pth checkpoints found in {model_path}'
# Checkpoints are named part_attention_vit_<epoch>.pth; parse <epoch> from the file name only.
max_epoch = max(
    int(os.path.splitext(os.path.basename(f))[0].rsplit('_', 1)[-1]) for f in files
)
assert max_epoch > 0, f'unexpected checkpoint epoch {max_epoch}'
# The config is loaded with BaseLoader, so MAX_EPOCHS is a string and needs int().
assert max_epoch == int(hyperparams_train['SOLVER']['MAX_EPOCHS']), (
    f"last checkpoint is epoch {max_epoch}, expected {hyperparams_train['SOLVER']['MAX_EPOCHS']}"
)
assert os.path.exists(os.path.join(model_path, f'part_attention_vit_{max_epoch}.pth'))
logging.info(f'max_epoch={max_epoch}')

INFO:root:max_epoch=60


# Refinements embedding model

In [4]:
# Parse the refinements-model training config (BaseLoader: every scalar stays a string).
with open(config_train_embedding_r, 'r') as config_file:
    hyperparams_train_embedding_r = yaml.load(config_file, Loader=yaml.BaseLoader)

# Output folder of the run: <LOG_ROOT>/<LOG_NAME>.
model_path_embedding_r = os.path.join(
    *[hyperparams_train_embedding_r[key] for key in ('LOG_ROOT', 'LOG_NAME')]
)
# Dataset folder declared as DATASETS.ROOT_DIR in the config.
dataset_replaced: str = hyperparams_train_embedding_r['DATASETS']['ROOT_DIR']

# Remove the output of a previous run before training again.
if os.path.exists(model_path_embedding_r):
    shutil.rmtree(model_path_embedding_r)

In [19]:
def process_files(dir_path: str) -> None:
    """Keep only the ``*_refinement_A.jpg`` files of a directory and strip that marker.

    Every entry whose name does not end with ``_refinement_A.jpg`` is deleted, then
    each remaining ``<stem>_refinement_A.jpg`` is renamed to ``<stem>.jpg``. Only the
    trailing marker is removed; a ``_refinement_A`` occurring earlier in the name is
    kept.

    The operation is destructive and not idempotent: on an already processed
    directory no name carries the marker any more, so every file would be deleted.

    Args:
        dir_path: Directory whose files are filtered and renamed in place.

    Raises:
        RuntimeError: If ``dir_path`` is not a directory, if a rename would
            overwrite an existing file, or if the number of files after the
            deletion or after the renaming differs from the initial number of
            refinement files.
    """
    extension = ".jpg"
    suffix = "_refinement_A" + extension

    if not os.path.isdir(dir_path):
        raise RuntimeError(f"Directory {dir_path} does not exist")

    initial_files: List[str] = os.listdir(dir_path)
    logging.info(f"Found {len(initial_files)} initial files")

    original_refinement_count = sum(
        1 for filename in initial_files if filename.endswith(suffix)
    )
    logging.info(f"Original refinement files: {original_refinement_count}")

    # Step 1: Delete non-refinement files
    for filename in initial_files:
        if not filename.endswith(suffix):
            file_path = os.path.join(dir_path, filename)
            logging.debug(f"Deleting {file_path}")
            os.remove(file_path)

    # Step 2: Rename remaining files
    remaining_files: List[str] = os.listdir(dir_path)
    if len(remaining_files) != original_refinement_count:
        raise RuntimeError(f"After deletion, expected {original_refinement_count} files, found {len(remaining_files)}")
    for filename in remaining_files:
        if not filename.endswith(suffix):
            raise RuntimeError(f"Unexpected file {filename} after deletion")
        # Strip only the trailing marker; str.replace() would also remove
        # occurrences in the middle of the name.
        new_name = filename[: -len(suffix)] + extension
        src_path = os.path.join(dir_path, filename)
        dst_path = os.path.join(dir_path, new_name)
        # os.rename() may silently overwrite an existing file, which would lose an image.
        if os.path.exists(dst_path):
            raise RuntimeError(f"Cannot rename {src_path}: {dst_path} already exists")
        logging.debug(f"Renaming {src_path} to {dst_path}")
        os.rename(src_path, dst_path)

    # Step 3: Verify final count
    final_files: List[str] = os.listdir(dir_path)
    if len(final_files) != original_refinement_count:
        raise RuntimeError(f"After renaming, expected {original_refinement_count} files, found {len(final_files)}")
    logging.info(f'Now there are {len(final_files)} images in path {dir_path}')

# Rebuild the dataset of the refinements model from scratch on every run.
# process_files() is destructive and not idempotent, so it always has to start
# from a fresh copy of the augmented dataset.

# Check the source first, so that a missing source does not cost us the previous copy.
if not os.path.exists(dataset_augmented):
    raise RuntimeError('Dataset for trainig can not be generated yet, missing generated images')
if os.path.exists(dataset_replaced):
    shutil.rmtree(dataset_replaced)

logging.info('Dataset for training was not yet generating, trying to generate now...')
# Pure-Python copy: no shell interpolation of the paths, and it fails loudly on error.
shutil.copytree(dataset_augmented, dataset_replaced)
logging.info('Copied from augmented')

for split in ('query', 'test', 'train'):
    # Keep only the "_refinement_A" images, renamed to the original image names.
    process_files(os.path.join(dataset_replaced, f"image_{split}"))

    # Drop the CSV rows of the refinement images: their files no longer exist
    # under those names after process_files().
    split_csv = os.path.join(dataset_replaced, f"{split}.csv")
    df = pd.read_csv(split_csv)
    assert 'imageName' in df.columns
    df2 = df.loc[~df['imageName'].str.contains("_refinement_")].copy()
    assert df2.shape[0] > 0
    df2.to_csv(split_csv, index=False)
    logging.info(f"Overwrote {split} csv to have only {df2.shape[0]} (out of {df.shape[0]} originally)")

INFO:root:Dataset for training was not yet generating, trying to generate now...
INFO:root:Copied from augmented
INFO:root:Found 1384 initial files
INFO:root:Original refinement files: 346
INFO:root:Now there are 346 images in path assets/datasets/urban-reid-challenge-augmented-replaced/image_query
INFO:root:Overwrote query csv to have only 346 (out of 1384 originally)
INFO:root:Found 2016 initial files
INFO:root:Original refinement files: 1008
INFO:root:Now there are 1008 images in path assets/datasets/urban-reid-challenge-augmented-replaced/image_test
INFO:root:Overwrote test csv to have only 1008 (out of 2016 originally)
INFO:root:Found 7214 initial files
INFO:root:Original refinement files: 3607
INFO:root:Now there are 3607 images in path assets/datasets/urban-reid-challenge-augmented-replaced/image_train
INFO:root:Overwrote train csv to have only 3607 (out of 7214 originally)


In [21]:
# List the generated dataset folder to check that the image folders and CSV files are in place.
!ls assets/datasets/urban-reid-challenge-augmented-replaced

image_query		    test_backup.csv
image_test		    test_with_description.csv
image_train		    test_with_refinements.csv
query.csv		    train.csv
query_backup.csv	    train_backup.csv
query_with_description.csv  train_backup_original.csv
query_with_refinements.csv  train_with_description.csv
sample_submission.csv	    train_with_refinements.csv
test.csv


In [22]:
# Inspect the query images: after process_files() no file name should carry the "_refinement_A" marker.
!ls -lah assets/datasets/urban-reid-challenge-augmented-replaced/image_query

total 17M
drwxr-xr-x. 2 benle1 benle1  68K May  6 16:49 .
drwxr-xr-x. 6 benle1 benle1 4.0K May  6 16:49 ..
-rw-r--r--. 1 benle1 benle1  63K May  6 16:49 000001.jpg
-rw-r--r--. 1 benle1 benle1  71K May  6 16:49 000002.jpg
-rw-r--r--. 1 benle1 benle1  64K May  6 16:49 000003.jpg
-rw-r--r--. 1 benle1 benle1  45K May  6 16:49 000004.jpg
-rw-r--r--. 1 benle1 benle1  52K May  6 16:49 000005.jpg
-rw-r--r--. 1 benle1 benle1  70K May  6 16:49 000006.jpg
-rw-r--r--. 1 benle1 benle1  44K May  6 16:49 000007.jpg
-rw-r--r--. 1 benle1 benle1  38K May  6 16:49 000008.jpg
-rw-r--r--. 1 benle1 benle1  45K May  6 16:49 000009.jpg
-rw-r--r--. 1 benle1 benle1  39K May  6 16:49 000010.jpg
-rw-r--r--. 1 benle1 benle1  39K May  6 16:49 000011.jpg
-rw-r--r--. 1 benle1 benle1  51K May  6 16:49 000012.jpg
-rw-r--r--. 1 benle1 benle1  30K May  6 16:49 000013.jpg
-rw-r--r--. 1 benle1 benle1  35K May  6 16:49 000014.jpg
-rw-r--r--. 1 benle1 benle1  57K May  6 16:49 000015.jpg
-rw-r--r--. 1 benle1 benle1  35K May  

In [None]:
# Visual check: show image 000001 of the query, test and train splits, one figure each.
for sample_path in (
    'assets/datasets/urban-reid-challenge-augmented-replaced/image_query/000001.jpg',
    'assets/datasets/urban-reid-challenge-augmented-replaced/image_test/000001.jpg',
    'assets/datasets/urban-reid-challenge-augmented-replaced/image_train/000001.jpg',
):
    sample_image = plt.imread(sample_path)
    plt.imshow(sample_image)
    plt.show()

In [23]:
# Train the refinements embedding model; {config_train_embedding_r} is interpolated by IPython.
!python train.py --config_file {config_train_embedding_r}

2025-05-06 16:50:16,268 PAT INFO: Saving model in the path :assets/models/PAT_r
2025-05-06 16:50:16,268 PAT INFO: Namespace(config_file='config/UrbanElementsReID_train_embedding_r.yml', opts=[], local_rank=0)
2025-05-06 16:50:16,268 PAT INFO: Loaded configuration file config/UrbanElementsReID_train_embedding_r.yml
2025-05-06 16:50:16,269 PAT INFO: 
MODEL:
  PRETRAIN_CHOICE: 'imagenet'
  #PRETRAIN_PATH: "../../.cache/torch/hub/checkpoints" # root of pretrain path
  PRETRAIN_PATH: "assets/models" 
  IF_LABELSMOOTH: 'on'
  IF_WITH_CENTER: 'no'
  NAME: 'part_attention_vit'
  NO_MARGIN: True
  DEVICE_ID: ('0')
  TRANSFORMER_TYPE: 'vit_base_patch16_224_TransReID'
  STRIDE_SIZE: [16, 16]

INPUT:
  SIZE_TRAIN: [256,128]
  SIZE_TEST: [256,128]
  REA:
    ENABLED: False
  PIXEL_MEAN: [0.5, 0.5, 0.5]
  PIXEL_STD: [0.5, 0.5, 0.5]
  LGT: # Local Grayscale Transfomation
    DO_LGT: True
    PROB: 0.5

DATASETS:
  TRAIN: ('UrbanElementsReID',)
  TEST: ('UrbanElementsReID',)
  ROOT_DIR: 'assets/datase

In [None]:
# Verify that training ran to completion: the newest checkpoint in
# `model_path_embedding_r` must correspond to SOLVER.MAX_EPOCHS of the config.
files = glob.glob(os.path.join(model_path_embedding_r, "part_attention_vit_*.pth"))
# Fail with a readable message instead of max()'s "empty sequence" ValueError.
assert files, f'no part_attention_vit_*.pth checkpoints found in {model_path_embedding_r}'
# Checkpoints are named part_attention_vit_<epoch>.pth; parse <epoch> from the file name only.
max_epoch = max(
    int(os.path.splitext(os.path.basename(f))[0].rsplit('_', 1)[-1]) for f in files
)
assert max_epoch > 0, f'unexpected checkpoint epoch {max_epoch}'
# The config is loaded with BaseLoader, so MAX_EPOCHS is a string and needs int().
assert max_epoch == int(hyperparams_train_embedding_r['SOLVER']['MAX_EPOCHS']), (
    f"last checkpoint is epoch {max_epoch}, "
    f"expected {hyperparams_train_embedding_r['SOLVER']['MAX_EPOCHS']}"
)
assert os.path.exists(os.path.join(model_path_embedding_r, f'part_attention_vit_{max_epoch}.pth'))
logging.info(f'max_epoch={max_epoch}')

INFO:root:max_epoch=60


In [None]:
# Copy the final refinements checkpoint to the file name of the pretrained ViT
# weights, so that we can train PAT++ without changing the code.
# The checkpoint is derived from the loaded config instead of a hard-coded
# "assets/models/PAT_r/part_attention_vit_60.pth".
# NOTE(review): MODEL.PRETRAIN_PATH of the PAT++ config must point to this folder
# (it is "assets/models/PAT_r" in the logged config) — confirm when changing LOG_ROOT/LOG_NAME.
final_epoch = int(hyperparams_train_embedding_r['SOLVER']['MAX_EPOCHS'])
final_checkpoint = os.path.join(model_path_embedding_r, f'part_attention_vit_{final_epoch}.pth')
assert os.path.exists(final_checkpoint), f'missing checkpoint {final_checkpoint}, was the model trained?'
shutil.copyfile(
    final_checkpoint,
    os.path.join(model_path_embedding_r, 'jx_vit_base_p16_224-80ecf9dd.pth'),
)

# PAT++

In [2]:
%%time
# Train PAT++ with config_train_plusplus; per the comment on that config, it needs
# the weights file produced from the refinements model in the previous cell.
# TODO: all the checks that are done in the other training should be encapsulated in a function, and done here too

!python train.py --config_file {config_train_plusplus}

2025-05-11 23:03:38,770 PAT INFO: Saving model in the path :assets/models/PAT_plusplus
2025-05-11 23:03:38,770 PAT INFO: Namespace(config_file='config/UrbanElementsReID_train_patplusplus.yml', opts=[], local_rank=0)
2025-05-11 23:03:38,770 PAT INFO: Loaded configuration file config/UrbanElementsReID_train_patplusplus.yml
2025-05-11 23:03:38,770 PAT INFO: 
MODEL:
  PRETRAIN_CHOICE: 'imagenet'
  #PRETRAIN_PATH: "../../.cache/torch/hub/checkpoints" # root of pretrain path
  PRETRAIN_PATH: "assets/models/PAT_r" 
  IF_LABELSMOOTH: 'on'
  IF_WITH_CENTER: 'no'
  NAME: 'part_attention_vit'
  NO_MARGIN: True
  DEVICE_ID: ('0')
  TRANSFORMER_TYPE: 'vit_base_patch16_224_TransReID'
  STRIDE_SIZE: [16, 16]

INPUT:
  SIZE_TRAIN: [256,128]
  SIZE_TEST: [256,128]
  REA:
    ENABLED: False
  PIXEL_MEAN: [0.5, 0.5, 0.5]
  PIXEL_STD: [0.5, 0.5, 0.5]
  LGT: # Local Grayscale Transfomation
    DO_LGT: True
    PROB: 0.5

DATASETS:
  TRAIN: ('UrbanElementsReID',)
  TEST: ('UrbanElementsReID',)
  #ROOT_DIR: 