# Steps to train part classifiers

### Prepare environment for training

 - Import necessary modules: `sys` and `os`.
 - Define the project's main directory, append it to `sys.path`, and make it the current working directory.

In [1]:
import sys
import os

# Resolve the project root only once per kernel session. The relative path '..'
# is resolved against the current working directory, and this cell changes that
# directory at the end; recomputing the path on a re-run would therefore climb
# one level further up every time the cell is executed.
if 'project_main_dir' not in globals():
    project_main_dir = os.path.abspath('..')
# Add the project's main directory to sys.path so the later `src.*` imports can resolve
if project_main_dir not in sys.path:
    sys.path.append(project_main_dir)
# Work from the project root from here on.
os.chdir(project_main_dir)

##### Import the required classes and functions

In [2]:
from src.utils.data_utils import save_concept_hierarchy , load_concept_hierarchy
from src.utils.config import TrainingConfig
from src.training.data_loader import DataLoader
from src.training.trainer import train_classifiers

##### Initialize the training configuration and the data loader

In [3]:
# Build the training configuration; no arguments are passed to TrainingConfig here.
# NOTE(review): custom settings can presumably be supplied as keyword arguments to
# TrainingConfig — TODO confirm the accepted names in src.utils.config.
config = TrainingConfig()
# The data loader is constructed from that same config object.
data_loader = DataLoader(config)

Using cache found in /home/elenc2/.cache/torch/hub/facebookresearch_dinov2_main


##### Load images and concept hierarchy

In [None]:
# Ask the data loader for its data. The call returns a pair that is unpacked into
# `image_cache` and `concept_parts`; both are used as mappings below (`.keys()`).
# NOTE(review): presumably image_cache has one entry per image and concept_parts
# maps each concept name to its parts — confirm in src.training.data_loader.
image_cache, concept_parts = data_loader.get_image_data_from_path()

# Save the concept hierarchy to concept_hierarchy.pkl inside config.checkpoint_dir
save_concept_hierarchy(concept_parts, f"{config.checkpoint_dir}/concept_hierarchy.pkl")
# Summary of what was loaded: entry counts first, then the full key view of
# concept_parts (this last line gets long when there are many concepts).
print(f"Loaded data: for {len(image_cache.keys())} images")
print(f"Concept parts: for {len(concept_parts.keys())} concepts")
print(f"Keys are {concept_parts.keys()} concepts")

### Build the root-concept parts dictionary (root concept = text before the first `--` in a concept name)

In [6]:
# Collapse the concepts into their root concepts: the root is the text before
# the first '--' in the concept name, and its parts are the union of the parts
# of every concept that shares that root.
rootConcept_parts = {}
for concept, parts in concept_parts.items():
    rootConcept = concept.partition('--')[0]
    # setdefault creates the empty set the first time a root is seen;
    # update then merges this concept's parts into it in place.
    rootConcept_parts.setdefault(rootConcept, set()).update(parts)

# Report how many distinct parts each root concept ended up with.
for rootConcept, parts in rootConcept_parts.items():
    print(f"Number of parts: {len(parts)} for {rootConcept}")
    print("--")

Number of parts: 115 for boats
--
Number of parts: 146 for office supplies
--
Number of parts: 150 for kitchen
--
Number of parts: 64 for helicopter
--
Number of parts: 110 for tools
--
Number of parts: 210 for geography
--
Number of parts: 93 for garden
--
Number of parts: 90 for vehicles
--
Number of parts: 54 for weapons
--
Number of parts: 39 for drones
--
Number of parts: 22 for ships
--
Number of parts: 14 for geometry
--
Number of parts: 15 for airplanes
--


### Save the root-concept hierarchy and train root-concept classifiers

In [None]:
# Save the root-concept -> parts mapping to root_concept_hierarchy.pkl inside
# config.checkpoint_dir before training starts.
save_concept_hierarchy(rootConcept_parts, f"{config.checkpoint_dir}/root_concept_hierarchy.pkl")
# Summary of the training inputs: image count, root-concept count, and the root names.
print(f"Loaded data: for {len(image_cache.keys())} images")
print(f"Root Concept parts: for {len(rootConcept_parts.keys())} concepts")
print(f"Keys are {rootConcept_parts.keys()} concepts")

# Train on the root-level mapping. The result is used as a mapping below (`.keys()`).
# Long-running cell: the recorded run took roughly 1h10m for 13 root concepts.
print("Starting classifier training...")
root_classifiers = train_classifiers(image_cache, rootConcept_parts, config)
print(f"Training completed : trained classifiers for {len(root_classifiers.keys())} concepts")

Concept hierarchy saved to checkpoints_vitl14//l1/root_checkpoints/root_concept_hierarchy.pkl
Loaded data: for 6266 images
Root Concept parts: for 13 concepts
Keys are dict_keys(['boats', 'office supplies', 'kitchen', 'helicopter', 'tools', 'geography', 'garden', 'vehicles', 'weapons', 'drones', 'ships', 'geometry', 'airplanes']) concepts
Starting classifier training...


Training part classifiers for concepts: 100%|██████████| 13/13 [1:10:30<00:00, 325.40s/it]

Training completed : trained classifiers for 13 concepts





##### Train part classifiers for every concept

In [10]:
# Train on the full concept -> parts mapping, using the same image_cache and the
# same config object that the root-concept training cell uses.
# NOTE(review): the recorded run finished 572 concepts in under a second, versus
# ~325 s per root concept above. Possibly existing checkpoints were reused or the
# work was skipped — confirm in src.training.trainer, and verify that the two
# training runs do not overwrite each other's files under config.checkpoint_dir.
print("Starting classifier training...")
classifiers = train_classifiers(image_cache, concept_parts, config)
print(f"Training completed : trained classifiers for {len(classifiers.keys())} concepts")

Starting classifier training...


Training part classifiers for concepts: 100%|██████████| 572/572 [00:00<00:00, 1635.12it/s]

Training completed : trained classifiers for 572 concepts



