In [1]:
from models.detector_model.model import ObjectDetectionModel
from models.detector_model.processor import TrainingProcessor
from models.detector_model.data_utils import TrainingDataset, COCOProcessor
from models.detector_model.loss import ObjectDetectionLoss
from torch.utils.data import DataLoader
import torch
import torch.optim as optim

# Maps each coarse material group (the dict key) to the fine-grained label
# names that are folded into it. Key order matters: later cells derive the
# class list and the class count from this dict.
# NOTE(review): "Plastic glooves" is kept exactly as written — presumably it
# mirrors the label spelling used by the source dataset; confirm before "fixing" it.
grouped_classes = {
    "Metal": [
        "Metal bottle cap", "Metal lid", "Drink can",
        "Pop tab", "Scrap metal", "Food Can",
        "Aluminium blister pack", "Aluminium foil", "Aerosol",
    ],
    "Plastic": [
        "Plastic bottle cap", "Other plastic wrapper", "Six pack rings",
        "Single-use carrier bag", "Plastic straw", "Plastic glooves",
        "Plastic utensils", "Disposable plastic cup", "Other plastic bottle",
        "Tupperware", "Spread tub", "Garbage bag",
        "Other plastic container", "Other plastic", "Rope & strings",
        "Other plastic cup", "Plastic film", "Polypropylene bag",
        "Plastic lid", "Clear plastic bottle", "Squeezable tube",
        "Carded blister pack", "Crisp packet", "Meal carton",
    ],
    "Paper": [
        "Paper cup", "Paper bag", "Normal paper",
        "Paper straw", "Tissues", "Toilet tube",
        "Wrapping paper", "Pizza box", "Magazine paper",
        "Corrugated carton", "Egg carton", "Other carton",
        "Drink carton",
    ],
    "Glass": ["Glass jar", "Glass bottle", "Glass cup", "Broken glass"],
    "Waste": [
        "Cigarette", "Food waste", "Foam cup",
        "Disposable food container", "Foam food container", "Shoe",
        "Unlabeled litter", "Styrofoam piece",
    ],
    "Battery": ["Battery"],
}

In [2]:
# Detector sized to the number of coarse groups in `grouped_classes`
# (one output class per dict key), with 4 anchors and a 4-wide grid setting.
model = ObjectDetectionModel(
    num_classes=len(grouped_classes),
    num_anchors=4,
    grid_size=4,
)
# Emits the parameter summary shown in this cell's output.
model.count_parameters()

# Annotation reader configured with the same fine-label -> group mapping.
coco_processor = COCOProcessor(classes=grouped_classes)

MODEL PARAMETER SUMMARY
Total parameters:      390,700
Trainable parameters:  390,700
Non-trainable params:  0


In [3]:
# Single place to repoint the notebook at a different copy of the data.
# The strings built from it below are identical to the paths used before.
DATASET_ROOT = 'D:/Sakal/AI_FARM/Recycling_Classification/Dataset'
TRASH_TRAIN_DIR = f'{DATASET_ROOT}/Dataset/Trash Detection.v14i.coco/train'
TACO_DIR = f'{DATASET_ROOT}/TACO/data'


def unique_class_names(annotations):
    """Collect the distinct entries found under the 'Class' key of every record.

    Args:
        annotations: iterable of records, each indexable with the key 'Class'.
            The value under 'Class' is iterated item by item.
            NOTE(review): assumes that value is a list of names; a bare string
            would be split into single characters — confirm against
            `COCOProcessor.extract_annotations`.

    Returns:
        A list of the distinct items, in no guaranteed order. Empty input
        gives an empty list.
    """
    return list({name for record in annotations for name in record['Class']})


# Trash Detection export: annotation file and images share the train folder.
extracted_trash = coco_processor.extract_annotations(
    f'{TRASH_TRAIN_DIR}/_annotations.coco.json',
    TRASH_TRAIN_DIR,
    convert=False
)

# TACO: same call, but with `convert=True`.
extracted_taco = coco_processor.extract_annotations(
    f'{TACO_DIR}/annotations.json',
    TACO_DIR,
    convert=True
)

# Distinct class names present in each extracted dataset.
classes_names_trash = unique_class_names(extracted_trash)
classes_names_taco = unique_class_names(extracted_taco)

In [4]:
from PIL import Image

# Group names in dict order: ['Metal', 'Plastic', 'Paper', 'Glass', 'Waste', 'Battery']
classes = list(grouped_classes)

# Grid and anchor settings are read back from the model so the two stay in sync.
processor = TrainingProcessor(
    input_size=448,
    grid_size=model.grid_size,
    num_anchors=model.num_anchors,
    classes=classes,
)

# NOTE(review): `is_training=False` is passed even though this dataset feeds the
# training loop — confirm that is intended.
trash_dataset = TrainingDataset(
    data_json=extracted_trash,
    processor=processor,
    is_training=False,
)
# Batches of 25 samples, reshuffled every epoch.
trash_dataloader = DataLoader(
    trash_dataset,
    batch_size=25,
    shuffle=True,
)

# image_tensor, target_tensor, anchor_pose = processor.process_training_sample(
#     extracted_trash[90], apply_augmentation=False, get_anchors=True)

# processor.visualize_training_sample(
#     image_tensor, target_tensor, anchor_pose,)

# bboxes = processor.convert_yolo_output_to_bboxes(target_tensor)
# processor.draw_bbox_on_image(image_tensor, bboxes, tensor=True, show=False)

In [5]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Detection loss built around the shared processor.
# NOTE(review): six alpha values are given, presumably one per entry of
# `classes` in the same order — confirm against ObjectDetectionLoss.
criterion = ObjectDetectionLoss(
    processor=processor,
    classes_alpha=[0.3, 0.4, 0.3, 0.1, 0.3, 0.2],
)

# Adam settings collected in one place, then unpacked into the optimizer.
adam_settings = dict(
    lr=1e-4,             # learning rate
    betas=(0.9, 0.999),  # beta1 and beta2 for momentum estimates
    eps=1e-8,            # small constant for numerical stability
    weight_decay=0,      # L2 regularization
)
optimizer = optim.Adam(model.parameters(), **adam_settings)

# Loop controls read by the training cell: number of passes over the data and
# how often (in batches) a progress line is printed.
num_epochs = 50
batch_interval = 1

In [7]:
# Training loop: `num_epochs` passes over `trash_dataloader`, one optimizer step
# per batch, printing a progress line every `batch_interval` batches and the
# mean batch loss at the end of each epoch.

# Fail with a clear message instead of a ZeroDivisionError when computing the
# epoch average over zero batches.
batches_per_epoch = len(trash_dataloader)
if batches_per_epoch == 0:
    raise ValueError(
        "trash_dataloader yields no batches; check the extracted annotations "
        "and the DataLoader batch size."
    )

model.to(device)
# Put the model in training mode explicitly so the loop does not depend on
# whatever mode an earlier cell left it in.
# NOTE(review): assumes ObjectDetectionModel is a torch.nn.Module with the
# standard `train()` method — confirm.
model.train()

for epoch in range(num_epochs):
    # Running totals are reset at the start of every epoch.
    epoch_loss = 0.0
    num_batches = 0

    for i, (x, y) in enumerate(trash_dataloader):
        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()
        y_pred = model(x)
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for the total and the log line.
        batch_loss = loss.item()
        epoch_loss += batch_loss
        num_batches += 1

        if i % batch_interval == 0:
            print(f'\tBatch: [{i+1}/{batches_per_epoch}], Batch Loss: {batch_loss:.4f}, Overall Loss: {(epoch_loss / num_batches):.4f}')

    avg_epoch_loss = epoch_loss / num_batches
    print(f'Epoch: [{epoch+1}/{num_epochs}], Avg Loss: {avg_epoch_loss:.4f}')

	Batch: [1/240], Batch Loss: 0.0000, Overall Loss: 0.0000
	Batch: [2/240], Batch Loss: 0.0230, Overall Loss: 0.0115
	Batch: [3/240], Batch Loss: 0.0000, Overall Loss: 0.0077
	Batch: [4/240], Batch Loss: 0.0113, Overall Loss: 0.0086
	Batch: [5/240], Batch Loss: 0.0000, Overall Loss: 0.0069
	Batch: [6/240], Batch Loss: 0.0000, Overall Loss: 0.0057
	Batch: [7/240], Batch Loss: 0.0238, Overall Loss: 0.0083
	Batch: [8/240], Batch Loss: 0.0000, Overall Loss: 0.0073
	Batch: [9/240], Batch Loss: 0.0000, Overall Loss: 0.0065
	Batch: [10/240], Batch Loss: 0.0000, Overall Loss: 0.0058
	Batch: [11/240], Batch Loss: 0.0499, Overall Loss: 0.0098
	Batch: [12/240], Batch Loss: 0.0000, Overall Loss: 0.0090
	Batch: [13/240], Batch Loss: 0.0227, Overall Loss: 0.0101
	Batch: [14/240], Batch Loss: 0.0000, Overall Loss: 0.0093
	Batch: [15/240], Batch Loss: 0.0000, Overall Loss: 0.0087
	Batch: [16/240], Batch Loss: 0.0000, Overall Loss: 0.0082
	Batch: [17/240], Batch Loss: 0.0000, Overall Loss: 0.0077
	Batch

KeyboardInterrupt: 

In [None]:
# Display the `alpha` attribute stored on the criterion's `binary_focal_loss` member.
# NOTE(review): the recorded output is 0.25, which matches none of the values in the
# `classes_alpha` list given to ObjectDetectionLoss — confirm whether that list is
# expected to reach this attribute.
criterion.binary_focal_loss.alpha

0.25