<a href="https://colab.research.google.com/github/SJin765/class_AI4dl/blob/main/Graph_Competition/EfficientDet_train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Benetech | EfficientDet [Train]

https://www.kaggle.com/code/alejopaullier/benetech-efficientdet-train

# Import EfficientDet, PyTorch

In [None]:
# !pip install pycocotools

In [None]:
import sys
sys.path.insert(0, "../input/timm-efficientdet-pytorch")
sys.path.insert(0, "../input/omegaconf")
sys.path.insert(0, "../input/albumentations")

import albumentations as A
import copy
import cv2
import gc
import matplotlib.pyplot as plt
import multiprocessing
import numpy as np
import os
import pandas as pd
import PIL
import random
import time
import timm
import torch
import torch.nn as nn


from albumentations.pytorch.transforms import ToTensorV2
from colorama import Fore, Back, Style
from datetime import datetime, timedelta
from glob import glob
from pprint import pprint
from sklearn.model_selection import KFold, GroupKFold, StratifiedKFold
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.optim import lr_scheduler
from tqdm import tqdm


# Console colour helpers used by the training log messages.
c_  = Fore.GREEN
sr_ = Style.RESET_ALL
print(f"There are {multiprocessing.cpu_count()} CPUs available")
print()
# Create the output folders idempotently so re-running this cell does not
# complain that they already exist (as `mkdir` does).
os.makedirs("logs", exist_ok=True)
os.makedirs("saved_models", exist_ok=True)

# Configuration

## Hyperparameter

In [3]:
# Use the files that were uploaded to my Google Drive.
# Mount the drive.
from google.colab import drive
drive.mount('/content/drive')

# file_path = '/content/drive/MyDrive/benetech-making-graphs-accessible'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Unzip the competition archive inside Google Drive.
import zipfile

zip_file_path = '/content/drive/MyDrive/benetech-making-graphs-accessible.zip'
extract_path = '/content/drive/MyDrive/benetech-making-graphs-accessible'

# Open the archive.
with zipfile.ZipFile(zip_file_path, 'r') as zip_file:
    # Read the member list once; it does not change while we iterate.
    file_list = zip_file.namelist()

    # Directory entries are the members whose name ends with '/'.
    folder_list = [name for name in file_list if name.endswith('/')]

    # Print every folder followed by the files stored beneath it.
    for folder_name in folder_list:
        print(f"Folder: {folder_name}")
        for file_name in file_list:
            if file_name.startswith(folder_name) and not file_name.endswith('/'):
                print(f"File: {file_name}")

    # Extract the whole archive.
    zip_file.extractall(extract_path)

In [None]:
class config:
    """Training configuration shared by the cells of this notebook.

    ``NUM_CLASSES`` is not declared here; the pre-processing cell attaches it
    once the label vocabulary is known.
    """
    BATCH_SIZE_TRAIN = 4
    BATCH_SIZE_VALID = 2
    DEBUG = False  # when True, prepare_loaders trims both splits to a small sample
    DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    EPOCHS = 5
    FOLDS = 5  # number of GroupKFold splits
    LR = 2e-4
    MIN_LR = 1e-6  # lower bound handed to the schedulers
    NUM_WORKERS = multiprocessing.cpu_count()
    RESOLUTION = 512  # images are resized to RESOLUTION x RESOLUTION
    SAMPLE = 30_000  # number of unique images randomly drawn for the experiment
    SEED = 42
    SCHEDULER = 'CosineAnnealingLR'
    T_0 = 25  # used only by CosineAnnealingWarmRestarts
    # The scheduler is stepped once per training batch, so the cosine period is
    # expressed in batches: (sampled images / batch size) * epochs, plus a
    # margin of 50. Derived from SAMPLE so the two values cannot drift apart.
    T_MAX = int(SAMPLE / BATCH_SIZE_TRAIN * EPOCHS) + 50
    WARMUP_EPOCHS = 0
    WEIGHT_DECAY = 1e-6
    
    
class paths:
    """Locations of the competition training data under the mounted Google Drive."""
    # Folder with the training annotations (not read by the code visible in this notebook).
    TRAIN_ANNOTATIONS_FOLDER = "/content/drive/MyDrive/benetech-making-graphs-accessible/train/annotations/"
    # Folder with the training images; CustomDataset reads `<image_id>.jpg` from here.
    TRAIN_IMAGES_FOLDER = "/content/drive/MyDrive/benetech-making-graphs-accessible/train/images/"

## Utils

In [None]:
def get_stoi(df):
    """
    Build the string-to-index dictionary for the values of ``df.label``.

    :param df: dataframe with a ``label`` column (pandas dataframe). It is not modified.
    :return stoi: dict mapping each distinct label to an integer, numbered from 1
    in order of first appearance.
    """
    # The previous version also built an inverse mapping and a relabelled copy
    # of the dataframe, then discarded both; only the mapping is needed.
    return {label: idx for idx, label in enumerate(df.label.unique(), start=1)}


def seed_everything(seed):
    """
    Seed every random number generator used by the notebook.

    :param seed: integer seed applied to Python's ``random``, NumPy and PyTorch
    (CPU and all CUDA devices); also exported as ``PYTHONHASHSEED``.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # cover every GPU, not only the current one
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick different algorithms from run
    # to run, which works against the deterministic flag set above.
    torch.backends.cudnn.benchmark = False

# Seed all RNGs up front so the random image sampling in the pre-processing cell is repeatable.
seed_everything(config.SEED)

## Load Competition Data

In [None]:
# Load the bounding-box annotations from the CSV stored on Google Drive.
# NOTE: later cells filter, relabel and extend `df` in place of this raw frame.
df = pd.read_csv("/content/drive/MyDrive/benetech-create-bounding-box-dataframe/train.csv")
print(f"Dataframe shape is: {df.shape}")
df.head()

## Data Pre-processing

In [None]:
# Randomly pick the images used for this experiment. The sample size is capped
# at the number of available images so a smaller dataset does not raise.
image_ids = df["image_id"].unique().tolist()
image_ids = random.sample(image_ids, min(config.SAMPLE, len(image_ids)))

# Drop boxes with a negative origin or a non-positive size. The index is stored
# under a descriptive name instead of shadowing the builtin `filter`.
invalid_rows = df[(df["x0"]<0) | (df["y0"]<0) | (df["h"]<=0) | (df["w"]<=0)].index
df = df[~df.index.isin(invalid_rows)]
df = df[df["image_id"].isin(image_ids)]

# Encode the string labels as integers and record the number of classes.
stoi = get_stoi(df)
pprint(stoi)
config.NUM_CLASSES =  len(stoi)
df = df[df["label"].isin(list(stoi.keys()))]
df = df.replace({"label": stoi})

# Keep the old index as an "index" column (the fold summary counts on it) and
# give the frame a fresh 0..n-1 index, which the fold assignment relies on.
df.reset_index(inplace=True)
print(f"Dataframe shape is: {df.shape}")
print(f"Number of classes: {config.NUM_CLASSES}")
df.head()

## Split into train, validation

In [None]:
# No seed is passed here: without shuffling the split depends only on the data.
# Grouping by image_id keeps every box of an image inside a single fold.
gkf = GroupKFold(n_splits=config.FOLDS)
X = df.loc[:, df.columns != "label"]
y = df["label"]
groups = df["image_id"]  # 1-D, matching the documented shape (n_samples,)

for fold, (train_index, val_index) in enumerate(gkf.split(X, y, groups)):
    # val_index holds positions; they equal the labels because the index was reset.
    df.loc[val_index, 'fold'] = int(fold) # Assign to each row its Fold ID
display(df.groupby(['fold','chart'])["index"].count())

## Albumentations transformations



In [None]:
def get_train_transforms():
    """
    Build the albumentations pipeline applied to training samples.

    :return: an ``A.Compose`` that resizes to a square of side ``config.RESOLUTION``,
    normalizes, converts to a tensor and carries pascal_voc boxes with their ``labels``.
    """
    side = config.RESOLUTION
    steps = [
        A.Resize(height=side, width=side, p=1),
        A.Normalize(p=1),
        ToTensorV2(p=1.0),
    ]
    box_settings = A.BboxParams(
        format='pascal_voc', min_area=0,  min_visibility=0, label_fields=['labels']
    )
    return A.Compose(steps, p=1.0, bbox_params=box_settings)

def get_valid_transforms():
    """
    Build the albumentations pipeline applied to validation samples.

    :return: an ``A.Compose`` that resizes to a square of side ``config.RESOLUTION``,
    normalizes, converts to a tensor and carries pascal_voc boxes with their ``labels``.
    """
    side = config.RESOLUTION
    steps = [
        A.Resize(height=side, width=side, p=1.0),
        A.Normalize(p=1),
        ToTensorV2(p=1.0),
    ]
    box_settings = A.BboxParams(
        format='pascal_voc', min_area=0, min_visibility=0, label_fields=['labels']
    )
    return A.Compose(steps, p=1.0, bbox_params=box_settings)

## Create custom PyTorch dataset

In [None]:
class CustomDataset(Dataset):
    """
    Detection dataset that yields one image together with all of its bounding boxes.

    :param df: annotation dataframe with one row per box and the columns
    ``image_id``, ``x0``, ``y0``, ``w``, ``h`` and ``label`` (pandas dataframe).
    :param transforms: callable invoked as ``transforms(image=..., bboxes=..., labels=...)``
    that returns a dict with the same three keys (e.g. an albumentations ``Compose``).
    When None, the image is only converted to a channel-first tensor.
    """

    def __init__(self, df, transforms=None):
        super().__init__()
        self.df = df # pandas dataframe
        self.transforms = transforms # albumentations transformations
        self.image_ids = self.df["image_id"].unique().tolist() # list with unique image IDs

    def __getitem__(self, index: int):
        """
        :return image: the transformed image as a channel-first tensor.
        :return target: a dictionary with the boxes as a (number_bboxes, 4) tensor in
        (y0, x0, y1, x1) order, the matching labels tensor, the sample index tensor,
        the image size (height, width) and an image scale fixed at 1.0.
        :return image_id: the image ID. A unique identifier for each image.
        """
        image_id = self.image_ids[index] # select one image
        image, boxes = self.load_image_and_boxes(index) # load the image and its associated bounding boxes
        labels = self.get_labels(index)
        target = {
            'boxes' : boxes,
            'labels' : labels,
            'index' : torch.tensor([index])
        }

        if self.transforms is None:
            # No pipeline supplied: only move the channels first.
            image = torch.from_numpy(image).permute(2, 0, 1)
            new_boxes, new_labels = boxes, labels
        else:
            transformed_image = self.transforms(**{
                'image': image,
                'bboxes': target['boxes'],
                'labels': target["labels"]
            })
            image = transformed_image['image']
            new_boxes = transformed_image['bboxes']
            # Take the labels returned by the pipeline so they stay aligned
            # with the boxes if the transform drops any of them.
            new_labels = transformed_image['labels']
        _, new_h, new_w = image.shape

        # Build a (number_bboxes, 4) tensor; an image left without boxes gives (0, 4)
        # instead of failing on an empty stack.
        box_array = np.asarray(new_boxes, dtype=np.float32)
        if box_array.size == 0:
            box_array = box_array.reshape(0, 4)
        target['boxes'] = torch.as_tensor(box_array)[:, [1, 0, 3, 2]]  # Required: change order to: (y, x, y, x)
        target['labels'] = torch.as_tensor(
            [value.item() if hasattr(value, "item") else value for value in new_labels],
            dtype=labels.dtype,
        )
        target["img_size"] = (new_h, new_w)
        target["img_scale"] = torch.tensor([1.0])
        return image, target, image_id

    def __len__(self) -> int:
        """Number of unique images in the dataframe."""
        return len(self.image_ids)

    def load_image_and_boxes(self, index):
        """
        :return image: the RGB image as a float32 numpy array (pixel values are not rescaled here).
        :return boxes: bounding boxes as rows [x0, y0, x0 + w, y0 + h] (pascal_voc). List of lists.
        :raises FileNotFoundError: if the image file cannot be read.
        """
        image_id = self.image_ids[index] # select image
        image_path = f'{paths.TRAIN_IMAGES_FOLDER}{image_id}.jpg'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR) # read image from path
        if image is None:
            # cv2.imread returns None instead of raising on a missing/corrupt file.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32) # convert to RGB
        records = self.df[self.df['image_id'] == image_id] # select all rows corresponding to the image
        boxes = records[['x0', 'y0', 'w', 'h']].values # get bounding box information
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2] # x1 = x0 + w, pascal_voc format
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3] # y1 = y0 + h, pascal_voc format
        boxes = boxes.tolist() # convert to list
        return image, boxes

    def get_labels(self, index):
        """
        :return labels: tensor with the ``label`` value of every row belonging to the image.
        """
        image_id = self.image_ids[index]
        labels = self.df[self.df['image_id'] == image_id]["label"].values.tolist()
        labels = torch.tensor(labels)
        return labels

## Prepare dataloaders

In [None]:
def collate_fn(batch):
    """
    Regroup a list of samples into one tuple per sample field.

    :param batch: iterable of equally long sample tuples.
    :return: tuple whose i-th element is the tuple of every sample's i-th field.
    """
    per_field = zip(*batch)
    return tuple(per_field)


def prepare_loaders(fold, df):
    """
    Build the train and validation DataLoaders for one cross-validation fold.

    Rows whose ``fold`` column equals ``fold`` form the validation split; all other
    rows form the training split. With ``config.DEBUG`` both splits are trimmed to a
    small sample and the batch size is fixed at 20.
    :param fold: current fold (int).
    :param df: train dataframe (pandas dataframe).
    :return train_loader, valid_loader: dataloaders for each stage.
    """
    # `@fold` is resolved by pandas from this function's local variables.
    valid_df = df.query("fold==@fold").reset_index(drop=True)
    train_df = df.query("fold!=@fold").reset_index(drop=True)

    if config.DEBUG:
        # Mini sample for debugging purposes.
        train_df = train_df.head(32*5).query("empty==0")
        valid_df = valid_df.head(32*3).query("empty==0")
        train_batch_size = valid_batch_size = 20
    else:
        train_batch_size = config.BATCH_SIZE_TRAIN
        valid_batch_size = config.BATCH_SIZE_VALID

    train_dataset = CustomDataset(train_df, transforms=get_train_transforms())
    valid_dataset = CustomDataset(valid_df, transforms=get_valid_transforms())

    # Options common to both loaders.
    shared_options = dict(num_workers=config.NUM_WORKERS, collate_fn=collate_fn)

    train_loader = DataLoader(
        train_dataset,
        batch_size=train_batch_size,
        sampler=RandomSampler(train_dataset),
        pin_memory=False,
        drop_last=False,
        **shared_options,
    )
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=valid_batch_size,
        sampler=SequentialSampler(valid_dataset),
        shuffle=False,
        pin_memory=True,
        **shared_options,
    )
    return train_loader, valid_loader


# Build the fold-0 loaders at notebook level; train_loop reads `train_loader` and
# `valid_loader` as globals, and the training cell rebuilds them for each fold.
train_loader, valid_loader = prepare_loaders(fold=0, df=df)

## Scheduler

In [None]:
def fetch_scheduler(optimizer):
    """
    Create the learning-rate scheduler selected by ``config.SCHEDULER``.

    :param optimizer: the optimizer whose learning rate is scheduled.
    :return scheduler: the scheduler instance, or None when ``config.SCHEDULER`` is None.
    :raises ValueError: if ``config.SCHEDULER`` is not one of the supported names.
    """
    name = config.SCHEDULER
    if name is None:
        return None
    if name == 'CosineAnnealingLR':
        return lr_scheduler.CosineAnnealingLR(optimizer, T_max=config.T_MAX,
                                              eta_min=config.MIN_LR)
    if name == 'CosineAnnealingWarmRestarts':
        return lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=config.T_0,
                                                        eta_min=config.MIN_LR)
    if name == 'ReduceLROnPlateau':
        return lr_scheduler.ReduceLROnPlateau(optimizer,
                                              mode='min',
                                              factor=0.1,
                                              patience=7,
                                              threshold=0.0001,
                                              min_lr=config.MIN_LR,)
    if name == 'ExponentialLR':
        return lr_scheduler.ExponentialLR(optimizer, gamma=0.85)
    # Fail with a clear message instead of an UnboundLocalError on a typo.
    raise ValueError(f"Unknown config.SCHEDULER: {name!r}")

# Model

## Create training Model

In [None]:
from effdet.config.model_config import efficientdet_model_param_dict
from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain, DetBenchPredict
from effdet.efficientdet import HeadNet
from effdet.config.model_config import efficientdet_model_param_dict

def create_model(num_classes=config.NUM_CLASSES, image_size=512,
                 architecture="tf_efficientnetv2_s", verbose=False):
    """
    Build an EfficientDet wrapped in ``DetBenchTrain``.

    :param num_classes: number of object classes predicted by the class head.
    :param image_size: side of the square input; should match the resolution the
    dataset transforms resize to.
    :param architecture: effdet model name. A name that effdet does not know is
    registered on the fly as a backbone of the same name.
    :param verbose: when True, pretty-print the resolved model config.
    :return: the model wrapped so that its forward pass returns the training loss.
    """
    # Register the requested architecture rather than a hard-coded one, so the
    # `architecture` argument is honoured for backbones effdet does not ship.
    if architecture not in efficientdet_model_param_dict:
        efficientdet_model_param_dict[architecture] = dict(
            name=architecture,
            backbone_name=architecture,
            backbone_args=dict(drop_path_rate=0.2),
            num_classes=num_classes,
            url='')

    cfg = get_efficientdet_config(architecture)
    cfg.update({'num_classes': num_classes})
    cfg.update({'image_size': (image_size, image_size)})

    if verbose:
        pprint(cfg)

    net = EfficientDet(cfg, pretrained_backbone=True)
    # Replace the classification head with one sized for `num_classes`.
    net.class_net = HeadNet(
        cfg,
        num_outputs=cfg.num_classes,
    )
    return DetBenchTrain(net, cfg)


def load_model(model_weights_path, model):
    """
    Load saved weights into ``model`` and switch it to evaluation mode.

    :param model_weights_path: path of a file written with ``torch.save(state_dict)``.
    :param model: module whose parameters are overwritten in place.
    :return model: the same module, in eval mode.
    """
    # Deserialize onto the CPU so weights saved on a GPU can be loaded on a
    # machine without one; load_state_dict then copies them to the model's device.
    state_dict = torch.load(model_weights_path, map_location="cpu")
    model.load_state_dict(state_dict)
    model.eval()
    return model

## Average Meter : Track metrics and loss

In [None]:
class AverageMeter(object):
    """Keeps the latest value plus a running weighted sum, count and average."""

    def __init__(self):
        """Start with every statistic cleared."""
        self.reset()

    def reset(self):
        """Set the latest value, average, sum and count back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """
        Record a new observation and refresh the running statistics.
        :param val: the observed value, usually a loss.
        :param n: the weight of the observation, usually the number of samples.
        """
        weighted = val * n
        self.sum += weighted
        self.count += n
        self.val, self.avg = val, self.sum / self.count

## Train Function : 1 epoch

In [None]:
def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch):
    """
    Train the model for one pass over ``dataloader``.

    :param model: model whose forward pass ``model(images, annotations)`` returns a
    dict with a ``"loss"`` entry.
    :param optimizer: optimizer updated after every batch.
    :param scheduler: learning-rate scheduler or None. It is stepped once per batch,
    except ``ReduceLROnPlateau``, which is stepped once at the end of the epoch with
    the average training loss because it requires a metric.
    :param dataloader: yields ``(images, targets, image_ids)`` tuples.
    :param device: device the batch tensors are moved to.
    :param epoch: current epoch number (not used inside the function).
    :return loss_meter: AverageMeter holding the epoch's running training loss.
    """
    model.train() # Set model in training mode
    loss_meter = AverageMeter() # Create instance
    steps_on_metric = isinstance(scheduler, lr_scheduler.ReduceLROnPlateau)
    pbar = tqdm(dataloader, total=len(dataloader), desc='Train ') # Progress bar
    for images, targets, image_ids in pbar:
        # === Collate ===
        images = torch.stack(images).to(device).float() # Get images (batch_size, 3, RESOLUTION, RESOLUTION)
        batch_size = images.shape[0] # Get batch size
        boxes = [target['boxes'].to(device).float() for target in targets] # Get bounding boxes
        labels = [target['labels'].to(device).float() for target in targets] # Get labels
        img_size = torch.tensor([target["img_size"] for target in targets]).to(device).float()
        img_scale = torch.tensor([target["img_scale"] for target in targets]).to(device).float()
        annotations = {
            "bbox": boxes,
            "cls": labels,
            "img_size": img_size,
            "img_scale": img_scale
        }
        optimizer.zero_grad() # Zero out gradients
        loss = model(images, annotations) # Forward pass
        loss = loss["loss"]
        loss.backward() # Back propagation
        # Weight by batch_size so the meter's average is per sample
        # (assumes the loss is a batch mean).
        loss_meter.update(loss.detach().item(), batch_size) # Update loss
        optimizer.step() # Update params
        # fetch_scheduler may return None; a plateau scheduler cannot be
        # stepped without a metric, so it is handled after the loop.
        if scheduler is not None and not steps_on_metric:
            scheduler.step() # Update learning rate

        # === Report progress ===
        mem = torch.cuda.memory_reserved() / 1e9 if torch.cuda.is_available() else 0 # Track memory
        current_lr = optimizer.param_groups[0]['lr'] # Get current Learning Rate
        pbar.set_postfix(train_loss=f'{loss_meter.avg:0.4f}',
                         lr=f'{current_lr:0.5f}',
                         gpu_mem=f'{mem:0.2f} GB')

    if steps_on_metric:
        scheduler.step(loss_meter.avg)

    # === Release memory ===
    torch.cuda.empty_cache()
    gc.collect()

    return loss_meter

## Validation Function : 1 epoch

In [None]:
@torch.no_grad()
def valid_one_epoch(model, dataloader, device, epoch, optimizer=None):
    """
    Evaluate the model for one pass over ``dataloader`` without tracking gradients.

    :param model: model whose forward pass ``model(images, annotations)`` returns a
    dict with a ``"loss"`` entry.
    :param dataloader: yields ``(images, targets, image_ids)`` tuples.
    :param device: device the batch tensors are moved to.
    :param epoch: current epoch number (not used inside the function).
    :param optimizer: optional optimizer; when given, its current learning rate is
    shown in the progress bar. The function previously read a global ``optimizer``
    and raised NameError when none existed.
    :return loss_meter: AverageMeter holding the epoch's running validation loss.
    """
    model.eval() # Set model in evaluation mode
    loss_meter = AverageMeter() # Create instance
    pbar = tqdm(dataloader, total=len(dataloader), desc='Valid ') # Progress bar
    for images, targets, image_ids in pbar:
        # === Collate ===
        images = torch.stack(images).to(device).float() # Get images
        batch_size = images.shape[0] # Get batch size
        boxes = [target['boxes'].to(device).float() for target in targets] # Get boxes
        labels = [target['labels'].to(device).float() for target in targets] # Get labels
        img_size = torch.tensor([target["img_size"] for target in targets]).to(device).float()
        img_scale = torch.tensor([target["img_scale"] for target in targets]).to(device).float()

        annotations = {
            "bbox": boxes,
            "cls": labels,
            "img_size": img_size,
            "img_scale": img_scale
        }
        loss = model(images, annotations) # Forward pass
        loss = loss["loss"]
        loss_meter.update(loss.detach().item(), batch_size) # Update loss

        # === Report progress ===
        mem = torch.cuda.memory_reserved() / 1e9 if torch.cuda.is_available() else 0 # Track memory
        postfix = {"valid_loss": f'{loss_meter.avg:0.4f}'}
        if optimizer is not None:
            current_lr = optimizer.param_groups[0]['lr'] # Get current learning rate
            postfix["lr"] = f'{current_lr:0.5f}'
        postfix["gpu_memory"] = f'{mem:0.2f} GB'
        pbar.set_postfix(**postfix)
    # === Release memory ===
    torch.cuda.empty_cache()
    gc.collect()

    return loss_meter

## Train Loop

In [None]:
def train_loop(model, optimizer, scheduler, device, num_epochs):
    """
    Train and validate the model for ``num_epochs`` epochs, logging and checkpointing.

    Reads the notebook-level ``train_loader``, ``valid_loader`` and ``fold``, so those
    must be defined before calling. All output paths are relative to the working
    directory: the log goes to ``logs/logs.txt``, the best weights (lowest validation
    loss) to ``saved_models/`` and the latest weights to the working directory.
    :param model: model to train.
    :param optimizer: optimizer passed to the training step.
    :param scheduler: learning-rate scheduler passed to the training step.
    :param device: device used for training and validation.
    :param num_epochs: number of epochs to run.
    :return model: the model with the weights of the last epoch.
    """
    log_path = "logs/logs.txt"
    os.makedirs("logs", exist_ok=True)
    os.makedirs("saved_models", exist_ok=True)
    # Start every run with an empty log file; the handle is closed right away
    # instead of being left open for the whole run.
    with open(log_path, "w"):
        pass

    if torch.cuda.is_available(): # Check if GPU is available
        print("Cuda: {}\n".format(torch.cuda.get_device_name()))

    start = time.time() # Track execution time
    best_loss = 1e10
    for epoch in range(1, num_epochs + 1):
        print(f'Epoch {epoch}/{num_epochs}', end='')
        loss_meter_train = train_one_epoch(model, optimizer, scheduler,
                                           dataloader=train_loader,
                                           device=device, epoch=epoch)

        loss_meter_valid = valid_one_epoch(model, valid_loader,
                                           device=device,
                                           epoch=epoch)

        duration = str(timedelta(seconds=time.time() - start))[:7]
        # === Print to log file ===
        # `epoch` is already 1-based, so it is logged as is.
        with open(log_path, 'a+') as f:
            print('{} | Epoch: {}/{} | Train Loss: {:.4} '. \
            format(duration, epoch, num_epochs, loss_meter_train.avg), file=f)
            print('{} | Epoch: {}/{} | Valid Loss: {:.4}'. \
            format(duration, epoch, num_epochs, loss_meter_valid.avg), file=f)
            print("\n" + "-"*100 + "\n", file=f)

        # === Save model if there is an improvement ===
        if loss_meter_valid.avg < best_loss:
            best_loss = loss_meter_valid.avg
            best_epoch = epoch
            PATH = f"saved_models/best_epoch-{fold:02d}.bin"
            torch.save(model.state_dict(), PATH)
            print(f"Model Saved | Best Epoch {best_epoch} | Best Loss {round(best_loss,2)} {sr_}")

        # === Always keep the weights of the latest epoch ===
        PATH = f"last_epoch-{fold:02d}.bin"
        torch.save(model.state_dict(), PATH)
        print(); print()

    end = time.time()
    time_elapsed = end - start
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        time_elapsed // 3600, (time_elapsed % 3600) // 60, (time_elapsed % 3600) % 60))

    return model

## Training

In [None]:
# Train the first fold only; widen the range to train more folds.
for fold in range(1):
    # Folds are 0-based internally and shown 1-based in the banner.
    print(f'================= Fold: {fold + 1} =================')
    train_loader, valid_loader = prepare_loaders(fold=fold, df=df)
    model = create_model()
    model.to(config.DEVICE)
    optimizer = torch.optim.AdamW(model.parameters(), lr=config.LR, weight_decay=config.WEIGHT_DECAY)
    scheduler = fetch_scheduler(optimizer)
    model = train_loop(model, optimizer, scheduler,
                       device=config.DEVICE,
                       num_epochs=config.EPOCHS)