In [None]:
#https://www.nablasquared.com/are-you-tired-of-scrolling-down-through-logs-when-training-an-ml-model/

In [None]:
#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------LIBRARIES---------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#!pip install torch torchvision
import torch
import torch.distributed as dist
import torch.nn as nn
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import VOCDetection
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, DistributedSampler
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from torchvision.transforms import Compose, Resize, ToTensor
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence
import torch.multiprocessing as mp
from torchvision.transforms import functional as F
from torchvision.models.detection.faster_rcnn import FasterRCNN_ResNet50_FPN_Weights
from torch.cuda.amp import autocast, GradScaler
from sklearn.metrics import average_precision_score
from sklearn.metrics import precision_recall_fscore_support
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from typing import Dict
import numpy as np
from torch.utils.data import Dataset
import os
from PIL import Image
import xml.etree.ElementTree as ET
import collections
from torchvision import transforms
import pandas as pd

#-------------------------------------------------------------------------------------JUPYTER NOTEBOOK SETTINGS-------------------------------------------------------------------------------------
# `display` and `HTML` come from the public `IPython.display` module; importing them from
# `IPython.core.display` has been deprecated since IPython 7.14.
from IPython.display import display, HTML
# Inject CSS that stretches the notebook's `.container` element to the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
# Check cuda availability.
# Left as a bare last expression so the notebook displays the resulting bool as the cell output.
torch.cuda.is_available()

In [None]:
# One `torch.cuda.device` handle per device index reported by `torch.cuda.device_count()`.
# The list is empty when no CUDA device is visible.
available_gpus = list(map(torch.cuda.device, range(torch.cuda.device_count())))
available_gpus

In [None]:
# Setting device on GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

#Additional Info when using cuda
# The report only *reads* per-GPU statistics. It deliberately does not rebind `device`
# and does not call `torch.cuda.set_device`: doing either inside this loop would leave
# the last enumerated GPU selected as the process default once the report finishes.
if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        print(f'GPU {i+1}/{torch.cuda.device_count()}: {torch.cuda.get_device_name(i)}')
        print('Memory Usage:')
        # Byte counts are converted to GiB (1024**3) and rounded to one decimal.
        print('Allocated:', round(torch.cuda.memory_allocated(i)/1024**3,1), 'GB')
        print('Cached:   ', round(torch.cuda.memory_reserved(i)/1024**3,1), 'GB')
        print('-------------------------------------')

In [None]:
# Class name -> integer label. Labels start at 1 (no class is mapped to 0).
CUSTOM_CLASSES = {"name": 1, "value": 2, "x-axis": 3, "y-axis": 4, "plot":5}
# XML tags whose children are returned as a positional list (instead of a dict) by
# CustomVOCDetection.parse_voc_xml.
PASSTHROUGH_FIELDS = ['folder', 'filename', 'source', 'size', 'segmented', 'object']

def transform_voc_target(target, width, height):
    """Convert a parsed VOC annotation into a detection target dict.

    Args:
        target: Nested dict with an ``"annotation"`` entry holding ``"filename"`` and,
            optionally, ``"object"``. Each object is a positional list whose first
            element is the class name and whose last element is a dict with the keys
            ``xmin``, ``ymin``, ``xmax`` and ``ymax``. A single object that was
            collapsed into one flat list (instead of a list of objects) is accepted.
        width: Width used to normalise the x coordinates.
        height: Height used to normalise the y coordinates.

    Returns:
        dict with
        ``"boxes"``: float32 tensor of shape (N, 4), ``[xmin, ymin, xmax, ymax]``
        divided by ``width``/``height``;
        ``"labels"``: int64 tensor of shape (N,) with the ``CUSTOM_CLASSES`` ids;
        ``"image_id"``: int64 tensor of shape (1,) derived from the filename.

    Objects whose class name is not in ``CUSTOM_CLASSES`` are reported with a warning
    and skipped entirely, so ``boxes`` and ``labels`` always have the same length.
    """
    import hashlib  # local import: only needed for the stable image id below

    annotation = target["annotation"]
    objects = annotation.get("object", [])
    # An annotation with exactly one object may arrive as that object's flat field list
    # (first element is the class-name string); wrap it so the loop sees one object.
    if isinstance(objects, list) and objects and isinstance(objects[0], str):
        objects = [objects]

    boxes = []
    labels = []
    for obj in objects:
        class_name = obj[0]
        bbox = obj[-1]
        if class_name not in CUSTOM_CLASSES:
            # Skip box AND label together; keeping only the box would misalign the tensors.
            print(f"Warning: {class_name} is not in CUSTOM_CLASSES")
            continue
        # Normalize the bounding box coordinates
        boxes.append([
            float(bbox["xmin"]) / width,
            float(bbox["ymin"]) / height,
            float(bbox["xmax"]) / width,
            float(bbox["ymax"]) / height,
        ])
        labels.append(CUSTOM_CLASSES[class_name])

    # reshape keeps the (N, 4) layout even when there are no boxes at all.
    boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
    labels = torch.as_tensor(labels, dtype=torch.int64)

    # Built-in hash() of a str is salted per interpreter process, so it yields different
    # ids in different runs/worker processes. A digest is reproducible; dropping one bit
    # keeps the value inside the non-negative int64 range.
    digest = hashlib.blake2b(str(annotation["filename"]).encode("utf-8"), digest_size=8).digest()
    image_id = torch.tensor([int.from_bytes(digest, "big") >> 1])

    return {"boxes": boxes, "labels": labels, "image_id": image_id}

class CustomVOCDetection(Dataset):
    """Pascal-VOC style detection dataset stored under ``<root>/VOCdevkit/<dataset_name>``.

    Expected layout below that directory: ``JPEGImages/<id>.jpg``,
    ``Annotations/<id>.xml`` and ``ImageSets/Main/<image_set>.txt`` listing one id per line.

    Each item is ``(image, target)``. ``target`` is the dict built by
    ``transform_voc_target`` with its boxes expressed in absolute pixel coordinates of
    the returned image, which is the box format torchvision detection models expect.
    """

    def __init__(self, root, dataset_name, image_set='train', transforms=None, classes=None):
        """Collect image/annotation paths for one split.

        Args:
            root: Directory containing ``VOCdevkit``.
            dataset_name: Sub-directory of ``VOCdevkit`` holding the dataset.
            image_set: Split name; ``<image_set>.txt`` is read from ``ImageSets/Main``.
            transforms: Optional callable applied to the PIL image only.
            classes: Stored on the instance as ``self.classes``; not used by this class.

        Raises:
            RuntimeError: If the dataset directory does not exist.
        """
        self.root = root
        self.classes = classes
        self.transforms = transforms

        voc_root = os.path.join(self.root, 'VOCdevkit', dataset_name)
        if not os.path.isdir(voc_root):
            raise RuntimeError('Dataset not found or corrupted.')

        image_dir = os.path.join(voc_root, 'JPEGImages')
        annotation_dir = os.path.join(voc_root, 'Annotations')
        split_f = os.path.join(voc_root, 'ImageSets', 'Main', image_set.rstrip('\n') + '.txt')

        with open(split_f, "r") as f:
            # Blank lines (e.g. a trailing newline) would otherwise become bogus ".jpg"/".xml" entries.
            file_names = [line.strip() for line in f if line.strip()]

        self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names]
        self.annotations = [os.path.join(annotation_dir, x + ".xml") for x in file_names]

    def __getitem__(self, index):
        """Load one sample and return ``(image, target)``."""
        # convert() returns a fully loaded copy, so the file handle can be closed right away.
        # The result is always 3-band RGB, hence no separate channel check is needed.
        with Image.open(self.images[index]) as raw:
            img = raw.convert('RGB')

        # Get the original image size
        width, height = img.size
        target = self.parse_voc_xml(
            ET.parse(self.annotations[index]).getroot())

        if self.transforms is not None:
            img = self.transforms(img)
        # Boxes come back normalised by the original width/height ...
        target = transform_voc_target(target, width, height)

        # ... and are scaled to the size of the image that is actually returned.
        # This is exact for transforms that only rescale the whole image; transforms
        # that crop or flip would need matching box handling (not done here).
        if torch.is_tensor(img):
            out_height, out_width = img.shape[-2:]
        else:
            out_width, out_height = img.size  # PIL image
        scale = target["boxes"].new_tensor([out_width, out_height, out_width, out_height])
        target["boxes"] = target["boxes"].reshape(-1, 4) * scale

        return img, target

    def __len__(self):
        """Number of samples listed in the split file."""
        return len(self.images)

    def parse_voc_xml(self, node):
        """Recursively convert an XML element into nested dicts/lists of strings.

        A leaf element becomes ``{tag: text}``. An element with children becomes
        ``{tag: {child_tag: value}}``, or ``{tag: [value, ...]}`` (values in document
        order, child tags dropped) when its tag is in ``PASSTHROUGH_FIELDS``. Repeated
        child tags are gathered into a list. Under ``annotation`` the ``object`` entry
        is always a list of objects, including for zero or one object.
        """
        voc_dict = {}
        children = list(node)
        if children:
            def_dic = collections.defaultdict(list)
            for dc in map(self.parse_voc_xml, children):
                for ind, v in dc.items():
                    def_dic[ind].append(v)
            if node.tag == "annotation":
                # Wrap once more so the "single value -> unwrap" rule below hands back the
                # complete list of objects instead of collapsing a lone object.
                def_dic["object"] = [def_dic["object"]]
            if node.tag in PASSTHROUGH_FIELDS:
                voc_dict[node.tag] = [def_dic[ind][0] if len(def_dic[ind]) == 1 else def_dic[ind] for ind in def_dic]
            else:
                voc_dict[node.tag] = {ind: def_dic[ind][0] if len(def_dic[ind]) == 1 else def_dic[ind] for ind in def_dic}
        if node.text:
            text = node.text.strip()
            if not children:
                voc_dict[node.tag] = text
        return voc_dict

def collate_fn(batch):
    """Transpose a batch of samples into one tuple per sample field.

    ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``.
    Nothing is stacked, so samples may differ in size.
    """
    per_field = zip(*batch)
    return tuple(per_field)

def _as_box_tensor(boxes):
    """Return ``boxes`` as a floating-point tensor of shape (N, 4)."""
    boxes = torch.as_tensor(boxes)
    if not torch.is_floating_point(boxes):
        boxes = boxes.to(torch.float32)
    # reshape also turns an empty input into a well-formed (0, 4) tensor.
    return boxes.reshape(-1, 4)


def calculate_iou(target_boxes, pred_boxes):
    """Intersection-over-union of every target box with every predicted box.

    Args:
        target_boxes: Tensor or nested sequence of ``[xmin, ymin, xmax, ymax]`` boxes.
        pred_boxes: Tensor or nested sequence of boxes in the same format.

    Returns:
        1-D tensor of length ``len(target_boxes) * len(pred_boxes)``, ordered
        target-major (all predictions for the first target, then the second, ...).
        Pairs whose union area is not positive (e.g. two zero-area boxes) yield 0
        instead of dividing by zero. The result lives on the device of ``target_boxes``.
    """
    gt = _as_box_tensor(target_boxes)
    pred = _as_box_tensor(pred_boxes).to(device=gt.device, dtype=gt.dtype)

    # Compute the intersection: broadcast (T, 1, 2) against (1, P, 2) to cover all pairs.
    top_left = torch.maximum(gt[:, None, :2], pred[None, :, :2])
    bottom_right = torch.minimum(gt[:, None, 2:], pred[None, :, 2:])
    inter_wh = (bottom_right - top_left).clamp(min=0)  # no overlap -> zero extent
    inter_area = inter_wh[..., 0] * inter_wh[..., 1]

    # Compute the union
    gt_area = (gt[:, 2] - gt[:, 0]) * (gt[:, 3] - gt[:, 1])
    pred_area = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1])
    union_area = gt_area[:, None] + pred_area[None, :] - inter_area

    # Compute the IoU; the denominator is swapped for 1 wherever the union is not positive.
    has_union = union_area > 0
    safe_union = torch.where(has_union, union_area, torch.ones_like(union_area))
    iou = torch.where(has_union, inter_area / safe_union, torch.zeros_like(union_area))

    return iou.reshape(-1)

In [None]:
# Multiprocessing
# Force the 'spawn' start method unless it is already the active one.
if mp.get_start_method(allow_none=True) != 'spawn':
    mp.set_start_method('spawn', force=True)

# Check device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the pretrained model
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
# One output per entry of CUSTOM_CLASSES plus one extra for label 0, which no custom class uses.
# Derived from the mapping (as the model-loader cell does) so the head cannot drift from it.
num_classes = len(CUSTOM_CLASSES) + 1
in_features = model.roi_heads.box_predictor.cls_score.in_features
# Swap the pretrained box head for a fresh one sized for num_classes.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Put model to device
model.to(device)

# Data processing
data_transforms = transforms.Compose([
    transforms.Resize(512),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Apply this function to your dataset using the transforms parameter
train_data = CustomVOCDetection(
    root="pascal_voc_datasets/",
    dataset_name="PlotsEnchanced_Original_NoAugmentation",
    image_set="experimental",
    transforms=data_transforms,
    classes=CUSTOM_CLASSES
)

# NOTE(review): this reads the same "experimental" split as train_data, so the
# validation loss is measured on the training images. Point image_set at a held-out
# split once one exists.
val_data = CustomVOCDetection(
    root="pascal_voc_datasets/",
    dataset_name="PlotsEnchanced_Original_NoAugmentation",
    image_set="experimental",  # assuming the set name is 'validation'
    transforms=data_transforms,
    classes=CUSTOM_CLASSES
)

# Use collate_fn in DataLoader
train_data_loader = DataLoader(train_data, batch_size=16, shuffle=True, num_workers=0, collate_fn=collate_fn)
val_data_loader = DataLoader(val_data, batch_size=16, shuffle=False, num_workers=0, collate_fn=collate_fn)

# Debugging tensor data
# for i, (images, targets) in enumerate(data_loader):
#     print(f'Batch index {i}:')
#     print('Image:', images)
#     print('Targets:', targets)
#     if i >= 1:  # print only first 2 batches
#         break

# Define optimizer (only parameters that require gradients are optimised)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.0001, momentum=0.9, weight_decay=0.0005)
#optimizer = torch.optim.Adam(params, lr=0.0001, weight_decay=0.0005)

# Initialize the gradient scaler
scaler = GradScaler()

# Define loss history for plotting
loss_hist = []
# Initialize validation loss history for plotting
valid_loss_hist = []

# Add a path for the checkpoint
MODEL_NAME = "EXPERIMENTAL_5_rcnn_batch-16_epoch-20_part-enchanced_non-augmented"
MODEL_EXTENSION = ".pt"
MODEL_SAVE_PATH = "pytorch_rcnn_models/" + MODEL_NAME + MODEL_EXTENSION
CHECKPOINT_PATH = "pytorch_rcnn_checkpoints/" + MODEL_NAME + "/"

# Load the latest checkpoint if exists
# Checkpoint files are expected to end in "_<epoch>.pth"; the one with the highest
# epoch number is resumed. Any failure is reported and training starts from scratch.
start_epoch = 0
if os.path.exists(CHECKPOINT_PATH):
    try:
        checkpoint_files = [f for f in os.listdir(CHECKPOINT_PATH) if f.endswith('.pth')]
        if not checkpoint_files:
            # Explicit message instead of an opaque "list index out of range".
            raise FileNotFoundError("checkpoint directory contains no .pth files")
        checkpoint_files.sort(key=lambda x: int(x.split('_')[-1].split('.')[0]))  # sort by epoch number
        latest_checkpoint = checkpoint_files[-1]
        # map_location places the stored tensors on the current device, so a checkpoint
        # written on a GPU can also be resumed on a CPU-only host.
        checkpoint = torch.load(os.path.join(CHECKPOINT_PATH, latest_checkpoint), map_location=device)

        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
        loss_hist = checkpoint['loss_hist']
        # Restore the validation history too; otherwise it is shorter than loss_hist
        # after a resume. Checkpoints without the key keep the current value.
        valid_loss_hist = checkpoint.get('valid_loss_hist', valid_loss_hist)
        print(f"Loaded checkpoint from epoch {checkpoint['epoch'] + 1}")

    except Exception as e:
        print(f"No checkpoint found at {CHECKPOINT_PATH} or loading failed. Starting from scratch. Error: {str(e)}")
else:
    print(f"No checkpoint directory found at {CHECKPOINT_PATH}. Starting from scratch.")

# Training loop
num_epochs = 10
iou_threshold = 0.5  # set IoU threshold (only referenced by the commented-out accuracy loop)
# torch.save does not create missing directories, so make sure the target exists
# before the first checkpoint is written.
os.makedirs(CHECKPOINT_PATH, exist_ok=True)
for epoch in range(start_epoch, num_epochs):
    if torch.cuda.is_available():
        for i in range(torch.cuda.device_count()):
            print(f'GPU {i+1}/{torch.cuda.device_count()}: {torch.cuda.get_device_name(i)}')
            print('Memory Usage:')
            print('Allocated:', round(torch.cuda.memory_allocated(i)/1024**3,1), 'GB')
            print('Cached:   ', round(torch.cuda.memory_reserved(i)/1024**3,1), 'GB')
            print('-------------------------------------')

    # Model Training
    torch.cuda.empty_cache()         # clear cuda cache
    model.train()
    loss_epoch = []
    progress_bar = tqdm(train_data_loader, desc=f"Training epoch {epoch+1}/{num_epochs}", unit="batch")

    for images, targets in progress_bar:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()

        # A batch whose forward pass raises is reported and skipped (best effort).
        try:
            with autocast():
                loss_dict = model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
        except Exception as e:
            print(f"Error during forward pass: {e}")
            continue

        # Backward pass and optimiser step go through the gradient scaler.
        scaler.scale(losses).backward()
        scaler.step(optimizer)
        scaler.update()

        loss_value = losses.item()
        loss_epoch.append(loss_value)
        progress_bar.set_postfix({"batch_loss": loss_value})

    if not loss_epoch:
        # Reached when the loader is empty or every batch was skipped above; averaging
        # would otherwise fail with an uninformative ZeroDivisionError.
        raise RuntimeError(f"No training batch completed in epoch {epoch+1}; see the forward-pass errors above.")
    epoch_loss = sum(loss_epoch)/len(loss_epoch)
    loss_hist.append(epoch_loss)
    print(f"Epoch loss: {epoch_loss}")

    # Validation Loop
    # The model is intentionally left in train() mode here: the loop reads a loss dict
    # from model(images, targets), exactly as the training loop does. Gradients are
    # disabled and no optimiser step is taken.
    torch.cuda.empty_cache()         # clear cuda cache
    valid_loss = 0
    with torch.no_grad():
        progress_bar = tqdm(val_data_loader, desc=f"Validating epoch {epoch+1}/{num_epochs}", unit="batch")
        for images, targets in progress_bar:
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in target.items()} for target in targets]

            # Forward
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            valid_loss += losses.item()

        # Average validation loss (max(..., 1) keeps an empty loader from dividing by zero)
        valid_loss /= max(len(val_data_loader), 1)
        valid_loss_hist.append(valid_loss)
        print(f"Validation loss: {valid_loss}")

    # Accuracy loop
#     with torch.no_grad():
#         model.eval()
#         accuracy_epoch = []
#         progress_bar = tqdm(val_data_loader, desc="Calculating accuracy", unit="batch")

#         for images, targets in progress_bar:
#             images = list(image.to(device) for image in images)
#             targets = [{k: v.to(device) for k, v in target.items()} for target in targets]

#             predictions = model(images)

#             for pred, target in zip(predictions, targets):
#                 pred_boxes = pred['boxes']
#                 target_boxes = target['boxes']
#                 ious = calculate_iou(target_boxes, pred_boxes)

#                 correct_preds = ious > iou_threshold
#                 accuracy = correct_preds.sum().item() / len(pred_boxes)
#                 accuracy_epoch.append(accuracy)

#             progress_bar.set_postfix({"batch_accuracy": accuracy})

#         accuracy_per_epoch = sum(accuracy_epoch) / len(accuracy_epoch)
#         print(f"Accuracy per Epoch: {accuracy_per_epoch}")


    # Save the model checkpoint at the end of each epoch
    # 'epoch' is stored 0-based while the file name is 1-based; the resume code adds 1
    # to the stored value to obtain the next epoch to run.
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': epoch_loss,
        'loss_hist': loss_hist,
        'valid_loss_hist': valid_loss_hist,
    }, os.path.join(CHECKPOINT_PATH, f"checkpoint_epoch_{epoch+1}.pth"))

# Save the model after training
# Neither torch.save nor plt.savefig creates missing directories, so create them up front.
os.makedirs(os.path.dirname(MODEL_SAVE_PATH) or ".", exist_ok=True)
os.makedirs('training_loss_plots', exist_ok=True)
torch.save(model.state_dict(), MODEL_SAVE_PATH)
print("The model has been saved!")

# Configure Seaborn
sns.set_theme()

# Create a pandas DataFrame for loss history
# The frame is sized from the histories themselves rather than from num_epochs, because
# a resumed or interrupted run can hold a different number of entries. A shorter history
# is left-padded with NaN so that its values line up with the most recent epochs.
n_epochs_logged = max(len(loss_hist), len(valid_loss_hist))
train_curve = [float('nan')] * (n_epochs_logged - len(loss_hist)) + list(loss_hist)
valid_curve = [float('nan')] * (n_epochs_logged - len(valid_loss_hist)) + list(valid_loss_hist)
df_loss = pd.DataFrame(data={'Epoch': range(1, n_epochs_logged + 1), 'Training Loss': train_curve, 'Validation Loss': valid_curve})

# Plotting the loss using seaborn
plt.figure(figsize=(10, 5))
sns.lineplot(data=df_loss, x='Epoch', y='Training Loss', color='orange', label='Training Loss')
sns.lineplot(data=df_loss, x='Epoch', y='Validation Loss', color='blue', label='Validation Loss')
plt.title('Loss over epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()

# Save the plot (must happen before plt.show(), which resets the current figure)
plt.savefig('training_loss_plots/' + MODEL_NAME + ".png", dpi=300)

# Show the plot(which also resets the current figure and axes)
plt.show()

In [None]:
# INDEPENDENT MODEL LOADER FOR TESTING PURPOSES
# Define the device here as well, so this cell does not depend on an earlier cell having run.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the pretrained model
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)

# Replace the classifier with a new one
CUSTOM_CLASSES = {"name": 1, "value": 2, "x-axis": 3, "y-axis": 4, "plot":5}
# One output per custom class plus one extra for label 0, which no custom class uses.
num_classes = len(CUSTOM_CLASSES) + 1
in_features = model.roi_heads.box_predictor.cls_score.in_features

model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Put model to device
model.to(device)

# Load saved model
# map_location places the stored tensors on `device`, so weights saved on a GPU can
# also be loaded on a CPU-only host. The call stays the last expression so the
# notebook shows the key-matching report returned by load_state_dict.
model.load_state_dict(torch.load('pytorch_rcnn_models/EXPERIMENTAL_4_rcnn_batch-16_epoch-20_full-crypto.com_non-augmented.pt', map_location=device))


In [None]:
# Load test data
test_data = CustomVOCDetection(
    root="pascal_voc_datasets/",
    dataset_name="PlotsNoAugmentation",
    image_set="val",
    transforms=data_transforms,
    classes=CUSTOM_CLASSES
)

# DataLoader for test data
test_data_loader = DataLoader(test_data, batch_size=32, shuffle=False, num_workers=0, collate_fn=collate_fn)

# Per-label counters, indexed directly by label id (index 0 is never used by CUSTOM_CLASSES).
class_correct = [0.] * (len(CUSTOM_CLASSES) + 1)
class_total = [0.] * (len(CUSTOM_CLASSES) + 1)

# Testing
model.eval()  # set model to evaluation mode

for images, targets in test_data_loader:
    images = [image.to(device) for image in images]

    # forward pass: compute predicted outputs by passing inputs to the model
    with torch.no_grad():
        output = model(images)

    # for each output result
    for i, output_dict in enumerate(output):
        scores = output_dict['scores']

        # If the model did not detect any objects, continue to the next image
        # NOTE(review): such images add nothing to class_total, so missed images do not
        # lower the reported accuracy - confirm that this is intended.
        if scores.numel() == 0:
            continue

        # get the label with the highest score
        pred_label = output_dict['labels'][scores.argmax()].item()

        # compare predictions to true label: the single top-scoring label is compared
        # against every ground-truth label of the image. Labels are read as plain ints,
        # so the targets never need to be copied to the device.
        for true_label in targets[i]['labels'].tolist():
            # calculate test accuracy for each object class
            class_correct[true_label] += float(pred_label == true_label)
            class_total[true_label] += 1

# Loop over classes in the dictionary
for class_name, i in CUSTOM_CLASSES.items():
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            class_name, 100 * class_correct[i] / class_total[i],
            class_correct[i], class_total[i]))
    else:
        # The counters are filled from the test loader, hence "test" examples.
        print('Test Accuracy of %5s: N/A (no test examples)' % (class_name))

total_correct = np.sum(class_correct)
total = np.sum(class_total)

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * total_correct / total if total > 0 else 0,
    total_correct, total))

In [None]:
# Convert the output and targets to the COCO format
def to_coco_format(images, output, targets):
    """Convert one batch of detections and ground truth into COCO-style record lists.

    Args:
        images: Batch of images; only its length is used.
        output: Per-image dicts with ``"boxes"`` (``[xmin, ymin, xmax, ymax]``),
            ``"labels"`` and ``"scores"`` tensors.
        targets: Per-image dicts with ``"image_id"`` (single-element tensor),
            ``"boxes"`` and ``"labels"`` tensors.

    Returns:
        ``(detections, annotations)``: two lists of dicts with ``image_id``,
        ``category_id``, ``bbox`` as ``[x, y, width, height]``, ``area``, ``iscrowd``
        and ``id`` (detections additionally carry ``score``). Ids start at 1 on every
        call and are shared by both lists, so they are unique only within one call.
    """
    coco_format_output = []
    coco_format_targets = []
    ann_id = 1  # Initialize annotation id

    for i in range(len(images)):
        image_id = targets[i]['image_id'].item()  # Use image_id from target

        # Convert output
        detections = zip(output[i]["boxes"].tolist(), output[i]["labels"].tolist(), output[i]["scores"].tolist())
        for (x_min, y_min, x_max, y_max), label, score in detections:
            # Convert to [x, y, width, height]
            box_w = x_max - x_min
            box_h = y_max - y_min

            # Create COCO-style detection
            coco_format_output.append({
                'image_id': image_id,
                'category_id': label,
                'bbox': [x_min, y_min, box_w, box_h],
                'score': score,
                # COCOeval reads 'area' and 'iscrowd' from every record.
                'area': box_w * box_h,
                'iscrowd': 0,
                'id': ann_id  # Add 'id' field
            })

            ann_id += 1  # Increment annotation id

        # Convert targets
        for (x_min, y_min, x_max, y_max), label in zip(targets[i]["boxes"].tolist(), targets[i]["labels"].tolist()):
            # Convert to [x, y, width, height]
            box_w = x_max - x_min
            box_h = y_max - y_min

            # Create COCO-style annotation
            coco_format_targets.append({
                'image_id': image_id,
                'category_id': label,
                'bbox': [x_min, y_min, box_w, box_h],
                'area': box_w * box_h,
                'iscrowd': 0,
                'id': ann_id  # Add 'id' field
            })

            ann_id += 1  # Increment annotation id

    return coco_format_output, coco_format_targets

# Define test_data_loader
test_data = CustomVOCDetection(
    root="pascal_voc_datasets/",
    dataset_name="PlotsNoAugmentation",
    image_set="test",  # Using 'test' here
    transforms=data_transforms,
    classes=CUSTOM_CLASSES
)

test_data_loader = DataLoader(test_data, batch_size=16, shuffle=False, num_workers=0, collate_fn=collate_fn)

# Evaluate the model on the test data
model.eval()

# Define category dictionary
# Category ids must be the label ids the model/targets use (the values of
# CUSTOM_CLASSES), not positions from enumerate().
category_names = {class_id: name for name, class_id in CUSTOM_CLASSES.items()}
categories = [{'id': class_id, 'name': name} for class_id, name in sorted(category_names.items())]

# Records are gathered over ALL batches and evaluated once afterwards; evaluating
# inside the loop would leave only the last batch in the final metrics.
all_detections = []
all_annotations = []
coco_images = {}  # image_id -> COCO image record
for images, targets in test_data_loader:
    images = list(image.to(device) for image in images)
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

    # Make predictions
    with torch.no_grad():
        output = model(images)

    # Check if there are any detections
    if any(len(out['boxes']) > 0 for out in output):
        print('Detections made')
    else:
        print('No detections made')

    coco_format_output, coco_format_targets = to_coco_format(images, output, targets)
    all_detections.extend(coco_format_output)
    all_annotations.extend(coco_format_targets)

    for image, target in zip(images, targets):
        img_height, img_width = image.shape[-2:]
        img_id = target['image_id'].item()
        coco_images[img_id] = {'id': img_id, 'height': int(img_height), 'width': int(img_width)}

# to_coco_format restarts its id counter on every call, so ids are renumbered per list.
# 'area' and 'iscrowd' are required by COCOeval; they are filled in only when missing.
for records in (all_annotations, all_detections):
    for new_id, record in enumerate(records, start=1):
        record['id'] = new_id
        record.setdefault('area', record['bbox'][2] * record['bbox'][3])
        record.setdefault('iscrowd', 0)

# Load results into COCO objects
# The 'images' list is needed as well: COCOeval takes its image ids from the ground truth.
coco_gt = COCO()  # COCO ground truth
coco_dt = COCO()  # COCO detections
coco_gt.dataset = {'images': list(coco_images.values()), 'annotations': all_annotations, 'categories': categories}
coco_dt.dataset = {'images': list(coco_images.values()), 'annotations': all_detections, 'categories': categories}

coco_gt.createIndex()
coco_dt.createIndex()

# Create COCO evaluator and evaluate
coco_eval = COCOeval(cocoGt=coco_gt, cocoDt=coco_dt, iouType='bbox')
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()

# Extract Precision-Recall data from coco_eval object
# precision is indexed [IoU threshold, recall threshold, category, area range, max detections]
precision = coco_eval.eval['precision']
recall = coco_eval.params.recThrs

print(len(all_detections))  # total number of detections over the whole test set

# Create a figure for the plots
fig, ax = plt.subplots(figsize=(10, 10))

# For each category (the category axis follows coco_eval.params.catIds)
for category_index, category_id in enumerate(coco_eval.params.catIds):
    # Get precision for this category: first area range, last max-detections setting
    precision_per_category = precision[:, :, category_index, 0, -1]
    # COCOeval stores -1 where it has no data; mask those so they do not drag the mean down
    valid_precision = np.ma.masked_less(precision_per_category, 0)
    if valid_precision.count() == 0:
        continue  # nothing to plot for this category
    # Average over the IoU thresholds (axis 0), leaving one value per recall threshold
    avg_precision = valid_precision.mean(axis=0).filled(np.nan)

    # Plot
    ax.plot(recall, avg_precision, label=category_names.get(category_id, str(category_id)))

# Set up labels and title
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
ax.set_title('Per Category Precision-Recall Curve')
ax.legend()  # Add a legend

plt.grid(True)
plt.show()


In [None]:
# Code to manually clear the CUDA(VRAM) and RAM cache in case of issues or KeyboardInterrupt
# Utility cell meant to be run by hand; nothing else in the notebook depends on it.

# delete model or unnecessary tensors
# (uncomment to drop the reference to the model before collecting garbage)
#del model

# python garbage collection
import gc
gc.collect()

# clear cuda cache
torch.cuda.empty_cache()

# detach tensors
# (template line: `tensor` is a placeholder, not a variable defined in this notebook)
#tensor = tensor.detach()