In [7]:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
import os
import json
import torch
import torchvision
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, random_split
from PIL import Image
import glob
import tqdm
import pandas as pd
from torchvision.transforms.functional import to_tensor
import ast
import numpy as np
from PIL import ImageDraw
from torchvision.transforms import Compose, Resize, Pad, ToTensor
from PIL import ImageOps


class StrawberryDataset(torch.utils.data.Dataset):
    def __init__(self, image_list, data_labels, transforms=None):
        self.image_list = image_list
        self.data_labels = data_labels
        self.transforms = transforms
        self.label_to_id = {
            'Angular Leafspot': 0,
            'Anthracnose Fruit Rot': 1,
            'Blossom Blight': 2,
            'Gray Mold': 3,
            'Leaf Spot': 4,
            'Powdery Mildew Fruit': 5,
            'Powdery Mildew Leaf': 6
        }

    def __getitem__(self, idx):
        img = self.image_list[idx]
        img_path = self.data_labels.iloc[idx]['image']
        labels = self.data_labels.iloc[idx]['label']
        label_ids = [self.label_to_id[label] for label in labels]
        points_list = self.data_labels.iloc[idx]['points']

        width, height = img.size
        masks = []
        bboxes = []
        areas = []

        for points in points_list:
            points_tuples = [tuple(point) for point in points]
            # Create the mask
            mask = Image.new('L', (width, height))
            draw = ImageDraw.Draw(mask)
            draw.polygon(points_tuples, fill=1)

            # Calculate the bounding box
            xy = [point for sublist in points for point in sublist]
            x_values = xy[0::2]
            y_values = xy[1::2]
            x_min, x_max = min(x_values), max(x_values)
            y_min, y_max = min(y_values), max(y_values)
            bbox = [x_min, y_min, x_max, y_max]

            mask = torch.tensor(np.array(mask), dtype=torch.uint8)
            masks.append(mask)
            bboxes.append(bbox)
            areas.append((x_max - x_min) * (y_max - y_min))

        # Stack masks along the first dimension
        masks = torch.stack(masks, dim=0)

        target = {
            'boxes': torch.tensor(bboxes, dtype=torch.float32),
            'labels': torch.tensor(label_ids, dtype=torch.int64),
            'masks': masks,
            'image_id': torch.tensor([idx]),
            'area': torch.tensor(areas, dtype=torch.float32),
            'iscrowd': torch.tensor([0] * len(labels), dtype=torch.int64)
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.image_list)


def collate_fn(batch):
    return tuple(zip(*batch))

def get_transform(img, target, min_dim=512, max_dim=960):
    def resize_and_pad_fn(img, target):
        w, h = img.size
        scale_factor = min(min_dim / min(w, h), max_dim / max(w, h))
        new_w = int(w * scale_factor)
        new_h = int(h * scale_factor)

        img = img.resize((new_w, new_h), Image.BILINEAR)
        padding = (min_dim - new_w) // 2, (min_dim - new_h) // 2
        img = ImageOps.expand(img, padding)

        # Apply the same padding to the target's bounding boxes
        target['boxes'][:, :2] += torch.tensor(padding, dtype=torch.float32)
        target['boxes'][:, 2:] += torch.tensor(padding, dtype=torch.float32)

        return img, target
    
    img, target = resize_and_pad_fn(img, target) 
    img = to_tensor(img)
    return img, target

# Extract train, test, val images and labels

def load_data(data_dir):
    images = [] 
    labels = {} # to capture image name as key and corresponding label out of json as value
    points = {}
    
    elems = glob.glob(os.path.join(data_dir, '*.jpg'))
    elems = sorted(elems)
    
    i = 0
    for elem in tqdm.tqdm(elems):
        # Read image
        img = Image.open(elem)
        images.append(img)
    
        # Read label path
        label_path = elem.lower().replace('jpg','json')
    
        # Read labels from json file
        f = open(label_path)
        label_data = json.load(f)
        
        image_labels = []
        image_points = []
        for shapes in label_data['shapes']:
            label = shapes['label']
            point = shapes['points']
            image_labels.append(label)
            image_points.append(point)

        labels[label_data['imagePath']] = image_labels
        points[label_data['imagePath']] = image_points
        
        #for testing with smaller data volume
        #if i == 10:
            #break
        i+=1
        
    df = pd.DataFrame(list(labels.items()), columns=["image", "label"])
    df_points = pd.DataFrame(list(points.items()), columns=["image", "points"])
    df = df.merge(df_points, on="image")
        
    return images, df

train_dir = "archive (1)/train/"
val_dir = "archive (1)/val/"
test_dir = "archive (1)/test/"

train_images, df_train_labels = load_data(train_dir)
test_images, df_test_labels = load_data(val_dir)
val_images, df_val_labels = load_data(test_dir)

100%|██████████████████████████████████████████████████████████████████████████████| 1450/1450 [00:02<00:00, 599.24it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 307/307 [00:00<00:00, 908.07it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 743/743 [00:00<00:00, 975.08it/s]


In [8]:
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from pprint import pprint

def simple_mean_average_precision(targets, preds):
    # Convert input format to the format required by the metric
    formatted_targets = []
    formatted_preds = []
    
    for target, pred in zip(targets, preds):
        formatted_targets.append({
            'boxes': target['boxes'],
            'labels': target['labels'],
        })
        formatted_preds.append({
            'boxes': pred['boxes'],
            'scores': pred['scores'],
            'labels': pred['labels'],
        })

    #print("\n\nFormatted Targets: ", formatted_targets[0])
    #print("\n\nFormatted Preds: ", formatted_preds[0])
    # Initialize the MeanAveragePrecision metric
    metric = MeanAveragePrecision()

    # Update and compute the metric
    metric.update(formatted_preds, formatted_targets)
    result = metric.compute()

    return result

In [9]:
from torch.cuda.amp import GradScaler, autocast
from torchvision.models.detection import MaskRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from tqdm import tqdm
import gc

def get_model(type, num_classes = 7):
    if type == "baseline":
        model = maskrcnn_resnet50_fpn(pretrained=True)
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
        in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
        hidden_layer = 256
        model.roi_heads.mask_predictor = torchvision.models.detection.mask_rcnn.MaskRCNNPredictor(
            in_features_mask, hidden_layer, num_classes)
        model.to(device)
        return model
    
def get_optimizer(type, batch_size, gradient_accumulation_steps, lr=0.0001, momentum=0.9, weight_decay=0.0001):
    if type == "SGD":
        return torch.optim.SGD(params, lr=lr*batch_size*gradient_accumulation_steps/2, momentum=momentum, weight_decay=weight_decay)
        
batch_size = 2

gc.collect()
torch.cuda.empty_cache()

train_data = StrawberryDataset(train_images, df_train_labels, transforms=get_transform)
val_data = StrawberryDataset(test_images, df_test_labels, transforms=get_transform)
test_data = StrawberryDataset(val_images, df_val_labels, transforms=get_transform)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=0, collate_fn=collate_fn)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=collate_fn)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=collate_fn)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

num_classes = 7  # Your dataset has 7 classes (labels range from 0 to 6)
model = get_model("baseline", num_classes)

# Set up the optimizer and learning rate scheduler
gradient_accumulation_steps = 4
params = [p for p in model.parameters() if p.requires_grad]
optimizer = get_optimizer("SGD", batch_size=batch_size, gradient_accumulation_steps=gradient_accumulation_steps, lr=0.0001)
#lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
scaler = GradScaler()
num_epochs = 50
history = pd.DataFrame(columns=["Epoch", "Train Loss", "Val Loss", "Train mAP", "Val mAP"])
for epoch in range(num_epochs):
    # Train the model for one epoch
    model.train()
    running_loss = 0
    train_mAP = 0
    total_train_samples = 0
    step = 0
    
    torch.cuda.synchronize()
    gc.collect()
    torch.cuda.empty_cache()
    
    for images, targets in tqdm(train_loader, desc=f"Training epoch {epoch + 1}/{num_epochs}"):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        with autocast():
            loss_dict = model(images, targets)
            #predictions = model(images)
            losses = sum(loss for loss in loss_dict.values())

        scaler.scale(losses / gradient_accumulation_steps).backward()
        running_loss += losses.cpu().detach().item()

        # Calculate train mAP
        #train_mAP += simple_mean_average_precision(targets, predictions, num_classes=num_classes)
        total_train_samples += len(targets)
        if (step + 1) % gradient_accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
        step += 1

    # Use learning rate scheduler
    # r_scheduler.step()

    model.eval()
    val_loss = 0
    val_mAP = []
    total_val_samples = 0
    predictions = []

    # Initialize a dictionary to store the running sum of each mAP metric
    mAP_sums = {}

    with torch.no_grad():
        for images, targets in tqdm(val_loader, desc=f"Validating epoch {epoch + 1}/{num_epochs}"):
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            with autocast():
                predictions = model(images)  # Get predictions
            mAP_dict = simple_mean_average_precision(targets, predictions)

            # Update the running sum of each mAP metric
            for key, value in mAP_dict.items():
                if key in mAP_sums:
                    mAP_sums[key] += value.cpu().detach().numpy()
                else:
                    mAP_sums[key] = value.numpy()

            #losses = sum(loss for loss in loss_dict.values())
            #val_loss += losses.item()

            total_val_samples += len(targets)

    # Calculate the average of each mAP metric
    mAP_averages = {key: value / total_val_samples for key, value in mAP_sums.items()}
    val_mAP.append(mAP_averages)

    train_loss = running_loss / (step + 1)
    #print(f"Epoch: {epoch + 1}, Train Loss: {train_loss}, Val mAP: {val_mAP[-1]}")
    print(f"Epoch: {epoch + 1}, Train Loss: {train_loss}, Val mAP: {mAP_averages}")
    # Append the current epoch results to the history DataFrame
    history = history.append({
        "Epoch": epoch + 1,
        "Train Loss": train_loss,
        #"Val Loss": val_loss,
        #"Train mAP": train_mAP,
        "Val mAP": val_mAP},
        ignore_index=True)

history.to_csv("training_history.csv", index=False)
torch.save(model.state_dict(), "mask_rcnn_strawberry.pth")

# Evaluate the model on the test set
model.eval()
test_mAP = []
total_test_samples = 0
predictions_test = []
# Initialize a dictionary to store the running sum of each mAP metric
mAP_test_sums = {}
with torch.no_grad():
    for images, targets in test_loader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        with autocast():
            predictions += model(images)  # Get predictions
            mAP_dict = simple_mean_average_precision(targets, predictions)

            # Update the running sum of each mAP metric
            for key, value in mAP_dict.items():
                if key in mAP_test_sums:
                    mAP_test_sums[key] += value.cpu().detach().numpy()
                else:
                    mAP_test_sums[key] = value.cpu().detach().numpy()

            #losses = sum(loss for loss in loss_dict.values())
            #val_loss += losses.item()

            total_test_samples += len(targets)

    # Calculate the average of each mAP metric
    mAP_test_averages = {key: value / total_val_samples for key, value in mAP_test_sums.items()}
    print(f"Test mAP: {mAP_test_averages}")

print("Finished evaluating the model on the test set.")

cuda


  scaler = GradScaler()
  with autocast():
Training epoch 1/50: 100%|████████████████████████████████████████████████████████████| 725/725 [03:40<00:00,  3.29it/s]
  with autocast():
Validating epoch 1/50:   0%|                                                                    | 0/154 [00:00<?, ?it/s]


ModuleNotFoundError: `MAP` metric requires that `pycocotools` or `faster-coco-eval` installed. Please install with `pip install pycocotools` or `pip install faster-coco-eval` or `pip install torchmetrics[detection]`.