In [24]:
import torch
import numpy as np
from torchvision import models
import os
import json
from torchvision import io
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms.functional as T
from torchvision.transforms import Resize, RandomCrop, Normalize
from torchvision.models.detection import MaskRCNN
from torch import nn, device
import random


DATA_DIR = "C:\\New folder\\Dr. Surya\\MaskRCNN\\Unity_Generation\\Concrete"
IMAGES_DIR = os.path.join(DATA_DIR, "Images")
MASKS_DIR = os.path.join(DATA_DIR, "Masks") 
ANNOTATIONS_DIR = os.path.join(DATA_DIR, "BoundingBoxs")

def get_annotations_file_path(image_filename):
    filename_without_extension = os.path.splitext(image_filename)[0]
    return os.path.join(ANNOTATIONS_DIR, f"{filename_without_extension}.json")

class CrackDataset(Dataset):
    def __init__(self, root_dir):
        self.root_dir = root_dir
        self.images = os.listdir(IMAGES_DIR) 
        self.masks = os.listdir(MASKS_DIR)
        self.annotations = {}
        self.class_labels = set()  # Keep track of unique class labels
        for image_name in self.images:
            annotations_file_path = get_annotations_file_path(image_name)
            with open(annotations_file_path) as f:
                annotation = json.load(f)
                self.annotations[image_name] = annotation
                if 'objects' in annotation:
                    for obj in annotation['objects']:
                        self.class_labels.add(obj['class_label'])
                    
        self.num_classes = len(self.class_labels)  

    def __getitem__(self, i):
        image_name = self.images[i]
        mask_name = self.masks[i] 
        annotations = self.annotations[image_name]
        image = io.read_image(os.path.join(IMAGES_DIR, image_name))
        mask = io.read_image(os.path.join(MASKS_DIR, mask_name))
        return image, mask, annotations

    def __len__(self):
        return len(self.images)

def transform(image, mask):
    resized = Resize(256)
    image = resized(image)
    mask = resized(mask)
    random_crop = RandomCrop(224,224)  
    image, mask = random_crop(image, mask)
    if random.random() < 0.5:
        image = T.hflip(image)  
        mask = T.hflip(mask)
    normalized = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    image = normalized(image)
    image = T.to_tensor(image)  
    mask = torch.squeeze(mask, dim=0)
    return image, mask

dataset = CrackDataset(DATA_DIR)
dataloader = DataLoader(dataset, batch_size=1)

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])
train_dataloader = DataLoader(
    train_dataset, 
    batch_size=1,
    shuffle=True,
    num_workers=4
)

val_dataloader = DataLoader(
    val_dataset,
    batch_size=1,
    shuffle=False, 
    num_workers=4
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load a pre-trained ResNet model
backbone = models.resnet50(pretrained=True)
in_features = backbone.fc.in_features
num_classes = dataset.num_classes  
backbone.fc = nn.Linear(in_features, num_classes)

# Freeze some layers of the backbone model
for name, param in backbone.named_parameters():
        param.requires_grad = False

# Create a list of trainable parameters
params = [p for p in backbone.parameters() if p.requires_grad]

# Initialize the optimizer with trainable parameters
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
criterion = nn.CrossEntropyLoss().to(device)

model = MaskRCNN(
   backbone=backbone,
   pretrained=True,
   num_classes=dataset.num_classes 
)


for epoch in range(20):
    for images, targets in train_dataloader:
        images = images.to(device)
        targets = targets.to(device)
        outputs = model(images)
        
        # Reshape the outputs
        batch_size, num_boxes, num_classes = outputs.shape
        outputs = outputs.permute(0, 2, 1).reshape(batch_size * num_boxes, num_classes)
        
        loss = criterion(outputs, targets)  
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
      
    with torch.no_grad():
        val_loss = 0
        val_accuracy = 0
        for images, targets in val_dataloader:
            images = images.to(device)
            targets = targets.to(device)
            outputs = model(images)
            
            # Reshape the outputs
            batch_size, num_boxes, num_classes = outputs.shape
            outputs = outputs.permute(0, 2, 1).reshape(batch_size * num_boxes, num_classes)
            
            loss = criterion(outputs, targets)
            val_loss += loss.item()
            predictions = torch.argmax(outputs, dim=1)
            actual = targets
            accuracy = (predictions == actual).float().mean()
            val_accuracy += accuracy

        val_loss /= len(val_dataloader)
        val_accuracy /= len(val_dataloader)

        print(f"Epoch: {epoch+1}, Val Loss: {val_loss}, Val Accuracy: {val_accuracy}")
        
torch.save(model.state_dict(), 'C:\\New folder\\Dr. Surya\\MaskRCNN\\crack_maskrcnn.pth')


AttributeError: 'int' object has no attribute 'out_channels'