In [1]:
import os
import json
import torch
import torchvision.transforms.functional as T
from torchvision.io import read_image
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor, MaskRCNN_ResNet50_FPN_Weights
from torch import nn
from torch.optim import SGD

# Set paths
ROOT_DIR = os.getcwd()
DATA_DIR = os.path.join(ROOT_DIR, 'C:\\New folder\\Dr. Surya\\MaskRCNN\\Unity_Generation\\Concrete')
MODEL_DIR = os.path.join(ROOT_DIR, 'C:\\New folder\\Dr. Surya\\MaskRCNN\\final_models.pth')

# Crack dataset class
class CrackDataset(Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        self.image_list = os.listdir(os.path.join(root, 'C:\\New folder\\Dr. Surya\\MaskRCNN\\Unity_Generation\\Concrete\\Images'))
        self.masks_dir = os.path.join(root, 'C:\\New folder\\Dr. Surya\\MaskRCNN\\Unity_Generation\\Concrete\\Masks')
        self.annotations_file = os.path.join(root, 'C:\\New folder\\Dr. Surya\\MaskRCNN\\Unity_Generation\\Concrete\\BoundingBoxs')

        if not os.path.exists(self.annotations_file):
            with open(self.annotations_file) as f:
                self.bbox_data = json.load(f)

    def __len__(self):
        return len(self.image_list)
    def __getitem__(self, idx):
        img_path = os.path.join(self.root, 'C:\\New folder\\Dr. Surya\\MaskRCNN\\Unity_Generation\\Concrete\\Images', self.image_list[idx])
        mask_path = os.path.join(self.masks_dir, self.image_list[idx])
        bbox_path = os.path.join(self.annotations_file, self.image_list[idx])

        image = read_image(img_path).float()
        mask = read_image(mask_path).float()
        
        if self.transforms:
            image, mask = self.transforms(image, mask)

        return image, mask

# Define data augmentation and transformation
def transform(image, mask):
    image = T.to_pil_image(image)
    mask = T.to_pil_image(mask)

    # Apply transformations (resize, flip, etc.)
    image = T.resize(image, [256, 256])
    mask = T.resize(mask, [256, 256])

    # Convert back to tensors
    image = T.to_tensor(image)
    mask = T.to_tensor(mask)

    return image, mask

# Load the dataset
dataset = CrackDataset(DATA_DIR, transforms=transform)

# Create a data loader
data_loader = DataLoader(dataset, batch_size=2, shuffle=True)

# Model setup
class MaskRCNN(nn.Module):
    def __init__(self, model_dir, num_classes=2):
        super(MaskRCNN, self).__init__()
        self.model = maskrcnn_resnet50_fpn(weights=MaskRCNN_ResNet50_FPN_Weights.COCO_V1, pretrained=True)
        in_features = self.model.roi_heads.box_predictor.cls_score.in_features
        self.model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
        self.model.roi_heads.mask_predictor = MaskRCNNPredictor(256, 256, num_classes)

    def forward(self, images, targets=None):
        if self.training and targets is None:
            raise AssertionError("targets should not be none when in training mode")
        return self.model(images, targets)

model = MaskRCNN(MODEL_DIR)

# Training
optimizer = SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
criterion = nn.BCEWithLogitsLoss()
epochs = 10

for epoch in range(epochs):
    model.train()
    total_loss = 0
    for images, masks in data_loader:
        optimizer.zero_grad()
        outputs = model(images, targets=masks.reshape(2,-1, 4))  # Pass masks as targets
        loss = criterion(outputs[0]['masks'].squeeze(), masks)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    average_loss = total_loss / len(data_loader)
    print(f'Epoch [{epoch}/{epochs}] - Loss: {average_loss:.4f}')

# Save trained model
torch.save(model.state_dict(), os.path.join(MODEL_DIR, 'C:\\New folder\\Dr. Surya\\MaskRCNN\\final_models.pth'))

# Validation
model.eval()
total_loss = 0

for images, masks in data_loader:
    with torch.no_grad():
        outputs = model(images, targets=masks)
        loss = criterion(outputs[0]['masks'].squeeze(), masks)
        total_loss += loss.item()

average_loss = total_loss / len(data_loader)
print(f'Validation Loss: {average_loss:.4f}')
print(model)

IndexError: too many indices for tensor of dimension 2