In [4]:
import torch
import segmentation_models_pytorch as smp
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import os
import numpy as np
from sklearn.model_selection import train_test_split

# Define the dataset class
class SegmentationDataset(Dataset):
    def __init__(self, image_files, label_folder, transform=None):
        self.image_files = image_files
        self.label_folder = label_folder
        self.transform = transform
    
    def __len__(self):
        return len(self.image_files)
    
    def __getitem__(self, idx):
        img_path = self.image_files[idx]
        img = Image.open(img_path).convert('RGB')
        label_path = os.path.join(self.label_folder, os.path.basename(img_path).replace('.jpg', '_label.png'))
        label = Image.open(label_path).convert('L')  # Convert to grayscale
        
        if self.transform:
            img = self.transform(img)
            label = self.transform(label)
        
        return img, torch.tensor(np.array(label), dtype=torch.long)

# Data transformation
data_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

# Create dataset
image_folder = 'D:/New folder/Inter_Bootcamp/dataset/train'  # Update with your image folder path
label_folder = 'D:/New folder/Inter_Bootcamp/dataset/labels'  # Update with your label folder path
image_files = [os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith('.jpg')]

# Split dataset into training and validation sets
train_files, val_files = train_test_split(image_files, test_size=0.2, random_state=42)

# Create DataLoader for training and validation
train_dataset = SegmentationDataset(train_files, label_folder, transform=data_transform)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

val_dataset = SegmentationDataset(val_files, label_folder, transform=data_transform)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)

# Load pre-trained model from SMP
model = smp.Unet(
    encoder_name="resnet34",        # Choose any encoder available in SMP
    encoder_weights="imagenet",     # Use pre-trained weights
    classes=3,                      # Number of segmentation classes
    activation=None                 # Raw logits
)

# Move model to GPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)

# Define loss and metrics
loss_fn = smp.losses.DiceLoss(mode='multiclass')
metrics = [
    smp.utils.metrics.IoU(threshold=0.5),  # Intersection over Union
    smp.utils.metrics.Fscore(beta=1.0)     # F-Beta Score
]

# Optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

# Training loop
num_epochs = 25
for epoch in range(num_epochs):
    model.train()
    for batch in train_loader:
        imgs, labels = batch
        imgs, labels = imgs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

    # Validation loop
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in val_loader:
            imgs, labels = batch
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            val_loss += loss_fn(outputs, labels).item()
    
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}, Val Loss: {val_loss/len(val_loader):.4f}")

ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [5]:
image_files = [os.path.join(dp, f) for dp, dn, filenames in os.walk("D:/New folder/Inter_Bootcamp/dataset/train") for f in filenames if f.endswith('_leftImg8bit.jpg')]

print(f"Found {len(image_files)} image files.")


Found 7034 image files.


In [None]:
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import os
import segmentation_models_pytorch as smp
import torchmetrics.functional as tmf
from torchmetrics import JaccardIndex 
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
import numpy as np

n_classes = 40  # Based on your labels

# Split the dataset
image_files = [os.path.join(dp, f) for dp, dn, filenames in os.walk("D:/New folder/Inter_Bootcamp/dataset/train") for f in filenames if f.endswith('_leftImg8bit.jpg')]
if len(image_files) == 0:
    raise ValueError("No image files found. Check the directory path or file naming convention.")
    
train_files, val_files = train_test_split(image_files, test_size=0.2, random_state=42)

class CustomCityscapesDataset(Dataset):
    def __init__(self, image_files, label_dir, transform=None):
        self.image_files = image_files
        self.label_dir = label_dir
        self.transform = transform
        
    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = self.image_files[idx]
        img_name = os.path.basename(img_path).replace('_leftImg8bit.jpg', '')
        batch_name = os.path.basename(os.path.dirname(img_path))

        label_path = os.path.join(self.label_dir, batch_name, f"{img_name}_gtFine_labelColors.png")
        image = Image.open(img_path).convert("RGB")
        label = Image.open(label_path).convert("RGB")

        if self.transform:
            image = self.transform(image)
            label = self.transform(label)

        return image, label

# Dataset transformation
data_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

# Create datasets
train_dataset = CustomCityscapesDataset(train_files, "D:/New folder/Inter_Bootcamp/dataset/train", transform=data_transform)
val_dataset = CustomCityscapesDataset(val_files, "D:/New folder/Inter_Bootcamp/dataset/train", transform=data_transform)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=4)

# Helper function to encode segmented maps
def encode_segmap(mask, label_list):
    mask = mask.astype(np.uint8)
    label_map = np.zeros(mask.shape[:2], dtype=np.int32)
    for label in label_list:
        r, g, b = label[11]
        color_mask = (mask[:, :, 0] == r) & (mask[:, :, 1] == g) & (mask[:, :, 2] == b)
        label_map[color_mask] = label[5]
    return label_map

# Define the labels (You already have this in your code, ensure consistency)
labels_list = [
    ('road', 0, 7, 0, 0, 0, 'drivable', 0, 0, False, False, (128, 64, 128)),
    ('parking', 1, 9, 255, 1, 1, 'drivable', 1, 0, False, False, (250, 170, 160)),
    ('drivable fallback', 2, 255, 255, 2, 1, 'drivable', 1, 0, False, False, (81, 0, 81)),
    ('sidewalk', 3, 8, 1, 3, 2, 'non-drivable', 2, 1, False, False, (244, 35, 232)),
    ('rail track', 4, 10, 255, 3, 3, 'non-drivable', 3, 1, False, False, (230, 150, 140)),
    ('non-drivable fallback', 5, 255, 9, 4, 3, 'non-drivable', 3, 1, False, False, (152, 251, 152)),
    ('person', 6, 24, 11, 5, 4, 'living-thing', 4, 2, True, False, (220, 20, 60)),
    ('animal', 7, 255, 255, 6, 4, 'living-thing', 4, 2, True, True, (246, 198, 145)),
    ('rider', 8, 25, 12, 7, 5, 'living-thing', 5, 2, True, False, (255, 0, 0)),
    ('motorcycle', 9, 32, 17, 8, 6, '2-wheeler', 6, 3, True, False, (0, 0, 230)),
    ('bicycle', 10, 33, 18, 9, 7, '2-wheeler', 6, 3, True, False, (119, 11, 32)),
    ('autorickshaw', 11, 255, 255, 10, 8, 'autorickshaw', 7, 3, True, False, (255, 204, 54)),
    ('car', 12, 26, 13, 11, 9, 'car', 7, 3, True, False, (0, 0, 142)),
    ('truck', 13, 27, 14, 12, 10, 'large-vehicle', 8, 3, True, False, (0, 0, 70)),
    ('bus', 14, 28, 15, 13, 11, 'large-vehicle', 8, 3, True, False, (0, 60, 100)),
    ('caravan', 15, 29, 255, 14, 12, 'large-vehicle', 8, 3, True, True, (0, 0, 90)),
    ('trailer', 16, 30, 255, 15, 12, 'large-vehicle', 8, 3, True, True, (0, 0, 110)),
    ('train', 17, 31, 16, 15, 12, 'large-vehicle', 8, 3, True, True, (0, 80, 100)),
    ('vehicle fallback', 18, 355, 255, 15, 12, 'large-vehicle', 8, 3, True, False, (136, 143, 153)),
    ('curb', 19, 255, 255, 16, 13, 'barrier', 9, 4, False, False, (220, 190, 40)),
    ('wall', 20, 12, 3, 17, 14, 'barrier', 9, 4, False, False, (102, 102, 156)),
    ('fence', 21, 13, 4, 18, 15, 'barrier', 10, 4, False, False, (190, 153, 153)),
    ('guard rail', 22, 14, 255, 19, 16, 'barrier', 10, 4, False, False, (180, 165, 180)),
    ('billboard', 23, 255, 255, 20, 17, 'structures', 11, 4, False, False, (174, 64, 67)),
    ('traffic sign', 24, 20, 7, 21, 18, 'structures', 11, 4, False, False, (220, 220, 0)),
    ('traffic light', 25, 19, 6, 22, 19, 'structures', 11, 4, False, False, (250, 170, 30)),
    ('pole', 26, 17, 5, 23, 20, 'structures', 12, 4, False, False, (153, 153, 153)),
    ('polegroup', 27, 18, 255, 23, 20, 'structures', 12, 4, False, False, (153, 153, 153)),
    ('obs-str-bar-fallback', 28, 255, 255, 24, 21, 'structures', 12, 4, False, False, (169, 187, 214)),
    ('building', 29, 11, 2, 25, 22, 'construction', 13, 5, False, False, (70, 70, 70)),
    ('bridge', 30, 15, 255, 26, 23, 'construction', 13, 5, False, False, (150, 100, 100)),
    ('tunnel', 31, 16, 255, 26, 23, 'construction', 13, 5, False, False, (150, 120, 90)),
    ('vegetation', 32, 21, 8, 27, 24, 'vegetation', 14, 5, False, False, (107, 142, 35)),
    ('sky', 33, 23, 10, 28, 25, 'sky', 15, 6, False, False, (70, 130, 180)),
    ('fallback background', 34, 255, 255, 29, 25, 'object fallback', 15, 6, False, False, (169, 187, 214)),
    ('unlabeled', 35, 0, 255, 255, 255, 'void', 255, 255, False, True, (0, 0, 0)),
    ('ego vehicle', 36, 1, 255, 255, 255, 'void', 255, 255, False, True, (0, 0, 0)),
    ('rectification border', 37, 2, 255, 255, 255, 'void', 255, 255, False, True, (0, 0, 0)),
    ('out of roi', 38, 3, 255, 255, 255, 'void', 255, 255, False, True, (0, 0, 0)),
    ('license plate', 39, 255, 255, 255, 255, 'vehicle', 255, 255, False, True, (0, 0, 142)),
]

# Define the segmentation model with lightning
class OurModel(LightningModule):
    def __init__(self):
        super(OurModel, self).__init__()
        self.model = smp.Unet(
            encoder_name="resnet50", 
            encoder_weights="imagenet", 
            in_channels=3, 
            classes=n_classes
        )
        self.lr = 1e-3
        self.criterion = smp.losses.DiceLoss(mode='multiclass')
        self.metric = JaccardIndex(task='multiclass', num_classes=n_classes)
    
    def forward(self, x):
        return self.model(x)
    
    def configure_optimizers(self):
        return torch.optim.AdamW(self.parameters(), lr=self.lr)
    
    def training_step(self, batch, batch_idx):
        img, seg = batch
        
        # Move img and seg to the device (GPU) and set the correct data types
        img = img.to(self.device).half()  # Convert input images to float16 and move to GPU
        seg = seg.permute(0, 2, 3, 1).cpu().numpy()  # BCHW -> BHWC
        seg = np.array([encode_segmap(s, labels_list) for s in seg])  # Encode segmentation mask
        seg = torch.tensor(seg, dtype=torch.long).to(self.device)  # Convert to tensor and move to GPU
        
        output = self(img)
        loss = self.criterion(output, seg)
        iou = self.metric(output, seg)
        
        self.log('train_loss', loss, on_epoch=True)
        self.log('train_iou', iou, on_epoch=True)
        
        return loss

    def validation_step(self, batch, batch_idx):
        img, seg = batch
        
        # Move img and seg to the device (GPU) and set the correct data types
        img = img.to(self.device).half()  # Convert input images to float16 and move to GPU
        seg = seg.permute(0, 2, 3, 1).cpu().numpy()  # BCHW -> BHWC
        seg = np.array([encode_segmap(s, labels_list) for s in seg])  # Encode segmentation mask
        seg = torch.tensor(seg, dtype=torch.long).to(self.device)  # Convert to tensor and move to GPU
        
        output = self(img)
        loss = self.criterion(output, seg)
        iou = self.metric(output, seg)
        
        self.log('val_loss', loss, on_epoch=True)
        self.log('val_iou', iou, on_epoch=True)
        
        return loss

# Training setup
checkpoint_callback = ModelCheckpoint(monitor='val_loss', save_top_k=1)
trainer = Trainer(
    max_epochs=25, 
    accelerator='gpu', 
    precision=16, 
    callbacks=[checkpoint_callback]
)

# Start training
trainer.fit(OurModel(), train_loader, val_loader)


  rank_zero_warn(
Using 16bit Automatic Mixed Precision (AMP)
  scaler = torch.cuda.amp.GradScaler()
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3050 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                   | Params
-----------------------------------------------------
0 | model     | Unet                   | 32.5 M
1 | criterion | DiceLoss               | 0     
2 | metric    | MulticlassJaccardIndex | 0     
-----------------------------------------------------
32.5 M    Trainable params
0 

Sanity Checking: 0it [00:00, ?it/s]