### Sematic Segmentation

**U-Net Parts**

In [1]:
import torch
import torch.nn as nn

class Doubleconv(nn.Module):
  def __init__(self, in_channels, out_channels):
    super().__init__()
    self.conv_op = nn.Sequential(
      nn.Conv2d(in_channels, out_channels, kernel_size = 3, padding = 1),
      nn.ReLU(inplace = True),
      nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
      nn.ReLU(inplace = True)
    )

  def forward(self, x):
    return self.conv_op(x)

class DownSample(nn.Module):
  def __init__(self,in_channels, out_channels):
    super().__init__()
    self.conv = Doubleconv(in_channels, out_channels)
    self.pool = nn.MaxPool2d(kernel_size=2, stride = 2)

  def forward(self, x):
    down = self.conv(x)
    p = self.pool(down)

    return down, p

class UpSample(nn.Module):
  def __init__(self, in_channels, out_channels):
    super().__init__()
    self.up  = nn.ConvTranspose2d(in_channels, in_channels//2, kernel_size = 2, stride = 2)
    self.conv = Doubleconv(in_channels, out_channels)

  def forward(self, x1, x2):
    x1 = self.up(x1)
    x = torch.cat([x1,x2],1)
    return self.conv(x)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


**UNet Model**

In [2]:
class UNet(nn.Module):
  def __init__(self, in_channels, num_classes):
    super().__init__()
    self.down_convolution_1 = DownSample(in_channels, 64)
    self.down_convolution_2 = DownSample(64, 128)
    self.down_convolution_3 = DownSample(128, 256)
    self.down_convolution_4 = DownSample(256, 512)

    self.bottle_neck = Doubleconv(512, 1024)

    self.up_convolution_1 = UpSample(1024, 512)
    self.up_convolution_2 = UpSample(512,256)
    self.up_convolution_3 = UpSample(256, 128)
    self.up_convolution_4 = UpSample(128, 64)

    self.out = nn.Conv2d(in_channels=64, out_channels=num_classes, kernel_size = 1)

  def forward(self, x):
    down_1, p1 = self.down_convolution_1(x)
    down_2, p2 = self.down_convolution_2(p1)
    down_3, p3 = self.down_convolution_3(p2)
    down_4, p4 = self.down_convolution_4(p3)

    b = self.bottle_neck(p4)

    up_1 = self.up_convolution_1(b, down_4)
    up_2 = self.up_convolution_2(up_1, down_3)
    up_3 = self.up_convolution_3(up_2, down_2)
    up_4 = self.up_convolution_4(up_3, down_1)

    out = self.out(up_4)
    return out



**Data Class**

In [None]:
import os
from PIL import Image
from torch.utils.data.dataset import Dataset
from torchvision import transforms

class CarvanaDataset(Dataset):
  def __init__(self , test = False):
    self.test = test
    self.root_path = r'C:\V\Sem_4_notes\Pytorch\DeepGlode_Road_Extraction_DataSet'
    if self.test:
      self.images = sorted([fr'{self.root_path}\test\{i}' for i in os.listdir(fr'{self.root_path}\test')])
    else:
      self.images = []
      self.mask = []

      for i in os.listdir(fr'{self.root_path}\train'):
        if 'sat' in i:
          self.images.append(i)
        if 'mask' in i:
          self.mask.append(i)
      self.images.sort()
      self.mask.sort()

    self.transform = transforms.Compose([
      transforms.Resize((512,512)),
      transforms.ToTensor()
    ])

  def __getitem__(self, index):
      img_path = os.path.join(self.root_path, 'train', self.images[index])
      img = Image.open(img_path).convert("RGB")
      img = self.transform(img)

      if self.test:
          return img
      else:
          mask_path = os.path.join(self.root_path, 'train', self.mask[index])
          mask = Image.open(mask_path).convert('L')
          mask = self.transform(mask)
          return img, mask


  def __len__(self):
    return len(self.images)

data = CarvanaDataset()
data.root_path
img = data.images
mask = data.mask

img_cleaned = [s.replace('_sat.jpg',"") for s in img]
mask_cleaned = [s.replace('_mask.png',"") for s in mask]
print(len(img_cleaned),len(mask_cleaned))
print(mask_cleaned[270], img_cleaned[270])

#correct = False
#for i in range(2996):
#  if img_cleaned[i] != mask_cleaned[i]:
#    correct = True
#    print(i,img_cleaned[i],mask_cleaned[i])
#print(correct)


6226 6226
141485 141485


**UNet Train**

In [9]:
import torch
from torch import optim, nn
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm

LR = 3e-4
BATCH_SIZE = 8
EPOCHS = 1000
MODEL_SAVE_PATH = r'C:\V\Sem_4_notes\Pytorch\DeepGlode_Road_Extraction_DataSet'

device = 'cuda' if torch.cuda.is_available() else 'cpu'
train_data = CarvanaDataset()

total = len(train_data)
train_len = int(0.8*total)
val_length = total - train_len
lengths = [train_len, val_length]

generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(train_data, lengths, generator = generator)

train_dataloader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=True)

model = UNet(in_channels = 3, num_classes=1).to(device)
optimizer = optim.AdamW(model.parameters(),lr = LR)
criterion = nn.BCEWithLogitsLoss() #loss = BCE(torch.sigmoid(outputs), targets)

for epoch in tqdm(range(EPOCHS)):
  model.train()
  training_running_loss = 0
  for idx, (img, mask) in enumerate(tqdm(train_dataloader)):
    img = img.float().to(device)
    mask = mask.float().to(device)

    y_pred = model(img)
    optimizer.zero_grad()

    loss = criterion(y_pred, mask)
    training_running_loss += loss.item()

    loss.backward()
    optimizer.step()

  train_loss = training_running_loss/(idx+1)

  model.eval()
  val_running_loss  = 0
  with torch.no_grad():
    for idx, img_mask in enumerate(tqdm(val_dataloader)):
      img = img_mask[0].float().to(device)
      mask = img_mask[1].float().to(device)

      y_pred = model(img)
      loss = criterion(y_pred, mask)

      val_running_loss+=loss.item()

    val_loss = val_running_loss/(idx+1)
  print('-'*30)
  print(f'Train Loss Epoch {epoch + 1}: {train_loss:.4f}')
  print(f'Val Loss Epoch {epoch +1} : {val_loss: .4f}')

torch.save(model.state_dict(), MODEL_SAVE_PATH)


  0%|          | 0/623 [00:36<?, ?it/s]]
  0%|          | 0/1000 [00:36<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacity of 4.00 GiB of which 0 bytes is free. Of the allocated memory 13.60 GiB is allocated by PyTorch, and 740.37 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [2]:
from PIL import Image
import numpy as np

data = [
  [[255, 0, 0], [0, 255, 0]],   
  [[0, 0, 255], [255, 255, 0]]    
]


np_data = np.array(data, dtype=np.uint8)

img = Image.fromarray(np_data, mode='RGB')

large_img = img.resize((200, 200), resample=Image.NEAREST)

large_img.save("upscaled_image.png")
large_img.show()
 

In [None]:
import torch
from torch import nn
import torch.nn.functional as F
from collections import Counter

class FocalLoss(nn.Module):
  def __init__(self, weights = None, gamma = 2, reduction = 'mean' ):
    super().__init__()
    self.weights = weights
    self.gamma = gamma
    self.reduction = reduction
    self.count = 0
  
  def forward(self, y_pred, y_true):
    #if y_pred.shape != y_true.shape:
    #  y_pred = y_pred.squeeze(1)
    y_true = y_true.float()

    if self.weights is None and self.count == 0:
      flat_true = y_true.view(-1).long()
      num_samples = y_true.numel()
      print('num_samples',num_samples)
      counts = Counter(flat_true)
      length = y_pred.shape[1]
      weights = []

      for i in range(length):
        weights.append(num_samples/(counts.get(i, 1e-6*length)))
      weights = torch.tensor(weights, dtype = torch.float32, device = y_true.device)  
      self.weights = weights
      self.count+= 1
    
    print('Flat_true ',flat_true.shape)
    log_probs = F.log_softmax(y_pred, dim = 1)
    probs = torch.exp(log_probs)
    print('probs.shape ',probs.shape)
      
    probs_flat = probs.permute(0,2,3,1).reshape(-1,probs.shape[1]) # -> shape [BxHxW, C]
    pred_probs = probs_flat[torch.arange(probs_flat.size(0)), flat_true] # -> shape [N]
    print('pred_prob ',pred_probs.size(), pred_probs.dtype)
    
    log_pred_probs = torch.log(pred_probs + 1e-8)
    focal_term = (1-pred_probs)**self.gamma
    sample_weights = self.weights[flat_true]

    loss = -(sample_weights)*(focal_term)*(log_pred_probs)
    
    if self.reduction == 'mean':
      return loss.mean()
    elif self.reduction == 'sum':
      return loss.sum()
    else:
      return loss
  

loss_fn = FocalLoss()
y_pred = torch.zeros(1, 5, 2, 2)
y_pred[:, 2] = 10 
y_true = torch.full((1, 2, 2), 2)

loss = loss_fn(y_pred, y_true)
print(loss)


num_samples 4
Flat_true  torch.Size([4])
probs.shape  torch.Size([1, 5, 2, 2])
pred_prob  torch.Size([4]) torch.float32
tensor(4.7928e-06)
