In [1]:
import cv2
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install utils

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting utils
  Downloading utils-1.0.1-py2.py3-none-any.whl (21 kB)
Installing collected packages: utils
Successfully installed utils-1.0.1


Model

In [3]:
import torch
import torch.nn as nn
import torchvision.transforms.functional as TF

#making class of 2 convolution layers in a step
class DoubleConv(nn.Module):                                             
	def __init__(self,in_c,out_c):
		super(DoubleConv,self).__init__()
		self.conv = nn.Sequential(# as it is not random so we are using sequential
			nn.Conv2d(in_c,out_c,3,1,1,bias=False),# kernel size=3, stride and padding=1 according to the paper 
			nn.BatchNorm2d(out_c),
			nn.ReLU(inplace=True),#and bias is False as we are usng batch norm so if we use both it cancels out each others effect
			nn.Conv2d(out_c,out_c,3,1,1,bias=False),# here we are using the same input and output i.e out 
			nn.BatchNorm2d(out_c),
			nn.ReLU(inplace=True))#channels because the depth remains same in one step

	def forward(self,a):# this is doing forward pass
		return self.conv(a)


 # this is the UNET architecture implementation
class UNet(nn.Module):
	def __init__(self,in_c = 3,out_c = 1,features=[64,128,256,512]):# as RGB image is input so we are taking 3 channels,output channels have 1 as we are doing binary segmentation
 # features represent the depth size that is increasing or decreasing per step after 2 convolutions.
		super(UNet,self).__init__()
		self.up=nn.ModuleList()# the features/depth list storing the information of input and output at upsampling process
		self.down = nn.ModuleList()# the features/depth list storing the information of input and output at downsampling process
		self.pool = nn.MaxPool2d(kernel_size=2,stride=2)# application of maxpooling after each step(application of 2 convolution layers)

		#going down
		for f in features:
			self.down.append(DoubleConv(in_c,f))
			in_c = f

		#bottom layer
		self.neck = DoubleConv(features[-1],features[-1]*2) # the bottleneck or the in between layer between upsampling and downsampling
		

		#going up 
		for feature in reversed(features):
			self.up.append(nn.ConvTranspose2d(feature*2 , feature, kernel_size =2,stride=2,))
			self.up.append(DoubleConv(feature*2,feature))

		
		self.final_conv = nn.Conv2d(features[0],out_c,kernel_size=1)#the final layer after upsampling


	def forward(self,a):
		skip_connections = []

		for layers in self.down:#applying the downsamping and the 2 convolution layer step
			a = layers(a)
			skip_connections.append(a)
			a = self.pool(a)


		a = self.neck(a)#applying the bottleneck

		skip_connections = skip_connections[::-1] #reverse the list #skip connection for upsampling
		#skip_connections = skip_connections.reverse()

		for idx in range(0,len(self.up),2):#applying downsampling steps
			a = self.up[idx](a)
			skip_connection = skip_connections[idx //2]
			
			if a.shape != skip_connection.shape:#for equal size of input and output image
				a = TF.resize(a,size=skip_connection.shape[2:])  #resize ,[2:] get the current shape

			concat_skip  = torch.cat((skip_connection,a),dim=1)

			a = self.up[idx+1](concat_skip)


		a = self.final_conv(a)#applying the final layer convolution after upsampling

		return a



def test():
	a = torch.rand((3,1,161,161))
	model = UNet(1,1)
	pred = model(a)
#	model.save('/content/drive/MyDrive/UNET/') 
	print(pred.shape)
	assert pred.shape==a.shape

#if __name__ =='__main__':
test()

torch.Size([3, 1, 161, 161])


Dataset

In [4]:
import os
from PIL import Image
from torch.utils.data import Dataset
import numpy as np
class data(Dataset):
  def __init__(self,image_dir,mask_dir,transform=None):# assigning respective directories
    self.image_dir=image_dir
    self.mask_dir=mask_dir
    self.transform=transform
    self.images=os.listdir(image_dir)#listing all those files in that folder
  def __len__(self):#getting the length of the images
    return len(self.images)
  def __getitem__(self,index):
    img_path=os.path.join(self.image_dir,self.images[index])
    mask_path=os.path.join(self.mask_dir,self.images[index])
    image=np.array(Image.open(img_path).convert("RGB"))#converting to colored image
    mask=np.array(Image.open(mask_path).convert("L"),dtype=np.float32)#converting to binary image
    mask[mask==255.0]=1.0
    if self.transform is not None:#data augmentation
      augmentations=self.transform(image=image,mask=mask)
      image=augmentations["image"]
      mask=augmentations["mask"]
    return image,mask


Utils

In [5]:
import torch
import torchvision
from torch.utils.data import DataLoader


device = 'cuda' #if torch.cuda.is_available() else'cpu'
#saving checkpoint
def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    print("=> Saving checkpoint")
    torch.save(state, filename)
#loading that checkpoint
def load_checkpoint(checkpoint, model):
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])

def get_loaders(
    train_dir,
    train_maskdir,
    val_dir,
    val_maskdir,
    batch_size,
    train_transform,
    val_transform,
    num_workers=4,
    pin_memory=True,#makes sure that no copy elements are on the disk
):
#training dataset
    train_ds = data(
        image_dir=train_dir,
        mask_dir=train_maskdir,
        transform=train_transform,
    )
#training loader
    train_loader = DataLoader(
        train_ds,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=pin_memory,
        shuffle=True,
    )
#validation dataset
    val_ds = data(
        image_dir=val_dir,
        mask_dir=val_maskdir,
        transform=val_transform,
    )
#validation loader
    val_loader = DataLoader(
        val_ds,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=pin_memory,
        shuffle=False,
    )

    return train_loader, val_loader


def check_accuracy(loader, model, device="cuda"):
    num_correct = 0
    num_pixels = 0
    dice_score = 0
    model.eval()

    with torch.no_grad():#checking against pixels accuracy
        for x, y in loader:
            x = x.to(device)#mounting to gpu
            y = y.to(device).unsqueeze(1)#mounting to gpu
            preds = torch.sigmoid(model(x))#applying the sigmoid function
            preds = (preds > 0.5).float()
            num_correct += (preds == y).sum()#accumulating the predictions
            num_pixels += torch.numel(preds)
            dice_score += (2 * (preds * y).sum()) / (
                (preds + y).sum() + 1e-8
            )

    print(
        f"Got {num_correct}/{num_pixels} with acc {num_correct/num_pixels*100:.2f}"
    )
    print(f"Dice score: {dice_score/len(loader)}")
    model.train()


def save_predictions_as_imgs(#saving the predictions
    loader, model, folder="saved_images/", device="cuda"
):
    model.eval()#evaluating
    for idx, (x, y) in enumerate(loader):
        x = x.to(device=device)
        with torch.no_grad():
            preds = torch.sigmoid(model(x))
            preds = (preds > 0.5).float()
        torchvision.utils.save_image(
            preds, f"{folder}/pred_{idx}.png"
        )
        torchvision.utils.save_image(y.unsqueeze(1), f"{folder}{idx}.png")

  #  model.train()

Training with test

In [6]:
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim

learning_rate = 1e-4
device = 'cuda' if torch.cuda.is_available() else'cpu'
batch_size = 32
num_epochs = 1
num_workers = 2
image_height = 160  #1280 og
image_width = 240 #1918 og
pin_memory = True 
load_model = False
train_img_dir = '/content/drive/MyDrive/dataset/data/train/image/'
train_mask_dir = '/content/drive/MyDrive/dataset/data/train/mask/'
val_img_dir = '/content/drive/MyDrive/dataset/data/test/image/'
val_mask_dir='/content/drive/MyDrive/dataset/data/test/mask/'

def train_fn(loader,model,optimizer,loss_fn,scaler):
	loop = tqdm(loader)#making progress bar

	for batch_idx,(data,targets) in enumerate(loop):
		data = data.to(device)
		targets = targets.float().unsqueeze(1).to(device=device)

		with torch.cuda.amp.autocast():#pushig data to 16bit
			predictions = model(data)
			loss = loss_fn(predictions,targets)

		optimizer.zero_grad()#backward propagation
		scaler.scale(loss).backward()
		scaler.step(optimizer)
		scaler.update()



		#tqdmloop
		loop.set_postfix(loss=loss.item())#showing loss function





#data augmentation main part
train_transform = A.Compose([ 
		A.Resize(height=image_height,width = image_width),
		A.Rotate(limit=35,p=1.0),
		A.HorizontalFlip(p=0.5),
		A.VerticalFlip(p=0.1),
		A.Normalize(
			mean=[0.0,0.0,0.0],
			std=[1.0,1.0,1.0],
			max_pixel_value = 255.0
			),
		ToTensorV2(), ] )
val_transforms = A.Compose(

		[ 

		A.Resize(height=image_height,width = image_width),
		A.Normalize(
			mean=[0.0,0.0,0.0],
			std=[1.0,1.0,1.0],
			max_pixel_value = 255.0
			),
		ToTensorV2(), ] )

model = UNet(3,1).to(device)

loss_fn = nn.BCEWithLogitsLoss()#binary cross entropy loss for binary image
optimizer = optim.Adam(model.parameters(),lr=learning_rate)#optimizer function


train_loader,val_loader = get_loaders(
		train_img_dir,
		train_mask_dir,
		val_img_dir,
		val_mask_dir,
		batch_size,
		train_transform,
    val_transforms,
		num_workers,
		pin_memory,
	)
scaler = torch.cuda.amp.GradScaler()#gradient scaling
for epoch in range(num_workers):
	train_fn(train_loader,model,optimizer,loss_fn,scaler)

		#save
		#check accuracy
	checkpoint = {
			'state_dic':model.state_dict(),
			'optimizer':optimizer.state_dict(),
		}
	save_checkpoint(checkpoint)

	check_accuracy(val_loader,model,device=device)

	save_predictions_as_imgs(
		val_loader,model,folder='/content/drive/MyDrive/dataset/data/saved_images/',device=device)



#main()#

100%|██████████| 69/69 [05:27<00:00,  4.75s/it, loss=0.614]


=> Saving checkpoint
Got 14884255/21043200 with acc 70.73
Dice score: 9.126721124630421e-05


100%|██████████| 69/69 [00:29<00:00,  2.31it/s, loss=0.672]


=> Saving checkpoint
Got 14884799/21043200 with acc 70.73
Dice score: 0.0


The most difficult part was the integration and how the steps were broken down as we have never implemented a neuralnetwork of this size. By watching the tutorial, i was able to grasp the understanding of the integration and connection.