## Paper Review
U-Net performs better than the sliding-window method.

CNNs are typically used for classification tasks. So, for image segmentation with the sliding-window method, the image is cropped into a patch around each pixel, and each patch is passed through a CNN to classify that pixel.

An advantage of this method is that, since the image is divided into patches, the amount of training data is much larger than the number of training images.

Although the method has advantages, its disadvantages outweigh them.
Firstly, such a network can be quite slow, because the model is run separately for each pixel, and since each patch also covers the pixel's neighbours, there is a lot of redundant computation on overlapping patches.

### Architecture of U-Net
![archi.png](https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/u-net-architecture.png)

It first downscales the image to learn what is in the image, and then upscales it to learn where in the image the object is located.





## Creating the Model

In [2]:
import torch
import torch.nn as nn
import torchvision.transforms.functional as TF

## This is the Double Conv used in each step
class DoubleConv(nn.Module):
  """Two stacked 3x3 convolutions, each followed by batch norm and ReLU.

  Both convolutions use stride 1 and padding 1, so the spatial size of the
  input is preserved; only the channel count changes, from ``in_channels``
  to ``out_channels``.
  """

  def __init__(self, in_channels, out_channels):
    super().__init__()
    stages = []
    # First stage maps in_channels -> out_channels, second keeps out_channels.
    for stage_in in (in_channels, out_channels):
      stages.extend([
          # bias=False: each convolution is immediately followed by BatchNorm2d.
          nn.Conv2d(
              stage_in, out_channels,
              kernel_size=3, stride=1, padding=1, bias=False,
          ),
          nn.BatchNorm2d(out_channels),
          nn.ReLU(inplace=True),
      ])
    self.conv = nn.Sequential(*stages)

  def forward(self, x):
    """Run ``x`` through both conv -> batch norm -> ReLU stages."""
    return self.conv(x)

  
class UNET(nn.Module):
  """U-Net segmentation network: encoder, bottleneck and decoder with skips.

  The encoder applies one ``DoubleConv`` per entry of ``features``, halving
  the spatial size with 2x2 max pooling after each. The bottleneck doubles the
  last feature width. The decoder mirrors the encoder: each level upsamples
  with a stride-2 transposed convolution, concatenates the matching encoder
  output along the channel dimension, and applies a ``DoubleConv``. A final
  1x1 convolution maps to ``out_channels``; no activation is applied to it.

  Args:
    in_channels: number of channels of the input tensor.
    out_channels: number of channels produced by the final 1x1 convolution.
    features: channel widths of the encoder levels, shallowest first. Any
      iterable is accepted; the default is a tuple so that no mutable object
      is shared between calls.
  """

  def __init__(
      self, in_channels=3, out_channels=1, features=(64, 128, 256, 512),
  ):
    super().__init__()
    # Freeze into a tuple so indexing and reversed() work for any iterable.
    features = tuple(features)

    self.ups = nn.ModuleList()
    self.downs = nn.ModuleList()
    self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    # Downsampling part of the UNET
    for feature in features:
      self.downs.append(DoubleConv(in_channels, feature))
      in_channels = feature

    # Up part of the UNET
    # We use transposed convolutions for upsampling. self.ups alternates
    # [upsample, DoubleConv, upsample, DoubleConv, ...], deepest level first.
    for feature in reversed(features):
      self.ups.append(
          nn.ConvTranspose2d(
              feature * 2, feature, kernel_size=2, stride=2,
          )
      )
      # Input is feature*2 channels: skip connection concatenated with x.
      self.ups.append(DoubleConv(feature * 2, feature))

    self.bottleneck = DoubleConv(features[-1], features[-1] * 2)
    self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)

  def forward(self, x):
    """Return the ``out_channels``-channel map predicted for ``x``.

    The output has the same spatial size as ``x``; see the resize step below.
    """
    skip_connections = []

    for down in self.downs:
      x = down(x)
      skip_connections.append(x)  # saved before pooling
      x = self.pool(x)

    x = self.bottleneck(x)

    # Walk the decoder from the deepest level, pairing each level with the
    # encoder output of the same depth.
    for level, skip_connection in enumerate(reversed(skip_connections)):
      x = self.ups[2 * level](x)

      # Pooling rounds odd sizes down, so the upsampled map can end up smaller
      # than the skip connection; resize it to match before concatenating.
      if x.shape != skip_connection.shape:
        x = TF.resize(x, size=skip_connection.shape[2:])

      concat_skip = torch.cat((skip_connection, x), dim=1)
      x = self.ups[2 * level + 1](concat_skip)

    return self.final_conv(x)

def test():
  """Smoke-test UNET: the prediction must have the same shape as the input."""
  # (batch, channels, height, width). 161 is odd, so the input does not halve
  # evenly through the pooling stages.
  batch = torch.randn((3, 1, 161, 161))
  net = UNET(in_channels=1, out_channels=1)
  output = net(batch)
  for shape in (output.shape, batch.shape):
    print(shape)
  assert output.shape == batch.shape

# Run the shape check only when this file is executed directly, not on import.
if __name__ == "__main__":
  test()












ModuleNotFoundError: No module named 'torch'

# Getting the data

In [None]:
from google.colab import drive
# Mount Google Drive at /content/gdrive; force_remount=True is passed so an
# existing mount is replaced.
drive.mount("/content/gdrive", force_remount=True)

Mounted at /content/gdrive


In [None]:
!pip install kaggle

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from google.colab import files

# Open the Colab upload dialog, then report the name and size of every file
# that was uploaded.
uploaded = files.upload()

for fn, payload in uploaded.items():
  message = 'User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(payload))
  print(message)
  
# Then move kaggle.json into the folder where the API expects to find it.
!mkdir -p ~/.kaggle/ && mv kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json
User uploaded file "kaggle.json" with length 63 bytes


In [None]:
!rm -r /content/sample_data

rm: cannot remove '/content/sample_data': No such file or directory


In [None]:
!kaggle competitions download -c carvana-image-masking-challenge

Downloading carvana-image-masking-challenge.zip to /content
100% 24.4G/24.4G [02:51<00:00, 194MB/s]
100% 24.4G/24.4G [02:51<00:00, 153MB/s]


In [None]:
!unzip carvana-image-masking-challenge.zip

In [None]:
!unzip metadata.csv.zip
!unzip sample_submission.csv.zip
!unzip test.zip
!unzip test_hq.zip
!unzip train.zip
!unzip train_hq.zip
!unzip train_masks.csv.zip
!unzip train_masks.zip

## Cleaning Data

In [2]:
import os
from PIL import Image
from torch.utils.data import Dataset
import numpy as np

In [3]:
class CarvanaDataset(Dataset):
    """Dataset of images paired with their segmentation masks.

    Each item is an ``(image, mask)`` pair. The image is loaded as RGB; the
    mask is loaded as a single-channel float32 array in which the value 255
    is remapped to 1.0. The mask for ``<name>.jpg`` is looked up in
    ``mask_dir`` as ``<name>_mask.gif``.

    Args:
        image_dir: directory whose entries are the input images.
        mask_dir: directory holding the mask files.
        transfrom: optional callable invoked as
            ``transfrom(image=image, mask=mask)`` that returns a mapping with
            ``"image"`` and ``"mask"`` keys. The misspelled name is kept so
            existing callers keep working.
        transform: correctly spelled, keyword-only alias of ``transfrom``.

    Raises:
        ValueError: if both ``transfrom`` and ``transform`` are supplied.
    """

    def __init__(self, image_dir, mask_dir, transfrom=None, *, transform=None):
        if transfrom is not None and transform is not None:
            raise ValueError("pass only one of 'transform' and 'transfrom'")
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform if transform is not None else transfrom
        # Keep the original misspelled attribute for code that reads it.
        self.transfrom = self.transform
        self.images = os.listdir(image_dir)

    def __len__(self):
        """Return the number of entries found in ``image_dir``."""
        return len(self.images)

    def __getitem__(self, index):
        """Load, and optionally transform, the image/mask pair at ``index``."""
        file_name = self.images[index]
        img_path = os.path.join(self.image_dir, file_name)
        mask_path = os.path.join(
            self.mask_dir, file_name.replace(".jpg", "_mask.gif")
        )

        # Context managers close the underlying files once the pixel data has
        # been copied into NumPy arrays.
        with Image.open(img_path) as img:
            image = np.array(img.convert("RGB"))
        with Image.open(mask_path) as mask_img:
            mask = np.array(mask_img.convert("L"), dtype=np.float32)
        mask[mask == 255.0] = 1.0

        # The original called self.transform while only self.transfrom was
        # ever set, so any supplied transform raised AttributeError.
        if self.transform is not None:
            augmentations = self.transform(image=image, mask=mask)
            image = augmentations["image"]
            mask = augmentations["mask"]
        return image, mask
        

## Training the Model

In [1]:
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
from model import UNET

ModuleNotFoundError: No module named 'torch'