<a href="https://colab.research.google.com/github/Dimildizio/DS_course/blob/main/Neural_networks/Convolutions/Breast_cancer_segmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Breast cancer segmentation

## Installs & Imports

In [1]:
%%capture
#pip install catalyst

In [24]:
#import albumentations as albs
import cv2
import io
import os
import random
import torch
import torch.nn as nn

from glob import glob
from sklearn.model_selection import train_test_split
from torch import optim
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToTensor
from IPython import display

#from catalyst import dl, utils

## Download dataset

In [3]:
!curl -JLO 'https://www.dropbox.com/scl/fi/gs3kzp6b8k6faf667m5tt/breast-cancer-cells-segmentation.zip?rlkey=md3mzikpwrvnaluxnhms7r4zn'
!unzip breast-cancer-cells-segmentation.zip

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   256    0   256    0     0    331      0 --:--:-- --:--:-- --:--:--   331
100   491    0   491    0     0    208      0 --:--:--  0:00:02 --:--:--  1352
100 93.6M  100 93.6M    0     0  11.5M      0  0:00:08  0:00:08 --:--:-- 19.7M
Archive:  breast-cancer-cells-segmentation.zip
  inflating: Images/ytma10_010704_benign1_ccd.tif  
  inflating: Images/ytma10_010704_benign1_ccd.tif.xml  
  inflating: Images/ytma10_010704_benign2_ccd.tif  
  inflating: Images/ytma10_010704_benign2_ccd.tif.xml  
  inflating: Images/ytma10_010704_benign3_ccd.tif  
  inflating: Images/ytma10_010704_benign3_ccd.tif.xml  
  inflating: Images/ytma10_010704_malignant1_ccd.tif  
  inflating: Images/ytma10_010704_malignant1_ccd.tif.xml  
  inflating: Images/ytma10_010704_malignant2_ccd.tif  
  inflating: Images/ytma10_010704_malignant2_ccd.tif.xml  
  infl

## EDA

In [10]:
!unzip breast-cancer-cells-segmentation.zip

Archive:  breast-cancer-cells-segmentation.zip
replace Images/ytma10_010704_benign1_ccd.tif? [y]es, [n]o, [A]ll, [N]one, [r]ename: Y
  inflating: Images/ytma10_010704_benign1_ccd.tif  
replace Images/ytma10_010704_benign1_ccd.tif.xml? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: Images/ytma10_010704_benign1_ccd.tif.xml  
replace Images/ytma10_010704_benign2_ccd.tif? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: Images/ytma10_010704_benign2_ccd.tif  
replace Images/ytma10_010704_benign2_ccd.tif.xml? [y]es, [n]o, [A]ll, [N]one, [r]ename: a
error:  invalid response [a]
replace Images/ytma10_010704_benign2_ccd.tif.xml? [y]es, [n]o, [A]ll, [N]one, [r]ename: ya
  inflating: Images/ytma10_010704_benign2_ccd.tif.xml  
replace Images/ytma10_010704_benign3_ccd.tif? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: Images/ytma10_010704_benign3_ccd.tif  
  inflating: Images/ytma10_010704_benign3_ccd.tif.xml  
  inflating: Images/ytma10_010704_malignant1_ccd.tif  
  inflating: Ima

In [11]:
imgs = glob(os.path.join("/content/Images", "*.tif"))

In [12]:
print(os.getcwd())

/content


In [13]:
masks = [os.path.join("/content/Masks", i.rsplit("/",1)[-1].split("_ccd")[0]+".TIF") for i in imgs]

In [14]:
print(masks)

['/content/Masks/ytma49_042403_benign1.TIF', '/content/Masks/ytma49_042203_malignant1.TIF', '/content/Masks/ytma49_111003_malignant3.TIF', '/content/Masks/ytma49_042403_benign3.TIF', '/content/Masks/ytma49_042003_malignant1.TIF', '/content/Masks/ytma12_010804_benign2.TIF', '/content/Masks/ytma10_010704_malignant3.TIF', '/content/Masks/ytma55_030603_benign5.TIF', '/content/Masks/ytma49_042003_benign1.TIF', '/content/Masks/ytma10_010704_benign3.TIF', '/content/Masks/ytma49_042203_malignant3.TIF', '/content/Masks/ytma49_111003_benign2.TIF', '/content/Masks/ytma49_042003_benign3.TIF', '/content/Masks/ytma12_010804_malignant3.TIF', '/content/Masks/ytma49_042203_benign3.TIF', '/content/Masks/ytma10_010704_benign2.TIF', '/content/Masks/ytma55_030603_benign1.TIF', '/content/Masks/ytma49_042403_benign2.TIF', '/content/Masks/ytma12_010804_malignant1.TIF', '/content/Masks/ytma12_010804_benign1.TIF', '/content/Masks/ytma49_042403_malignant2.TIF', '/content/Masks/ytma23_022103_benign3.TIF', '/conte

In [21]:
dataset_tuples = list(zip(imgs, masks))
random.shuffle(dataset_tuples)
train_tuples, test_tuples = train_test_split(dataset_tuples)

## Write Dataloaders and transform data

In [22]:
SIZE = 512

In [26]:
class BreastDataset(Dataset):

    def __init__(self, img_mask):
        self.img_mask = img_mask

    def __len__(self,):
        return len(self.img_mask)

    def __getitem__(self, idx):
        img_path, mask_path = self.img_mask[idx]
        image = self.get_transform(img_path)
        mask = self.get_transform(mask_path, normalize=False)
        mask[mask > 0] = 1
        return image,mask

    def transform_image(self, path, normalize=True,resize=(SIZE, SIZE)):
       image = io.imread(path)
       image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
       image = cv2.resize(image, resize)
       if normalize:
          return image/255
       return image

In [28]:
train_dataset = BreastDataset(train_tuples)
test_dataset = BreastDataset(test_tuples)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)