<a href="https://colab.research.google.com/github/Dr-Doofensmirtz/my_projects/blob/master/data_science_bowl_2018/1_intro.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# getting data

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


downloading data from kaggle

In [2]:
import os
try:
  os.environ['KAGGLE_CONFIG_DIR'] = "drive/MyDrive/kaggle/"
except:
  print('path not found')

In [3]:
!kaggle competitions download -c data-science-bowl-2018

Downloading stage1_sample_submission.csv.zip to /content
  0% 0.00/2.62k [00:00<?, ?B/s]
100% 2.62k/2.62k [00:00<00:00, 2.30MB/s]
Downloading stage1_train.zip to /content
 85% 67.0M/79.1M [00:00<00:00, 48.9MB/s]
100% 79.1M/79.1M [00:00<00:00, 124MB/s] 
Downloading stage1_test.zip to /content
  0% 0.00/9.10M [00:00<?, ?B/s]
100% 9.10M/9.10M [00:00<00:00, 83.7MB/s]
Downloading stage2_test_final.zip to /content
 97% 269M/276M [00:05<00:00, 41.5MB/s]
100% 276M/276M [00:05<00:00, 50.1MB/s]
Downloading stage2_sample_submission_final.csv.zip to /content
  0% 0.00/112k [00:00<?, ?B/s]
100% 112k/112k [00:00<00:00, 37.2MB/s]
Downloading stage1_train_labels.csv.zip to /content
  0% 0.00/2.67M [00:00<?, ?B/s]
100% 2.67M/2.67M [00:00<00:00, 180MB/s]
Downloading stage1_solution.csv.zip to /content
  0% 0.00/386k [00:00<?, ?B/s]
100% 386k/386k [00:00<00:00, 116MB/s]


extracting training data

In [4]:
import zipfile
from tqdm import tqdm
import numpy as np
import pandas as pd
import os
import shutil
import glob

from skimage.io import *
from skimage.transform import resize
from sklearn import model_selection

import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, utils
from torch import nn

import albumentations as A
from albumentations.pytorch import ToTensor

In [5]:
with zipfile.ZipFile("stage1_train.zip", "r") as file:
  file.extractall("train_data/")

In [6]:
def get_mask(mask_dir, IMG_HEIGHT, IMG_WIDTH):   
    mask = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.float32)
    
    for mask_f in os.listdir(mask_dir):
        mask_ = imread(os.path.join(mask_dir, mask_f))
        mask_ = np.expand_dims(resize(mask_, (IMG_HEIGHT,IMG_WIDTH), mode='constant', preserve_range=True), axis=-1)
        mask = np.maximum(mask, mask_)
    return mask

In [7]:
def get_folds(df):

  df['kfold'] = -1
  df = df.sample(frac=1).reset_index(drop=True)
  kf = model_selection.KFold(n_splits=5)

  for fold_, (_, x) in enumerate(kf.split(df)):
      for xs in x:
          df.loc[xs, "kfold"] = fold_

  return df

In [None]:
def visualize_data(num=5):
  fig, ax = plt.subplots(num, 2, figsize=(num, 5))
  
  for i in range(num):
    ax[i,0] = imshow()

In [8]:
img_dir = glob.glob("train_data/*/images/*.png")
mask_dir = glob.glob("train_data/*/masks/")

In [9]:
data = {"img_": img_dir, "mask_": mask_dir}
df = pd.DataFrame(data=data)
df_fold = get_folds(df)

In [10]:
def get_train_transform():
   return A.Compose(
       [
        A.Resize(256,256),
        A.HorizontalFlip(p=0.25),
        A.VerticalFlip(p=0.25),
        ToTensor()
        ])
   
def get_valid_transform():
  return A.Compose([
                    A.Resize(256,256),
                    ToTensor()
  ])

In [11]:
class dataSet(Dataset):
  def __init__(self, df, fold=0, train=True, transforms= None):
    self.transforms = transforms

    if train:
      self.df = df[df.kfold != fold].reset_index(drop=True)
    else:
      self.df = df[df.kfold == fold].reset_index(drop=True)

  def __getitem__(self, idx):
    img_path = df.loc[idx, "img_"]
    mask_path = df.loc[idx, "mask_"]

    img = imread(img_path).astype('float32')
    img = img/255.0
    mask = get_mask(mask_path, img.shape[0], img.shape[1]).astype('float32')
    mask = mask/255.0
    
    augmented = self.transforms(image=img, mask=mask)
    img = augmented['image']
    mask = augmented['mask']
    mask = mask[0].permute(2, 0, 1)
    return img,mask

  def __len__(self):
    return len(self.df)

In [12]:
train_dataset = dataSet(df_fold, fold=0, train=True, transforms= get_train_transform())
valid_dataset = dataSet(df_fold, fold=0, train=False, transforms = get_valid_transform())

# model

In [13]:
!pip install git+https://github.com/qubvel/segmentation_models.pytorch

Collecting git+https://github.com/qubvel/segmentation_models.pytorch
  Cloning https://github.com/qubvel/segmentation_models.pytorch to /tmp/pip-req-build-bzo95x6t
  Running command git clone -q https://github.com/qubvel/segmentation_models.pytorch /tmp/pip-req-build-bzo95x6t
Collecting pretrainedmodels==0.7.4
[?25l  Downloading https://files.pythonhosted.org/packages/84/0e/be6a0e58447ac16c938799d49bfb5fb7a80ac35e137547fc6cee2c08c4cf/pretrainedmodels-0.7.4.tar.gz (58kB)
[K     |████████████████████████████████| 61kB 5.2MB/s 
[?25hCollecting efficientnet-pytorch==0.6.3
  Downloading https://files.pythonhosted.org/packages/b8/cb/0309a6e3d404862ae4bc017f89645cf150ac94c14c88ef81d215c8e52925/efficientnet_pytorch-0.6.3.tar.gz
Collecting timm==0.3.2
[?25l  Downloading https://files.pythonhosted.org/packages/51/2d/39ecc56fbb202e1891c317e8e44667299bc3b0762ea2ed6aaaa2c2f6613c/timm-0.3.2-py3-none-any.whl (244kB)
[K     |████████████████████████████████| 245kB 9.3MB/s 
Collecting munch
  Down

In [14]:
import segmentation_models_pytorch as smp

In [15]:
ENCODER = 'se_resnext50_32x4d'
ENCODER_WEIGHTS = 'imagenet'
CLASSES = ['cell']
ACTIVATION = 'sigmoid' # could be None for logits or 'softmax2d' for multicalss segmentation
DEVICE = 'cuda'

model = smp.Unet(encoder_name=ENCODER,
                 encoder_weights = ENCODER_WEIGHTS,
                 in_channels=4,
                 classes=len(CLASSES))

Downloading: "http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth" to /root/.cache/torch/hub/checkpoints/se_resnext50_32x4d-a260b3a4.pth


HBox(children=(FloatProgress(value=0.0, max=110559176.0), HTML(value='')))




In [16]:
train_dataloader = DataLoader(train_dataset, batch_size=16, num_workers=12)
valid_dataloader = DataLoader(valid_dataset, batch_size=8, num_workers=4)

In [17]:
loss = smp.utils.losses.DiceLoss()
metrics = [
    smp.utils.metrics.IoU(threshold=0.5),
]

optimizer = torch.optim.Adam([ 
    dict(params=model.parameters(), lr=0.0001),
])

In [18]:
train_epoch = smp.utils.train.TrainEpoch(
    model, 
    loss=loss, 
    metrics=metrics, 
    optimizer=optimizer,
    device=DEVICE,
    verbose=True,
)

valid_epoch = smp.utils.train.ValidEpoch(
    model, 
    loss=loss, 
    metrics=metrics, 
    device=DEVICE,
    verbose=True,
)

In [20]:
max_score = 0

for i in range(0, 40):
    
    print('\nEpoch: {}'.format(i))
    train_logs = train_epoch.run(train_dataloader)
    valid_logs = valid_epoch.run(valid_dataloader)
    
    # do something (save model, change lr, etc.)
    if max_score < valid_logs['iou_score']:
        max_score = valid_logs['iou_score']
        torch.save(model, './best_model.pth')
        print('Model saved!')
        
    if i == 25:
        optimizer.param_groups[0]['lr'] = 1e-5
        print('Decrease decoder learning rate to 1e-5!')


Epoch: 0
train: 100%|██████████| 34/34 [04:49<00:00,  8.52s/it, dice_loss - -7.071, iou_score - 0.335]
valid: 100%|██████████| 17/17 [01:23<00:00,  4.94s/it, dice_loss - 1.068, iou_score - 0.004867]
Model saved!

Epoch: 1
train: 100%|██████████| 34/34 [04:45<00:00,  8.41s/it, dice_loss - 3.542, iou_score - 0.2068]
valid: 100%|██████████| 17/17 [01:21<00:00,  4.82s/it, dice_loss - 9.774, iou_score - 0.003268]

Epoch: 2
train: 100%|██████████| 34/34 [04:43<00:00,  8.35s/it, dice_loss - 2.359, iou_score - 0.1812]
valid: 100%|██████████| 17/17 [01:22<00:00,  4.86s/it, dice_loss - -3.069, iou_score - 0.004046]

Epoch: 3
train: 100%|██████████| 34/34 [04:53<00:00,  8.63s/it, dice_loss - 2.107, iou_score - 0.1697]
valid: 100%|██████████| 17/17 [01:23<00:00,  4.89s/it, dice_loss - 0.6969, iou_score - 0.1018]
Model saved!

Epoch: 4
train: 100%|██████████| 34/34 [04:36<00:00,  8.13s/it, dice_loss - 1.927, iou_score - 0.1613]
valid: 100%|██████████| 17/17 [01:17<00:00,  4.56s/it, dice_loss - -0.

KeyboardInterrupt: ignored