# Breast Cancer Detection Model Ver01

## 0. Import Modules

- [OpenSlide](https://openslide.org/api/python/#module-openslide)

In [1]:
%matplotlib inline
import os
import csv
import cv2
import openslide
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow.keras import layers, models
from tensorflow.keras import backend as K
from tensorflow.keras.utils import to_categorical
from openslide.deepzoom import DeepZoomGenerator

## 1. Data Load

In [2]:
# Positive training sample: the whole-slide image and the mask image stored next to it.
# Each path is kept beside the handle opened from it.
#
# Negative sample kept for reference (not loaded here):
#   neg_slide_path = '../data/train/16-S-042725_A.mrxs'
#   neg_mask_path  = '../data/train/Mask_16-S-042725_A.png'

# Whole-slide image (.mrxs)
slide_path = '../data/train/pos/16-S-042893_A1.mrxs'  # pos
slide = openslide.open_slide(slide_path)

# Mask image (.png), opened through the same OpenSlide entry point
mask_path = '../data/train/pos/Mask_16-S-042893_A1.png'  # pos
mask = openslide.open_slide(mask_path)

In [3]:
# Report the `.dimensions` of the positive slide and of its mask, one line each.
for label, handle in (
    ('origin pos_slide.dimensions :', slide),
    ('origin pos_mask.dimensions :', mask),
):
    print(label, handle.dimensions)

origin pos_slide.dimensions : (93970, 234042)
origin pos_mask.dimensions : (5316, 10007)


## 2. Patch Generation

### 1) Find Patches from Slide

In [None]:
patch_size = 256
is_train = True

# Decide whether this sample is positive: true when the path contains 'pos'.
slide_contains_tumor = 'pos' in slide_path

# Property keys used to compute the start, level and size passed to read_region.
bounds_offset_props = (openslide.PROPERTY_NAME_BOUNDS_X, openslide.PROPERTY_NAME_BOUNDS_Y)
bounds_size_props = (openslide.PROPERTY_NAME_BOUNDS_WIDTH, openslide.PROPERTY_NAME_BOUNDS_HEIGHT)

# Open a separate, short-lived handle under its own name. Binding it to `slide`
# would replace the notebook-level `slide` handle and close it when the `with`
# block exits, breaking every later cell that still reads `slide.properties`
# or `slide.level_dimensions`.
with openslide.open_slide(slide_path) as patch_slide:
    # Top-left corner of the bounded region; falls back to (0, 0) when the
    # slide does not expose the bounds properties.
    start = (int(patch_slide.properties.get(bounds_offset_props[0], 0)),
             int(patch_slide.properties.get(bounds_offset_props[1], 0)))

    # log2(256) == 8: the level index is derived from the patch size.
    level = int(np.log2(patch_size))

    # Fraction of the full slide covered by the bounded region, per axis.
    # Defaults to 1.0 when a bounds-size property is missing.
    size_scale = tuple(int(patch_slide.properties.get(prop, dim)) / dim
                       for prop, dim in zip(bounds_size_props, patch_slide.dimensions))

    # Bounded-region size at every level, rounded up to whole pixels.
    l_dimensions = [(int(np.ceil(dim_x * size_scale[0])), int(np.ceil(dim_y * size_scale[1])))
                    for dim_x, dim_y in patch_slide.level_dimensions]
    size = l_dimensions[level]

    # Read the bounded region at the chosen level.
    slide4 = patch_slide.read_region(start, level, size)
    
    

In [16]:
# OpenSlide property keys for the bounded region: its offset (x, y) ...
bounds_offset_props = (
    openslide.PROPERTY_NAME_BOUNDS_X,
    openslide.PROPERTY_NAME_BOUNDS_Y,
)
# ... and its size (width, height).
bounds_size_props = (
    openslide.PROPERTY_NAME_BOUNDS_WIDTH,
    openslide.PROPERTY_NAME_BOUNDS_HEIGHT,
)

In [28]:
# Per-axis ratio of the bounded-region size to the full slide size.
# A missing bounds property falls back to the full extent, giving a ratio of 1.0.
size_scale = tuple(
    int(slide.properties.get(bound_key, full_extent)) / full_extent
    for bound_key, full_extent in zip(bounds_size_props, slide.dimensions)
)
size_scale

(0.905097371501543, 0.6841122533562352)

In [27]:
# Print the dimensions tuple of every level, one per line.
for dim in slide.level_dimensions:
    print(dim)

(93970, 234042)
(46985, 117021)
(23492, 58510)
(11746, 29255)
(5873, 14627)
(2936, 7313)
(1468, 3656)
(734, 1828)
(367, 914)
(183, 457)


In [32]:
# For each level, scale every axis by its matching factor and round up to whole pixels.
tuple(
    tuple(
        int(np.ceil(axis_length * axis_scale))
        for axis_length, axis_scale in zip(level_size, size_scale)
    )
    for level_size in slide.level_dimensions
)

((85052, 160111),
 (42526, 80056),
 (21263, 40028),
 (10632, 20014),
 (5316, 10007),
 (2658, 5003),
 (1329, 2502),
 (665, 1251),
 (333, 626),
 (166, 313))

In [35]:
# for l_size in slide.level_dimensions:
#     for l_lim, scale in zip(l_size, size_scale):
#         print(l_lim, ' |', scale)

In [41]:
# NOTE(review): this zero-argument call evaluates to an empty tuple, so it cannot
# produce the two-argument TypeError recorded as this cell's output; presumably the
# cell was edited after it last ran. Confirm whether this scratch cell is still needed.
tuple()

TypeError: tuple() takes at most 1 argument (2 given)

In [43]:
# Walk every level and show each axis length beside the scale factor zipped with it.
for level_size in slide.level_dimensions:
    for axis_length, axis_scale in zip(level_size, size_scale):
        print(axis_length, ' |', axis_scale)

93970  | 0.905097371501543
234042  | 0.6841122533562352
46985  | 0.905097371501543
117021  | 0.6841122533562352
23492  | 0.905097371501543
58510  | 0.6841122533562352
11746  | 0.905097371501543
29255  | 0.6841122533562352
5873  | 0.905097371501543
14627  | 0.6841122533562352
2936  | 0.905097371501543
7313  | 0.6841122533562352
1468  | 0.905097371501543
3656  | 0.6841122533562352
734  | 0.905097371501543
1828  | 0.6841122533562352
367  | 0.905097371501543
914  | 0.6841122533562352
183  | 0.905097371501543
457  | 0.6841122533562352


In [49]:
# Bounded-region size at every level: each axis is multiplied by its scale
# factor and rounded up to whole pixels.
l_dimensions = [
    tuple(
        int(np.ceil(axis_length * axis_scale))
        for axis_length, axis_scale in zip(level_size, size_scale)
    )
    for level_size in slide.level_dimensions
]

In [50]:
# Display the scaled (x, y) size computed for each level.
l_dimensions

[(85052, 160111),
 (42526, 80056),
 (21263, 40028),
 (10632, 20014),
 (5316, 10007),
 (2658, 5003),
 (1329, 2502),
 (665, 1251),
 (333, 626),
 (166, 313)]

In [48]:
# Same computation as the list form, printed one level at a time.
for level_width, level_height in slide.level_dimensions:
    bounded_width = int(np.ceil(level_width * size_scale[0]))
    bounded_height = int(np.ceil(level_height * size_scale[1]))
    print((bounded_width, bounded_height))

(85052, 160111)
(42526, 80056)
(21263, 40028)
(10632, 20014)
(5316, 10007)
(2658, 5003)
(1329, 2502)
(665, 1251)
(333, 626)
(166, 313)


In [45]:
# A small three-element tuple used for the scratch checks below.
a = (1, 2, 3)

In [46]:
# Echo `a` to check the value it currently holds.
a

(1, 2, 3)

In [47]:
# With `a` being a tuple, `+` concatenates into a new tuple; `a` itself is not modified.
a + (4, 5)

(1, 2, 3, 4, 5)