In [1]:
! pip install kaggle
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d jy2040/kvasir-seg

Downloading kvasir-seg.zip to /content
 75% 33.0M/44.0M [00:00<00:00, 175MB/s] 
100% 44.0M/44.0M [00:00<00:00, 177MB/s]


In [3]:
!unzip /content/kvasir-seg.zip -d /content/kvasir-seg

Archive:  /content/kvasir-seg.zip
  inflating: /content/kvasir-seg/Kvasir-SEG/images/cju0qkwl35piu0993l0dewei2.jpg  
  inflating: /content/kvasir-seg/Kvasir-SEG/images/cju0qoxqj9q6s0835b43399p4.jpg  
  inflating: /content/kvasir-seg/Kvasir-SEG/images/cju0qx73cjw570799j4n5cjze.jpg  
  inflating: /content/kvasir-seg/Kvasir-SEG/images/cju0roawvklrq0799vmjorwfv.jpg  
  inflating: /content/kvasir-seg/Kvasir-SEG/images/cju0rx1idathl0835detmsp84.jpg  
  inflating: /content/kvasir-seg/Kvasir-SEG/images/cju0s2a9ekvms080138tjjpxr.jpg  
  inflating: /content/kvasir-seg/Kvasir-SEG/images/cju0s690hkp960855tjuaqvv0.jpg  
  inflating: /content/kvasir-seg/Kvasir-SEG/images/cju0sr5ghl0nd08789uzf1raf.jpg  
  inflating: /content/kvasir-seg/Kvasir-SEG/images/cju0sxqiclckk08551ycbwhno.jpg  
  inflating: /content/kvasir-seg/Kvasir-SEG/images/cju0t4oil7vzk099370nun5h9.jpg  
  inflating: /content/kvasir-seg/Kvasir-SEG/images/cju0tl3uz8blh0993wxvn7ly3.jpg  
  inflating: /content/kvasir-seg/Kvasir-SEG/images/cj

In [8]:

import os
import random

import numpy as np
import cv2
from glob import glob
from scipy.ndimage.interpolation import rotate
from tqdm import tqdm
from sklearn.model_selection import train_test_split

def read_image(imagefile, grayscale=False):
    if grayscale == True:
        image = cv2.imread(imagefile)
        #image = np.expand_dims(image, -1)
    else:
        image = cv2.imread(imagefile)
    return image

def save_image(image, mask, path, binary=True):
    image = np.array(image)
    if binary == True:
        mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
    # cv2.imshow('image', image)
    # cv2.waitKey(0)
    # cv2.imshow('image', mask)
    # cv2.waitKey(0)
    # print(path[0],path[1])
    cv2.imwrite(path[0], image)
    cv2.imwrite(path[1], mask)

def concat_images(images, rows, cols):
    _, h, w, _ = images.shape
    images = images.reshape((rows, cols, h, w, 3))
    images = images.transpose(0, 2, 1, 3, 4)
    images = images.reshape((rows * h, cols * w, 3))
    return images

def check_size(size):
    if type(size) == int:
        size = (size, size)
    if type(size) != tuple:
        raise TypeError('size is int or tuple')
    return size

def subtract(image):
    image = image / 255
    return image

def resize(image, size):
    size = check_size(size)
    image = cv2.resize(image, size)
    return image

def center_crop(image, mask, crop_size, size):
    h, w, _ = image.shape
    crop_size = check_size(crop_size)
    top = (h - crop_size[0]) // 2
    left = (w - crop_size[1]) // 2
    bottom = top + crop_size[0]
    right = left + crop_size[1]

    image = image[top:bottom, left:right, :]
    mask = mask[top:bottom, left:right, :]

    image = resize(image, size)
    mask = resize(mask, size)

    return image, mask

def random_crop(image, mask, crop_size, size):
    crop_size = check_size(crop_size)
    h, w, _ = image.shape
    top = np.random.randint(0, h - crop_size[0])
    left = np.random.randint(0, w - crop_size[1])
    bottom = top + crop_size[0]
    right = left + crop_size[1]

    image = image[top:bottom, left:right, :]
    mask = mask[top:bottom, left:right, :]

    image = resize(image, size)
    mask = resize(mask, size)

    return image, mask

def horizontal_flip(image, mask, size):
    image = image[:, ::-1, :]
    mask = mask[:, ::-1, :]

    image = resize(image, size)
    mask = resize(mask, size)

    return image, mask

def vertical_flip(image, mask, size):
    image = image[::-1, :, :]
    mask = mask[::-1, :, :]

    image = resize(image, size)
    mask = resize(mask, size)

    return image, mask

def scale_augmentation(image, mask, scale_range, crop_size, size):
    scale_size = np.random.randint(*scale_range)
    image = cv2.resize(image, (scale_size, scale_size))
    mask = cv2.resize(mask, (scale_size, scale_size))
    image, mask = random_crop(image, mask, crop_size, size)
    return image, mask

def random_rotation(image, mask, size, angle_range=(0, 90)):
    h1, w1, _ = image.shape
    h2, w2, _ = mask.shape

    angle = np.random.randint(*angle_range)
    image = rotate(image, angle)
    image = resize(image, (h1, w1))

    mask = rotate(mask, angle)
    mask = resize(mask, (h2, w2))

    image = resize(image, size)
    mask = resize(mask, size)

    return image, mask

def cutout(image_origin, mask_origin, mask_size, mask_value='mean'):
    image = np.copy(image_origin)
    mask = np.copy(mask_origin)

    if mask_value == 'mean':
        mask_value = image.mean()
    elif mask_value == 'random':
        mask_value = np.random.randint(0, 256)

    h, w, _ = image.shape
    top = np.random.randint(0 - mask_size // 2, h - mask_size)
    left = np.random.randint(0 - mask_size // 2, w - mask_size)
    bottom = top + mask_size
    right = left + mask_size
    if top < 0:
        top = 0
    if left < 0:
        left = 0

    image[top:bottom, left:right, :].fill(mask_value)
    mask[top:bottom, left:right, :].fill(0)

    image = resize(image, size)
    mask = resize(mask, size)

    return image, mask

def brightness_augment(img, mask, factor=0.5):
    hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) #convert to hsv
    hsv = np.array(hsv, dtype=np.float64)
    hsv[:, :, 2] = hsv[:, :, 2] * (factor + np.random.uniform()) #scale channel V uniformly
    hsv[:, :, 2][hsv[:, :, 2] > 255] = 255 #reset out of range values
    rgb = cv2.cvtColor(np.array(hsv, dtype=np.uint8), cv2.COLOR_HSV2RGB)

    image = resize(rgb, size)
    mask = resize(mask, size)

    return image, mask

def rgb_to_grayscale(img, mask):
    img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    img = [img, img, img]
    img = np.transpose(img, (1, 2, 0))

    image = resize(img, size)
    mask = resize(mask, size)
    return image, mask

def create_dir(name):
    try:
        os.mkdir(name)
    except:
        pass


In [16]:
size = (256, 256)
crop_size = (300, 300)

path = "/content/kvasir-seg"
dataset_name = "Kvasir-SEG"
full_path = os.path.join(path, dataset_name)
print(full_path)

new_path = "new_data"
create_dir(new_path)
new_full_path = os.path.join(new_path, dataset_name)
print("Path:",new_full_path)

train_path = os.path.join(new_full_path, "train")
valid_path = os.path.join(new_full_path, "valid")
test_path = os.path.join(new_full_path, "test")

/content/kvasir-seg/Kvasir-SEG
Path: new_data/Kvasir-SEG


In [17]:
if not os.path.exists(new_full_path):
    os.mkdir(new_full_path)
    for path in [train_path, valid_path, test_path]:
        os.mkdir(path)
        os.mkdir(os.path.join(path, "images"))
        os.mkdir(os.path.join(path, "masks"))

In [18]:
images = glob(os.path.join(full_path, "images/", "*"))
masks = glob(os.path.join(full_path, "masks/", "*"))

In [19]:
images.sort()
masks.sort()

len_ids = len(images)
print(len_ids)
train_size = int((80/100)*len_ids)
valid_size = int((10/100)*len_ids)		## Here 10 is the percent of images used for validation
test_size = int((10/100)*len_ids)		## Here 10 is the percent of images used for testing
print(test_size)

1000
100


In [20]:
# test and training split
train_images, test_images = train_test_split(images, test_size=test_size, random_state=42)
train_masks, test_masks = train_test_split(masks, test_size=test_size, random_state=42)

train_images, valid_images = train_test_split(train_images, test_size=test_size, random_state=42)
train_masks, valid_masks = train_test_split(train_masks, test_size=test_size, random_state=42)

print("Total Size: ", len_ids)
print("Training Size: ", train_size)
print("Validation Size: ", valid_size)
print("Testing Size: ", test_size)

Total Size:  1000
Training Size:  800
Validation Size:  100
Testing Size:  100


In [21]:
## Validation images and masks
for idx, p in tqdm(enumerate(test_images), total=len(test_images)):
    ## Path
    name = p.split("/")[-1].split(".")[0]
    image_path = test_images[idx]
    mask_path = test_masks[idx]

    name = name.split("\\")[-1]
    # print("name:", name)


    if os.path.exists(image_path) and os.path.exists(mask_path):
        image = read_image(image_path)
        mask = read_image(mask_path, grayscale=True)

        # new_image_path = os.path.join(new_full_path, "test", "images")
        # new_mask_path = os.path.join(new_full_path, "test", "masks")
        new_image_path = "/content/new_data/Kvasir-SEG/test/images/"
        new_mask_path = "/content/new_data/Kvasir-SEG/test/masks/"

        image = resize(image, size)
        mask = resize(mask, size)

        img_path = new_image_path + str(name) + ".jpg"
        mask_path = new_mask_path + str(name) + ".jpg"

        # print("Validation path:",img_path)
        tmp_path = [img_path, mask_path]
        # print(tmp_path)
        # cv2.imshow('image',image)
        # cv2.waitKey(0)
        save_image(image, mask, tmp_path)


 10%|█         | 10/100 [00:00<00:00, 90.30it/s]

name: cju5ekty5ckzf07550c9u3ckk
name: cju7da88w2eod0755wejzynvt
name: cju7dda8w2br20818zhsuz8s7
name: cju76erapykj30871x5eaxh4q
name: cju3v56bwgy8v0871w14pz8fx
name: cju77t0razbvm080106o56289
name: cju6v4szov55u0871qmqz3v8n
name: cju5cetivauok0987ok3e5bre
name: cju87vqa0ndwg0850onjdz7ol
name: cju2hugv9vget0799hhk7ksvg
name: cju83rcnzkbsj0755x5anfrcg
name: cju1d31sp4d4k0878r3fr02ul
name: cju6vvxsev9y30987kespucdg
name: cju8dn0c3u2v50801k8rvq02f
name: cju8bpctzrqkr0850zeldv9kt


 29%|██▉       | 29/100 [00:00<00:00, 80.03it/s]

name: cju8abobpqbir08189u01huru
name: cju2yyhsp933j0855hp32e012
name: cju88trl3ogi208716qvti51b
name: cju7dz5yy2i7z0801ausi7rna
name: cju30qbm1ad3x0855znuhpz9u
name: cju5i5oh2efg60987ez6cpf72
name: cju2p0eveqtdc0835gpi3p93i
name: cju34repocy5208780gswillm
name: cju5f0dezct4q08183ydw11dx
name: cju2rxm8rpbaf0993o3qr2oph
name: cju2trbpkv0c00988hxla5dzz
name: cju1f8w0t65en0799m9oacq0q
name: cjyzufihqquiw0a46jatrbwln
name: cju8aj01yqeqm0850lhdz3xdw
name: cju8c2rqzs5t80850d0zky5dy
name: cju32zhbnc1oy0801iyv1ix6p
name: cju2i03ptvkiu0799xbbd4det
name: cju6ut4l8va6y0755tyw3vfqq
name: cju5bbtwsa8cl0987wgfsqpao


 48%|████▊     | 48/100 [00:00<00:00, 83.38it/s]

name: cju34ouumcznz07996gg1xq7v
name: cju2r91dg2k090801bh0xzbxk
name: cju7aez2x1jtj0871ztezs3oi
name: cju5vxuc5loxw0818u8xgf45p
name: cju8aeei7q8k308173n9y4klv
name: cju1bm8063nmh07996rsjjemq
name: cju31w6goazci0799n014ly1q
name: cju1expq45zst0855rjqwwj4m
name: cju30ia8da2bq0799klnehml2
name: cju8doa16u5gh0818w1ywda3q
name: cju2zwg05a0oy0801yr73ig7g
name: cju2ysg748ru80878sp6j0gm0
name: cju8b7aqtr4a00987coba14b7
name: cju5x15djm7ae0755h8czf6nt
name: cju3ykamdj9u208503pygyuc8


 67%|██████▋   | 67/100 [00:00<00:00, 86.62it/s]

name: cju85c2d4ln1b0755zz1z3onx
name: cju5ufn3skquf0818dhapnhba
name: cju88oh0po9gq0801nge4tgr1
name: cju2xjz2ju8pe0993ysv9wg17
name: cju5vcmrqla7i0817x4sp4pqw
name: cju15l5ubz9yh0855b3ivdpse
name: cju16d65tzw9d0799ouslsw25
name: cju5yjq1pmlgc0801z0t24bly
name: cju0tl3uz8blh0993wxvn7ly3
name: cju2sszfq3uye0878sucelzk2
name: cju843yjskhq30818qre4rwm2
name: cju2zpw4q9vzr0801p0lysjdl
name: cju1ats0y372e08011yazcsxm
name: cju5hl8nee8a40755fm8qjj0o
name: cju2rqo702wpx0855fn7d5cxh
name: cju5xjn5mm78b09871spyqhhr
name: cju7ajnbo1gvm098749rdouk0
name: cju76lsehyia10987u54vn8rb
name: cju886ryxnsl50801r93jai7q
name: cju1cnnziug1l0835yh4ropyg


 86%|████████▌ | 86/100 [00:01<00:00, 86.62it/s]

name: cju5hqz50e7o90850e0prlpa0
name: cju1fmsyf6gxb0801cimx2gle
name: cju45qbf3n9sa0987oonbkly9
name: cju5wj0faly5008187n6530af
name: cju7ddtz729960801uazp1knc
name: cju2zkpdl9h7t0799ix60teqg
name: cju2zgbj9zmrw0835nnlzxj4c
name: cju6ywm40wdbo0987pbftsvtg
name: cju17otoe119u0799nqcbl8n1
name: cju5wi6bqlxy90755bu227nvb
name: cju30ajhw09sx0988qyahx9s8
name: cju77u1sjz77b0817ft44r3fk
name: cju1cbokpuiw70988j4lq1fpi
name: cju2yljr0yzhw0988ecf271ly
name: cju1cdxvz48hw0801i0fjwcnk
name: cju30ov1oah920801mi8thuyg
name: cju5i39mreass0817au8p22zy
name: ck2bxw18mmz1k0725litqq2mc


100%|██████████| 100/100 [00:01<00:00, 85.28it/s]

name: cju7bduyq1rjf08719giru9ho
name: cju7druhp2gp308715i6km7be
name: cju31ugmfb3dz0855xtqshki6
name: cju358pwtdby20878cg7nm0np
name: cju414lf2l1lt0801rl3hjllj
name: cju5enq1tcn1i0755hnkon787
name: cju2sggy13na70855tbeoqgha
name: cju7f6cqy2ur20818t1saazbm
name: cju3ya7goj6at0818v2l5ay7f
name: cju7ecl9i2i060987xawjp4l0
name: cju1efbr0rqxz09931z0lf4vf
name: cju1c4fcu40hl07992b8gj0c8
name: cju84ffdzkrjn08183jh1fxmb





In [22]:
## Testing images and masks
for idx, p in tqdm(enumerate(valid_images), total=len(valid_images)):
    ## Path
    name = p.split("/")[-1].split(".")[0]
    image_path = valid_images[idx]
    mask_path = valid_masks[idx]

    name = name.split("\\")[-1]
    # print("name:", name)

    if os.path.exists(image_path) and os.path.exists(mask_path):
        image = read_image(image_path)
        mask = read_image(mask_path, grayscale=True)

        # new_image_path = os.path.join(new_full_path, "valid", "images\")
        # new_mask_path = os.path.join(new_full_path, "valid", "masks\")

        new_image_path = "/content/new_data/Kvasir-SEG/valid/images/"
        new_mask_path = "/content/new_data/Kvasir-SEG/valid/masks/"

        image = resize(image, size)
        mask = resize(mask, size)

        img_path = new_image_path + str(name) + ".jpg"
        mask_path = new_mask_path + str(name) + ".jpg"
        tmp_path = [img_path, mask_path]
        save_image(image, mask, tmp_path)

100%|██████████| 100/100 [00:01<00:00, 92.36it/s]


In [23]:
## Training images and masks
for idx, p in tqdm(enumerate(train_images), total=len(train_images)):
    ## Path
    name = p.split("/")[-1].split(".")[0]
    image_path = train_images[idx]
    mask_path = train_masks[idx]

    name = name.split("\\")[-1]
    # print("name:", name)

    if os.path.exists(image_path) and os.path.exists(image_path):
        image = read_image(image_path)
        mask = read_image(mask_path, grayscale=True)

        ## Augment
        image1, mask1 = center_crop(image, mask, crop_size, size)
        image2, mask2 = random_crop(image, mask, crop_size, size)
        image3, mask3 = horizontal_flip(image, mask, size)
        image4, mask4 = vertical_flip(image, mask, size)
        image5, mask5 = scale_augmentation(image, mask, (512, 768), crop_size, size)
        image6, mask6 = random_rotation(image, mask, size)
        image7, mask7 = cutout(image, mask, 256)
        ## Extra Cropping
        image8, mask8 = random_crop(image, mask, crop_size, size)
        image9, mask9 = random_crop(image, mask, crop_size, size)
        ## Extra Scale Augmentation
        image10, mask10 = scale_augmentation(image, mask, (540, 820), crop_size, size)
        image11, mask11 = scale_augmentation(image, mask, (720, 1024), crop_size, size)
        ## Extra Rotation
        image12, mask12 = random_rotation(image, mask, size)
        image13, mask13 = random_rotation(image, mask, size)
        ## Brightness
        image14, mask14 = brightness_augment(image, mask, factor=0.3)
        image15, mask15 = brightness_augment(image, mask, factor=0.6)
        image16, mask16 = brightness_augment(image, mask, factor=0.9)
        ## More Rotation
        image17, mask17 = random_rotation(image, mask, size)
        image18, mask18 = random_rotation(image, mask, size)
        ## More Random Crop
        image19, mask19 = random_crop(image, mask, crop_size, size)
        image20, mask20 = random_crop(image, mask, crop_size, size)
        ## More Cutout
        image21, mask21 = cutout(image, mask, 256)
        image22, mask22 = cutout(image, mask, 256)
        ## Grayscale
        image23, mask23 = rgb_to_grayscale(image, mask)
        image24, mask24 = rgb_to_grayscale(image1, mask1)
        image25, mask25 = rgb_to_grayscale(image2, mask2)
        image26, mask26 = rgb_to_grayscale(image3, mask3)
        image27, mask27 = rgb_to_grayscale(image4, mask4)
        image28, mask28 = rgb_to_grayscale(image5, mask5)
        image29, mask29 = rgb_to_grayscale(image15, mask15)
        image30, mask30 = rgb_to_grayscale(image16, mask16)

        ## Original image and mask
        image = resize(image, size)
        mask = resize(mask, size)

        ## All images and masks
        all_images = [image, image1, image2, image3, image4, image5, image6, image7,
            image8, image9, image10, image11, image12, image13, image14, image15, image16,
            image17, image18, image19, image20, image21, image22,
            image23,image24, image25, image26, image27, image28, image29, image30
            ]
        all_masks  = [mask, mask1, mask2, mask3, mask4, mask5, mask6, mask7, mask8,
            mask9, mask10, mask11, mask12, mask13, mask14, mask15, mask16,
            mask17, mask18, mask19, mask20, mask21, mask22,
            mask23, mask24, mask25, mask26, mask27, mask28, mask29, mask30
            ]

        ## Save the images and masks
        # new_image_path = os.path.join(new_full_path, "train", "images\\")
        # new_mask_path = os.path.join(new_full_path, "train", "masks\\")
        new_image_path = "/content/new_data/Kvasir-SEG/train/images/"
        new_mask_path = "/content/new_data/Kvasir-SEG/train/masks/"

        for j in range(len(all_images)):
            img_path = new_image_path + str(name) + "_" + str(j) + ".jpg"
            msk_path = new_mask_path + str(name) + "_" + str(j) + ".jpg"

            img = all_images[j]
            msk = all_masks[j]
            path = [img_path, msk_path]

            save_image(img, msk, path)

100%|██████████| 800/800 [25:21<00:00,  1.90s/it]


In [24]:
!zip -r /content/data.zip /content/new_data

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: content/new_data/Kvasir-SEG/train/images/cju32qr9tbvsj08013pkpjenq_22.jpg (deflated 2%)
  adding: content/new_data/Kvasir-SEG/train/images/cju83k8fyjsxr0817d6nxs6r4_20.jpg (deflated 1%)
  adding: content/new_data/Kvasir-SEG/train/images/cju5buy2bal250818ipl6fqwv_29.jpg (deflated 2%)
  adding: content/new_data/Kvasir-SEG/train/images/cju2hlm19vjjf0801o69qnber_21.jpg (deflated 2%)
  adding: content/new_data/Kvasir-SEG/train/images/cju7f5ghb2r5s0801chwkxxh9_7.jpg (deflated 1%)
  adding: content/new_data/Kvasir-SEG/train/images/cju2trtjf4qjd0878a2zle9v9_10.jpg (deflated 1%)
  adding: content/new_data/Kvasir-SEG/train/images/cju7f4sc62xqj075597xpmuoy_19.jpg (deflated 1%)
  adding: content/new_data/Kvasir-SEG/train/images/cju1871y11d6r0799k6cw4yze_12.jpg (deflated 3%)
  adding: content/new_data/Kvasir-SEG/train/images/cju7awzmu1ncs0871hziy65zx_22.jpg (deflated 2%)
  adding: content/new_data/Kvasir-SEG/train/images/cju

In [25]:
from google.colab import files
files.download('/content/data.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>