In [1]:
import numpy as np
import os
import pandas as pd
import torchvision.transforms as T
import torchvision.transforms.functional as F
import torch
from PIL import Image

torch.manual_seed(42)
import warnings
warnings.filterwarnings('ignore')

from matplotlib import pyplot as plt
%matplotlib inline

In [6]:
# Source folders: raw training images per class (ER / NR) and the test set.
er_trdat_path = './data/train/ER/'
nr_trdat_path = './data/train/NR/'
testdat_path = './data/test/'

# Destination folders filled by the preprocessing cells of this notebook.
cropdat_path = './data/train/all_cropped/'
balanced_path = './data/train/all_balanced/'
augmented_path = './data/train/all_augmented/'
cropped_testdat_path = './data/test_cropped/'

# makedirs(exist_ok=True) also creates missing parent folders and is a no-op
# when the folder is already there, so the cell can be re-run safely.
for out_dir in (cropdat_path, balanced_path, cropped_testdat_path, augmented_path):
    os.makedirs(out_dir, exist_ok=True)

### Crop Train Images

In [3]:
%%time
# Crop every raw training image, downscale it to 64x64 and save it under a
# sequential "<index>-<class>-<label>.png" name, while tallying how many images
# each class/label pair has in `counts`.
valid_regr = {'1', '3', '6', '10', '20', '30'}

counts = dict()
i = 0  # running index shared by both classes so output names never collide
for cl, src_dir in (('ER', er_trdat_path), ('NR', nr_trdat_path)):
    for filename in os.listdir(src_dir):
        fn_parts = filename.split('_')
        # The label is the filename token right after the class marker.
        regr = fn_parts[fn_parts.index(cl) + 1]
        if regr not in valid_regr:
            # Report the offending file and stop processing this folder.
            print(f'error: unexpected label {regr!r} after {cl!r} marker')
            print(fn_parts)
            print(filename)
            break
        # Close the source file as soon as its pixels are in a tensor.
        with Image.open(src_dir + filename) as img:
            tensor = F.to_tensor(img)
        # F.crop arguments are (top, left, height, width).
        tr = F.crop(tensor, 192, 192, 192, 192)
        tr = F.resize(tr, [64, 64])
        F.to_pil_image(tr).save(cropdat_path + f'{i}-{cl}-{regr}.png')
        key = f'{cl}-{regr}'
        counts[key] = counts.get(key, 0) + 1
        i += 1

Wall time: 2min 13s


### Class Distribution

In [5]:
# Display how many cropped images each "<class>-<label>" pair received.
counts

{'ER-30': 2237,
 'ER-3': 2243,
 'ER-10': 2272,
 'ER-20': 2,
 'ER-1': 3,
 'ER-6': 1,
 'NR-6': 2255,
 'NR-20': 2208,
 'NR-1': 2177,
 'NR-10': 2,
 'NR-3': 2,
 'NR-30': 2}

### Balance Classes

In [7]:
# Random augmentation pipeline used when oversampling rare classes: a random
# affine transform configured with degrees=45, followed by a horizontal flip
# applied with probability p=0.5. Wrapping both in nn.Sequential lets the
# pipeline be called directly on an image tensor.
transforms = torch.nn.Sequential(
    T.RandomAffine(degrees=45),
    T.RandomHorizontalFlip(p=0.5),
)

In [8]:
%%time
# Build the balanced training set: every source image is saved once as-is
# (cropped + resized), and images of under-represented class/label pairs are
# additionally saved as randomly transformed copies.
#
# Number of random copies generated per source image, keyed by (class, label).
# Pairs that are missing here get no extra copies. With the recorded class
# distribution (1-3 source images for each rare pair) these values bring every
# rare pair to roughly 2200 images.
n_augment = {
    ('ER', '1'): 740,
    ('ER', '6'): 2200,
    ('ER', '20'): 1100,
    ('NR', '3'): 1100,
    ('NR', '10'): 1100,
    ('NR', '30'): 1100,
}

ii = 0  # running index shared by both classes so output names never collide
for cl, src_dir in (('ER', er_trdat_path), ('NR', nr_trdat_path)):
    for filename in os.listdir(src_dir):
        # Close the source file as soon as its pixels are in a tensor.
        with Image.open(src_dir + filename) as img:
            tensor = F.to_tensor(img)
        fn_parts = filename.split('_')
        # The label is the filename token right after the class marker.
        regr = fn_parts[fn_parts.index(cl) + 1]
        n = n_augment.get((cl, regr), 0)
        # Pass 0 writes the untouched image; passes 1..n write random variants.
        for k in range(n + 1):
            src = tensor if k == 0 else transforms(tensor)
            # The random transform runs on the full frame, then the same
            # window is cut out. F.crop arguments are (top, left, height, width).
            tr = F.crop(src, 192, 192, 192, 192)
            tr = F.resize(tr, [64, 64])
            F.to_pil_image(tr).save(balanced_path + f'{ii}-{cl}-{regr}.png')
            ii += 1

Wall time: 4min 57s


In [9]:
# Recount the balanced set per "<class>-<label>" pair from the saved filenames,
# which follow the "<index>-<class>-<label>.png" pattern.
counts = dict()
for filename in os.listdir(balanced_path):
    # Drop the extension first so keys read 'ER-30' rather than 'ER-30.png',
    # matching the keys used when counting the cropped images.
    stem = os.path.splitext(filename)[0]
    fn_parts = stem.split('-')
    cl = 'ER' if 'ER' in fn_parts else 'NR'
    regr = fn_parts[2]
    key = f'{cl}-{regr}'
    counts[key] = counts.get(key, 0) + 1
counts

{'ER-30.png': 2237,
 'ER-3.png': 2243,
 'ER-6.png': 2201,
 'ER-10.png': 2272,
 'ER-20.png': 2202,
 'ER-1.png': 2223,
 'NR-6.png': 2255,
 'NR-20.png': 2208,
 'NR-1.png': 2177,
 'NR-10.png': 2202,
 'NR-3.png': 2202,
 'NR-30.png': 2202}

### Augment Train

In [None]:
%%time
# Build the rotation-augmented training set: every source image is saved at
# each whole-degree rotation from 0 to 359 (angle 0 being the untouched
# image), cropped and resized like the other training sets.
ii = 0  # running index shared by both classes so output names never collide
for cl, src_dir in (('ER', er_trdat_path), ('NR', nr_trdat_path)):
    for filename in os.listdir(src_dir):
        # Close the source file as soon as its pixels are in a tensor.
        with Image.open(src_dir + filename) as img:
            tensor = F.to_tensor(img)
        fn_parts = filename.split('_')
        # The label is the filename token right after the class marker.
        regr = fn_parts[fn_parts.index(cl) + 1]
        for angle in range(360):
            if angle == 0:
                src = tensor
            else:
                # F.affine has no defaults for translate / scale / shear, so
                # the identity values are passed explicitly for a pure rotation.
                src = F.affine(tensor, angle, (0, 0), 1, 0)
            # Rotation runs on the full frame, then the same window is cut
            # out. F.crop arguments are (top, left, height, width).
            tr = F.crop(src, 192, 192, 192, 192)
            tr = F.resize(tr, [64, 64])
            # The class tag comes from the folder being processed, so NR
            # images are never written with an ER label.
            F.to_pil_image(tr).save(augmented_path + f'{ii}-{cl}-{regr}.png')
            ii += 1

### Crop Test