## Augmentation of data

In [11]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader 
from torchvision import transforms
from torch.autograd import Variable
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import random
from PIL  import Image, ImageOps
import time

In [12]:
class Reshape(object):
    """Resize the image of a sample to a fixed size.

    A sample is a dict with the keys ``'image'`` (an object exposing PIL's
    ``resize``) and ``'label'``. A new dict is returned; the label is passed
    through unchanged.

    Args:
        size: ``(width, height)`` handed to ``resize``. Defaults to
            ``(120, 120)``.
    """

    def __init__(self, size=(120, 120)):
        self.size = tuple(size)

    def __call__(self, sample):
        image, label = sample['image'], sample['label']
        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS names the
        # same filter and is also available in older Pillow releases.
        image = image.resize(self.size, Image.LANCZOS)
        return {'image': image, 'label': label}

In [13]:
class roi(object):
    """Randomly rescale the image, then crop a random region out of it.

    The image is first resized to a random ``(width, height)`` with both
    sides drawn from ``[min_size, max_size]``. A crop window whose sides are
    drawn from ``[min_size, resized side]`` is then placed at a random
    position inside the resized image.

    Args:
        min_size: Smallest side length for both the resize and the crop.
            Defaults to 120.
        max_size: Largest side length for the resize. Defaults to 240.

    Raises:
        ValueError: If ``min_size < 1`` or ``max_size < min_size``.
    """

    def __init__(self, min_size=120, max_size=240):
        if min_size < 1 or max_size < min_size:
            raise ValueError(
                "expected 1 <= min_size <= max_size, got %r and %r"
                % (min_size, max_size)
            )
        self.min_size = min_size
        self.max_size = max_size

    def __call__(self, sample):
        image, label = sample['image'], sample['label']
        lo, hi = self.min_size, self.max_size

        # The order of the random draws (resize w, resize h, crop w, crop h,
        # x, y) is significant: it keeps seeded runs reproducible.
        target = (random.randint(lo, hi), random.randint(lo, hi))
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        image = image.resize(target, Image.LANCZOS)

        full_w, full_h = image.size
        # Fall back to the full side if it is somehow below the minimum.
        width = random.randint(lo, full_w) if full_w >= lo else full_w
        height = random.randint(lo, full_h) if full_h >= lo else full_h

        x = random.randint(0, full_w - width)
        y = random.randint(0, full_h - height)
        image = image.crop((x, y, x + width, y + height))
        return {'image': image, 'label': label}

In [14]:
class flip(object):
    """Occasionally apply ``ImageOps.flip`` to the image of a sample.

    One integer is drawn from 1..100 per call; the image is flipped only when
    the draw is below 10 (i.e. for the values 1..9). The label is passed
    through unchanged and a new sample dict is returned.
    """

    def __call__(self, sample):
        picture = sample['image']
        tag = sample['label']
        roll = random.randint(1, 100)
        result = ImageOps.flip(picture) if roll < 10 else picture
        return {'image': result, 'label': tag}

In [15]:
class mirror(object):
    """Apply ``ImageOps.mirror`` to the image of a sample on a coin toss.

    One integer is drawn from {0, 1} per call; a draw of 1 mirrors the image,
    a draw of 0 leaves it untouched. The label is passed through unchanged
    and a new sample dict is returned.
    """

    def __call__(self, sample):
        picture = sample['image']
        tag = sample['label']
        coin = random.randint(0, 1)
        if coin != 1:
            return {'image': picture, 'label': tag}
        return {'image': ImageOps.mirror(picture), 'label': tag}

In [16]:
class CatDogDataset(Dataset):
    """Image-folder dataset driven by a two-column CSV index.

    The index holds one row per image: column 0 is the image path relative
    to ``root_dir`` and column 1 is the integer class label. If the index
    file already exists inside ``root_dir`` it is loaded; otherwise it is
    built from the class sub-directories of ``root_dir`` and written there.

    Args:
        root_dir: Directory containing one sub-directory per class.
        file: Name of the CSV index file, relative to ``root_dir``.
        transform: Optional callable applied to every
            ``{'image': ..., 'label': ...}`` sample before it is returned.
        isval: When true, only the first ``val_size`` rows are kept.
        val_size: Number of rows kept when ``isval`` is true (default 2000).
    """

    def __init__(self, root_dir, file, transform=None, isval=False, val_size=2000):
        self.root_dir = root_dir
        self.transform = transform
        if isval:
            self.val_size = val_size

        csv_path = os.path.join(self.root_dir, file)
        if os.path.exists(csv_path):
            self.csv = pd.read_csv(csv_path, header=None)
            print("Found the csv!!..")
        else:
            self.create(csv_path)
            print("Not Found the csv!!..")

        if isval:
            # Truncate on both paths so a freshly created index is limited
            # the same way as one loaded from disk.
            self.csv = self.csv[:self.val_size]

    def create(self, csv_path):
        """Build the index from the class folders, shuffle it and save it.

        Args:
            csv_path: Full path of the CSV file to write.
        """
        # Only sub-directories are classes (stray files in root_dir would
        # otherwise crash the scan). Sorting makes the label ids independent
        # of the arbitrary order returned by os.listdir.
        classes = sorted(
            entry for entry in os.listdir(self.root_dir)
            if os.path.isdir(os.path.join(self.root_dir, entry))
        )
        rows = []
        for label, class_name in enumerate(classes):
            class_dir = os.path.join(self.root_dir, class_name)
            rows.extend(
                [os.path.join(class_name, image_file), label]
                for image_file in sorted(os.listdir(class_dir))
                if os.path.isfile(os.path.join(class_dir, image_file))
            )
        random.shuffle(rows)
        # Explicit column labels keep the frame two-column even when empty.
        self.csv = pd.DataFrame(rows, columns=[0, 1])
        self.csv.to_csv(csv_path, index=False, header=False)

    def numcatdog(self):
        """Return the number of rows whose label equals 1."""
        return int((self.csv.iloc[:, 1] == 1).sum())

    def __len__(self):
        """Return the number of rows in the index."""
        return len(self.csv)

    def __getitem__(self, index):
        """Open the image at ``index`` and return it with its label.

        Returns:
            A dict ``{'image': <opened image>, 'label': <label>}``, passed
            through ``transform`` when one was given.
        """
        if torch.is_tensor(index):
            index = index.tolist()
        image_name = os.path.join(self.root_dir, self.csv.iloc[index, 0])
        image_label = self.csv.iloc[index, 1]
        image = Image.open(image_name)
        sample = {'image': image, 'label': image_label}

        if self.transform:
            sample = self.transform(sample)

        return sample

In [17]:
# Build the training set from the class folders under dataset/training_set.
# os.path.join keeps the path valid on non-Windows systems as well.
# NOTE(review): only Reshape() is in the pipeline, so every pass over
# `trainset` yields the same resized images; roi/flip/mirror are defined but
# unused here. Add them before Reshape() if real augmentation is intended.
compose = transforms.Compose([Reshape()])
trainset = CatDogDataset(os.path.join('dataset', 'training_set'), 'training_data.csv', compose)
print("Training set len: ", len(trainset))

Found the csv!!..
Training set len:  8000


In [None]:
# Write NUM_PASSES transformed copies of every training image to disk and
# record one (file name, label) row per written file.
NUM_PASSES = 7
out_dir = os.path.join('dataset', 'augmentation_set', 'augmentation_image')
os.makedirs(out_dir, exist_ok=True)  # the target folder may not exist yet

records = []
im_name = 0  # running file index; equals the number of images written
for _ in range(NUM_PASSES):
    for index in range(len(trainset)):
        sample = trainset[index]
        name = str(im_name) + '.jpg'
        sample['image'].save(os.path.join(out_dir, name))
        records.append([name, sample['label']])
        im_name += 1

# Build the frame once: appending to a numpy array per row copies the whole
# array each time (quadratic) and turns the labels into strings.
s = pd.DataFrame(records)

In [19]:
# Total number of image files written by the augmentation loop.
# NOTE(review): the stored output (48000) does not match 7 passes over the
# 8000-image training set (56000); it looks stale — re-run to confirm.
im_name

48000

In [9]:
# Index of the written files: column 0 is the file name, column 1 the label.
s

Unnamed: 0,0,1
0,0.jpg,1
1,1.jpg,1
2,2.jpg,0
3,3.jpg,0
4,4.jpg,0
...,...,...
47995,47995.jpg,1
47996,47996.jpg,1
47997,47997.jpg,1
47998,47998.jpg,1


In [10]:
# Save the file-name/label index without header or row index.
# os.path.join avoids hand-written separators: a raw string with doubled
# backslashes puts two literal backslashes between the path components.
s.to_csv(os.path.join('dataset', 'augmentation_set', 'augmentation.csv'), index=False, header=False)