In [1]:
import os
from skimage import io, transform
from glob import glob
import numpy as np
from sklearn.model_selection import train_test_split
from random import randint

# Notebook configuration.
# train_dir / test_dir: image folders, relative to the working directory.
# image_size: edge length in pixels of the square images fed to the model.
train_dir, test_dir = '../train', '../test1'
image_size = 64

In [2]:
# Locate the training images.
# Cats and dogs are told apart by filename prefix; both lists are sorted so the
# row order of the arrays built from them is stable from run to run.
c_train_files_path = os.path.join(train_dir, 'cat*.jpg')
c_train_files = sorted(glob(c_train_files_path))
d_train_files_path = os.path.join(train_dir, 'dog*.jpg')
d_train_files = sorted(glob(d_train_files_path))
no_train_files = len(c_train_files) + len(d_train_files)

# Fail fast: with no input files every later step would run on empty arrays and
# only blow up at the train/test split with an unrelated-looking error.
if no_train_files == 0:
    raise FileNotFoundError(
        "ERROR: Number of files is zero. "
        "No 'cat*.jpg' or 'dog*.jpg' files found in " + repr(train_dir)
    )

# Every source image contributes 5 rows: itself plus 4 augmented copies.
# train_data holds the resized RGB images; train_labels holds one 2-element
# one-hot row per image ([1, 0] = cat, [0, 1] = dog).
train_data = np.zeros((no_train_files*5, image_size, image_size, 3), dtype='float32')
train_labels = np.zeros((no_train_files*5, 2), dtype='float32')
    
# Convert images to numpy arrays and add labels to training data.
# Each source file fills 5 consecutive rows of train_data / train_labels:
# the resized original followed by 4 augmented copies. Cats come first, then dogs.
def _training_variants(img, rng):
    """Yield the five resized training samples derived from one source image.

    Order (identical for every image, whatever its class):
      1. the unmodified image
      2. rotation by a random whole number of degrees in [0, 180]
      3. rotation by a random whole number of degrees in [180, 360]
      4. shear of  0.2 (turns the rectangle into a parallelogram)
      5. shear of -0.1

    img: image array as returned by io.imread.
    rng: np.random.RandomState used to draw the rotation angles.
    """
    target_shape = (image_size, image_size, 3)

    yield transform.resize(img, target_shape, mode='constant')

    for low, high in ((0, 180), (180, 360)):
        # RandomState.randint excludes the upper bound, hence the +1 to keep
        # the inclusive range the notebook used with random.randint.
        angle = int(rng.randint(low, high + 1))
        rotated = transform.rotate(img, angle=angle)
        yield transform.resize(rotated, target_shape, mode='constant')

    # The same shear values are applied to both classes. The original cell used
    # -0.1 for cats but -0.4 for dogs, which lets a model separate the classes by
    # the augmentation artefact alone instead of by image content.
    for shear in (0.2, -0.1):
        sheared = transform.warp(img, inverse_map=transform.AffineTransform(shear=shear))
        yield transform.resize(sheared, target_shape, mode='constant')


# Seeded generator so the saved dataset is reproducible run to run
# (same seed value the notebook uses for its train/test split).
augment_rng = np.random.RandomState(42)

count = 0
for files, one_hot in ((c_train_files, (1.0, 0.0)),    # cat -> [1, 0]
                       (d_train_files, (0.0, 1.0))):   # dog -> [0, 1]
    for f in files:
        img = io.imread(f)
        for new_img in _training_variants(img, augment_rng):
            train_data[count] = new_img
            train_labels[count] = one_hot
            count += 1

# Every pre-allocated row must have been filled; otherwise all-zero images with
# all-zero labels would silently end up in the dataset.
assert count == len(train_data), "filled %d rows, allocated %d" % (count, len(train_data))

In [3]:
# Split into about 95% train, 2.5% validation and 2.5% test.
#
# The split is done per *source image*, not per row. train_data stores all
# variants of one file (original + augmented copies) in consecutive rows, so a
# plain row-wise split would put a rotated copy of a training image into the
# validation/test sets and inflate the measured accuracy.
if no_train_files == 0:
    raise ValueError("Cannot split: no training images were loaded.")
variants_per_image = len(train_data) // no_train_files
assert variants_per_image * no_train_files == len(train_data), \
    "train_data rows are not a whole multiple of the number of source images"


def _rows_of(image_ids, rng):
    """Return, in shuffled order, the row indices of every variant of the given source images."""
    rows = (np.asarray(image_ids)[:, None] * variants_per_image
            + np.arange(variants_per_image)).ravel()
    rng.shuffle(rows)
    return rows


image_ids = np.arange(no_train_files)
train_ids, temp_ids = train_test_split(image_ids, test_size=0.05, random_state=42)
valid_ids, test_ids = train_test_split(temp_ids, test_size=0.5, random_state=42)

split_rng = np.random.RandomState(42)  # fixed seed: reproducible row order
train_rows = _rows_of(train_ids, split_rng)
valid_rows = _rows_of(valid_ids, split_rng)
test_rows = _rows_of(test_ids, split_rng)

X, Y = train_data[train_rows], train_labels[train_rows]
X_valid, Y_valid = train_data[valid_rows], train_labels[valid_rows]
X_test, Y_test = train_data[test_rows], train_labels[test_rows]

# Held-out pool (validation + test), kept because the original cell exposed it.
X_temp = np.concatenate((X_valid, X_test))
Y_temp = np.concatenate((Y_valid, Y_test))

# No source image may contribute rows to more than one split.
assert len(X) + len(X_valid) + len(X_test) == len(train_data)

In [4]:
# Write every split to the working directory as <name>_<image_size>.npy,
# data array first and its label array second, in train / valid / test order.
for prefix, split_array in (
    ('train_data_', X),
    ('train_labels_', Y),
    ('valid_data_', X_valid),
    ('valid_labels_', Y_valid),
    ('test_data_', X_test),
    ('test_labels_', Y_test),
):
    out_name = prefix + str(image_size) + '.npy'
    np.save(out_name, split_array)