## Preprocess data

In [None]:
import os
from os import listdir
from os.path import isfile, join
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import sys

### Resize images to 69x69x3 and augment the data with flipped copies

In [None]:
def resize_images(img_path, conv_path):
    """
    Resize every image in img_path to 69x69 and write it, together with
    three flipped copies, into conv_path.

    For the i-th file listed in img_path the following files are written:
        <conv_path>/<i>.jpeg    resized image
        <conv_path>/<i>_1.jpeg  cv2.flip(im, 0)   (flip around the x-axis)
        <conv_path>/<i>_2.jpeg  cv2.flip(im, 1)   (flip around the y-axis)
        <conv_path>/<i>_3.jpeg  cv2.flip(im, -1)  (flip around both axes)

    img_path  -- directory with the source images (sub-directories are ignored)
    conv_path -- output directory; created when it does not exist yet

    Raises IOError when a freshly written image cannot be read back.
    """

    if not os.path.exists(conv_path):
        os.makedirs(conv_path)
        print("\nNew Directory created")
    onlyfiles = [f for f in listdir(img_path) if isfile(join(img_path, f))]
    print("Total files in input dir: {}".format(len(onlyfiles)))
    for i, f in enumerate(onlyfiles):
        newPath = conv_path+'/'+str(i)+'.jpeg'
        # The context manager closes the source file once it has been decoded.
        with Image.open(img_path+'/'+f) as img:
            # JPEG cannot store alpha or palette data, so such images are
            # converted to RGB first instead of failing in save().
            if img.mode not in ('RGB', 'L'):
                img = img.convert('RGB')
            # Image.LANCZOS is the filter formerly exposed as Image.ANTIALIAS,
            # which was removed in Pillow 10.
            resized = img.resize((69, 69), Image.LANCZOS)
        sys.stdout.write("\r {}".format(newPath))
        sys.stdout.flush()
        resized.save(newPath)

        # Re-read with OpenCV so the flipped variants are produced from exactly
        # the pixels that were stored on disk.
        im = cv2.imread(newPath)
        if im is None:
            raise IOError("Could not read back {}".format(newPath))

        # (file suffix, cv2.flip code) for the three augmented copies
        for suffix, flip_code in (('_1', 0), ('_2', 1), ('_3', -1)):
            cv2.imwrite(conv_path+'/'+str(i)+suffix+'.jpeg',
                        cv2.flip(im, flip_code))
        if im.shape[2] != 3:
            print(im.shape)


### Convert images into list

In [None]:
def convert_images(img_path, label):
    """
    Load every image file in img_path as an RGB array.

    img_path -- directory containing the images (sub-directories are ignored)
    label    -- value attached to every image read from this directory

    Returns a tuple (rgb, labels): rgb is the list of images with channels
    reordered from OpenCV's BGR to RGB, labels is a list of the same length
    holding `label` once per image.
    """
    onlyfiles = [f for f in listdir(img_path) if isfile(join(img_path, f))]

    rgb = []
    # Kept separate from the `label` parameter: reusing that name for the list
    # made the list append itself instead of the label value.
    labels = []
    for f in onlyfiles:
        img = cv2.imread(img_path+'/'+f)
        # cv2.imread yields BGR; convert once to the RGB order used downstream.
        rgb.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        labels.append(label)

    return rgb, labels

### Compute 10% of the data size

In [None]:

def find_ten_percent(data):
    """
    Return one tenth of the number of items in data, rounded down.
    """
    total = len(data)
    return total // 10

### Resize all images in the `test_images` directory and store them in `converted_test`

In [None]:
# Collect the sub-directories of ./test_images; each one is processed on its own.
folders = []
for name in os.listdir('./test_images'):
    if os.path.isdir(os.path.join('./test_images', name)):
        folders.append(name)

# Mirror every sub-directory into ./converted_test with resized/flipped images.
for d in folders:
    img_dir = './test_images/{}'.format(d)
    conv_dir = './converted_test/{}'.format(d)
    resize_images(img_dir, conv_dir)

### Convert images in the `converted_test` directory to lists and split train/test

In [None]:
# Each sub-directory of ./converted_test is treated as one class; its position
# in `folders` is used as the integer label.
# NOTE(review): os.listdir order is arbitrary, so the label index assigned to a
# class is not guaranteed to be the same across machines - confirm acceptable.
folders = [name for name in os.listdir('./converted_test')
            if os.path.isdir(os.path.join('./converted_test', name))]
train_list = []
test_list = []
train_list_label = []
test_list_label = []
for di, d in enumerate(folders):
    img_path = './converted_test/'+d
    onlyfiles = [f for f in listdir(img_path) if isfile(join(img_path,f))]
    print(len(onlyfiles))
    label = []
    rgb = []
    for f in onlyfiles:
        img = cv2.imread(img_path+'/'+f)
        # cv2.imread yields BGR; store the image in RGB channel order.
        rgb.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        label.append(di)
    print(len(rgb))
    # The last 10% of each class goes to the test split.
    splitCount = int(len(rgb) * 0.1)
    # Split on a positive index: with fewer than 10 images splitCount is 0 and
    # slicing with -0 would put the whole class into the test split.
    splitAt = len(rgb) - splitCount
    pumtrain_list = rgb[:splitAt]
    pumtest_list = rgb[splitAt:]
    pumtrain_label_list = label[:splitAt]
    pumtest_label_list = label[splitAt:]
    print(len(pumtrain_list))
    print(len(pumtest_list))
    print(len(pumtrain_list),' ',len(pumtrain_label_list),' test: ',len(pumtest_list),' ',len(pumtest_label_list))
    train_list += pumtrain_list
    test_list += pumtest_list
    train_list_label += pumtrain_label_list
    test_list_label += pumtest_label_list
    print('Total train data: ',len(train_list), 'total test ',(len(test_list)))
    print('Total train label: ',len(train_list_label), 'total test label',(len(test_list_label)))

In [None]:
# Append the test samples (and their labels) to the training lists in place.
# NOTE(review): after this the test samples are also part of the training
# lists while test_list is still exported separately - confirm this overlap
# is intended.
train_list.extend(test_list)
train_list_label.extend(test_list_label)

### Shuffling the data

In [None]:
# Pair every image with its label as [image, label] so both can be shuffled
# together.
shuffle_train = [[r, lbl] for r, lbl in zip(train_list, train_list_label)]
print('loop')
# Show the labels of the first 50 samples; slicing (rather than indexing
# 0..49) keeps this working when fewer than 50 samples exist.
for pair in shuffle_train[:50]:
    print(pair[1])

In [None]:
# Show the distinct label values present in the test split.
set(test_list_label)

In [None]:
from random import shuffle
# Shuffle the [image, label] pairs in place so classes are interleaved.
shuffle(shuffle_train)
print('loop')
# Show the labels of samples 50..99 as a spot check of the shuffle; slicing
# avoids an IndexError when fewer than 100 samples exist.
for pair in shuffle_train[50:100]:
    print(pair[1])

In [None]:
# Split the shuffled [image, label] pairs back into two parallel lists.
train_list = [pair[0] for pair in shuffle_train]
train_list_label = [pair[1] for pair in shuffle_train]
len(train_list)

In [None]:
# Turn the Python lists into numpy arrays (images and labels, train and test).
train_data, train_label = np.array(train_list), np.array(train_list_label)
test_data, test_label = np.array(test_list), np.array(test_list_label)
train_data.shape

In [None]:
# Bundle the four arrays under the keys that are written to the pickle file.
data = dict(
    train=train_data,
    train_labels=train_label,
    test=test_data,
    test_labels=test_label,
)

### Test images and labels

In [None]:
# Visual sanity check: print the label of sample 100 and draw its image.
print('display image:  {}'.format(train_label[100]))
plt.axis('off')
plt.imshow(train_data[100])
plt.show()

### Write data into pickle file

In [None]:
import pickle


# Serialize the dataset dictionary to new_data.pkl ...
with open('new_data.pkl', 'wb') as out_file:
    pickle.dump(data, out_file, protocol=pickle.HIGHEST_PROTOCOL)

# ... and read it straight back into `b` to verify the file round-trips.
with open('new_data.pkl', 'rb') as in_file:
    b = pickle.load(in_file)


In [None]:
# Report the number of entries stored under each key of the reloaded data.
for key in ('test', 'test_labels', 'train', 'train_labels'):
    print(len(b[key]))