# **Import**

In [0]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
import cv2 as cv
from google.colab.patches import cv2_imshow
import os
import random
import math
from google.colab import drive
import pickle

# **Mount**

In [0]:
# Mount Google Drive at /content/drive; the image folder (DIR) and the pickle
# output paths used later in this notebook all live under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **Loading Data**

The three functions below load the images labelled as looking left, looking right, and looking at the origin.

Within these functions, each image is read in grayscale, resized from 300\*300 to 100\*100, and then appended to a list.

The left images are labelled 0, the right images 1, and the origin images 2; each label is the index of the corresponding name in CATEGORY.

In [0]:
# Class names. The integer label stored next to each image is an index into
# this list: 0 -> 'Left', 1 -> 'Right', 2 -> 'Origin'.
CATEGORY = ['Left','Right','Origin']
# Root folder of the image data on the mounted Google Drive.
DIR = '/content/drive/My Drive/ImageData'


def load_left_data():
  """Load every 'Left' image as a 100x100 grayscale array labelled 0.

  Reads the numbered sub-folders DIR/Left/1 ... DIR/Left/8. Files that OpenCV
  cannot decode are reported and skipped instead of crashing the load.

  Returns:
    A list of [image, 0] pairs, where image is the resized grayscale array.
  """
  left_images = []
  for i in range(1, 9):
    path = os.path.join(DIR, 'Left', str(i))
    # Sorted so the load order does not depend on the file system.
    for img in sorted(os.listdir(path)):
      file_path = os.path.join(path, img)
      im = cv.imread(file_path, 0)  # flag 0 = read as grayscale
      if im is None:
        # cv.imread returns None (it does not raise) for unreadable or
        # non-image files; cv.resize would then fail with an opaque error.
        print('Skipping unreadable file:', file_path)
        continue
      im = cv.resize(im, (100, 100))
      left_images.append([im, 0])
  return left_images


def load_right_data():
  """Load every 'Right' image as a 100x100 grayscale array labelled 1.

  Reads the numbered sub-folders DIR/Right/1 ... DIR/Right/8. Files that
  OpenCV cannot decode are reported and skipped instead of crashing the load.

  Returns:
    A list of [image, 1] pairs, where image is the resized grayscale array.
  """
  right_images = []
  for i in range(1, 9):
    path = os.path.join(DIR, 'Right', str(i))
    # Sorted so the load order does not depend on the file system.
    for img in sorted(os.listdir(path)):
      file_path = os.path.join(path, img)
      im = cv.imread(file_path, 0)  # flag 0 = read as grayscale
      if im is None:
        # cv.imread returns None (it does not raise) for unreadable or
        # non-image files; cv.resize would then fail with an opaque error.
        print('Skipping unreadable file:', file_path)
        continue
      im = cv.resize(im, (100, 100))
      right_images.append([im, 1])
  return right_images


def load_origin_data():
  """Load every 'Origin' image as a 100x100 grayscale array labelled 2.

  Reads the files directly inside DIR/Origin (there are no numbered
  sub-folders for this class). Files that OpenCV cannot decode are reported
  and skipped instead of crashing the load.

  Returns:
    A list of [image, 2] pairs, where image is the resized grayscale array.
  """
  origin_images = []
  path = os.path.join(DIR, 'Origin')
  # Sorted so the load order does not depend on the file system.
  for img in sorted(os.listdir(path)):
    file_path = os.path.join(path, img)
    im = cv.imread(file_path, 0)  # flag 0 = read as grayscale
    if im is None:
      # cv.imread returns None (it does not raise) for unreadable or
      # non-image files; cv.resize would then fail with an opaque error.
      print('Skipping unreadable file:', file_path)
      continue
    im = cv.resize(im, (100, 100))
    origin_images.append([im, 2])
  return origin_images

    The number of left images is 365.
    The number of right images is 438.
    The number of origin images is 58

# **Split Data**

A random 10% of the images from each of the left, right and origin lists is moved to the test set; the remaining images form the training set.

In [0]:
def _hold_out(images, fraction=0.1):
  """Pop floor(fraction * len(images)) random items from `images` and return them.

  `images` is modified in place: the returned items are removed from it.
  """
  held_out = []
  for _ in range(math.floor(fraction * len(images))):
    # randrange(len(images)) picks a valid index into the shrinking list.
    held_out.append(images.pop(random.randrange(len(images))))
  return held_out


def split():
  """Load all three classes and split them into training and test sets.

  For each class (left, right, origin) a random 10% of its images, rounded
  down, is moved to the test set; everything left over goes to the training
  set. Sampling per class keeps the class proportions similar in both sets.

  Returns:
    (training_set, test_set): two lists of [image, label] pairs. Neither list
    is shuffled; each is grouped by class in left, right, origin order.
  """
  left_images = load_left_data()
  right_images = load_right_data()
  origin_images = load_origin_data()

  test_set = []
  # The same hold-out rule is applied to every class. The previous left-class
  # loop started at 1 instead of 0 and so held out one image too few.
  for images in (left_images, right_images, origin_images):
    test_set.extend(_hold_out(images))

  training_set = left_images + right_images + origin_images
  return training_set, test_set

# **Shuffle**

Both the training set and the test set are shuffled so that images of the same category are not grouped together; this helps keep the model from being biased by the order of the data.

In [0]:
# Build the two sets, then shuffle each list in place so that the classes are
# interleaved rather than grouped.
# NOTE(review): no random seed is set in the visible code, so the split and
# the ordering differ on every run -- confirm whether that is intended.
training_set, test_set = split()
random.shuffle(training_set)
random.shuffle(test_set)

# **Prepare Training Set**

1.   Separate the features (stored in list X) and the label (stored in list y) in the training set.
2.   Wrap the lists into Numpy arrays.
3.   Reshape the arrays.



In [0]:
# Separate the [image, label] pairs of the training set into two parallel
# lists, keeping the pair order.
pixel_arrays, class_ids = [], []
for image, label in training_set:
  pixel_arrays.append(image)
  class_ids.append(label)

# Features: one 100x100 single-channel image per row (N, 100, 100, 1).
X = np.reshape(np.array(pixel_arrays), (-1, 100, 100, 1))
# Labels: flat vector of N class indices.
y = np.reshape(np.array(class_ids), -1)

# **Store Training Set**

Save the training-set feature array X and label array y as pickle files under '/content/drive/My Drive/Data'.

In [0]:
# Persist the training features and labels to Google Drive. The `with` blocks
# guarantee each file is closed even if pickle.dump raises part-way through.
with open('/content/drive/My Drive/Data/X.pickle', 'wb') as out:
  pickle.dump(X, out)

with open('/content/drive/My Drive/Data/y.pickle', 'wb') as out:
  pickle.dump(y, out)