In [3]:
import tensorflow as tf
import pathlib
import random
import cv2
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


In [4]:

# Root folder that holds one sub-directory per class label.
data_root = pathlib.Path.home().joinpath('Documents/Github/Dissertation/Data/')
for item in data_root.iterdir():
   print(item)

# Seed for the shuffle below. With a fixed seed (and the sort that precedes
# the shuffle) the path order is the same on every run, so a later seeded
# train/validation split of this list is reproducible as well.
shuffle_seed = 3

# Every entry one level below the class folders, as strings.
# Skipped: macOS ".DS_Store" files, and the ".txt" label files that a later
# cell writes next to each image (on a re-run they would otherwise be
# collected here as if they were images).
all_image_paths = sorted(
   str(path) for path in data_root.glob('*/*')
   if "DS_Store" not in str(path) and path.suffix.lower() != '.txt'
)
random.Random(shuffle_seed).shuffle(all_image_paths)

image_count = len(all_image_paths)
# Class names are the sub-directory names; sorting gives each a stable index.
label_names = sorted(item.name for item in data_root.glob('*/') if item.is_dir())
label_to_index = {name: index for index, name in enumerate(label_names)}
# Each image is labelled with the index of the folder that contains it.
all_image_labels = [label_to_index[pathlib.Path(path).parent.name]
                   for path in all_image_paths]

print("First 10 labels indices: ", all_image_labels[:10])

# Target (width, height) passed to cv2.resize.
img_size = (224, 224)
def load_and_preprocess_image(image):
   """Load an image file and return it as a normalised RGB float array.

   Args:
      image: Path of the image file to read.

   Returns:
      The image resized to `img_size`, with channels reordered from
      OpenCV's BGR to RGB, converted to float64 and divided by 255.

   Raises:
      ValueError: If OpenCV cannot read the file (`cv2.imread` returns
         None for a missing or undecodable file instead of raising).
   """
   pixels = cv2.imread(image)
   if pixels is None:
      # Fail here with the offending path rather than with an opaque
      # assertion error inside cv2.resize.
      raise ValueError("Could not read image: {}".format(image))
   pixels = cv2.resize(pixels, img_size)
   pixels = pixels[:, :, [2, 1, 0]]  # BGR -> RGB
   pixels = pixels.astype('float64')
   pixels /= 255.0  # normalize to [0,1] range

   return pixels



/Users/cyril/Documents/Github/Dissertation/Data/Triple Tops
/Users/cyril/Documents/Github/Dissertation/Data/.DS_Store
/Users/cyril/Documents/Github/Dissertation/Data/Pipe Tops
/Users/cyril/Documents/Github/Dissertation/Data/Flags
/Users/cyril/Documents/Github/Dissertation/Data/Rectangle Tops
/Users/cyril/Documents/Github/Dissertation/Data/Gaps
/Users/cyril/Documents/Github/Dissertation/Data/Island Reversals
/Users/cyril/Documents/Github/Dissertation/Data/Horn Bottoms
/Users/cyril/Documents/Github/Dissertation/Data/Scallops
/Users/cyril/Documents/Github/Dissertation/Data/Pipe Bottoms
/Users/cyril/Documents/Github/Dissertation/Data/Horn Tops
/Users/cyril/Documents/Github/Dissertation/Data/Pennants
/Users/cyril/Documents/Github/Dissertation/Data/Triple Bottoms
/Users/cyril/Documents/Github/Dissertation/Data/Rectangle Bottoms
First 10 labels indices:  [5, 12, 1, 5, 1, 8, 5, 7, 1, 10]


In [7]:

# Write one label file per image: same location and base name as the image,
# with a ".txt" extension. Each file holds the class index followed by the
# fixed values "0.0 0.0 1.0 1.0".
for img, label_index in zip(all_image_paths, all_image_labels):
    # splitext strips only the final extension, so dots elsewhere in the
    # path (folder names, "a.b.jpg") do not truncate the target name.
    label_path = os.path.splitext(img)[0] + '.txt'
    content = str(label_index) + " 0.0 0.0 1.0 1.0"
    # 'w' (not 'a') keeps the cell idempotent: re-running it rewrites the
    # label instead of appending a second copy onto the same line. The
    # context manager guarantees the file is flushed and closed.
    with open(label_path, 'w') as f:
        f.write(content)

In [None]:
import glob
import os


def write_split_lists(image_dir, percentage_test,
                      train_list='train.txt', test_list='test.txt'):
    """Split the .jpg files of a folder into a train list and a test list.

    The files matching ``<image_dir>/*.jpg`` are visited in sorted order and
    every ``round(100 / percentage_test)``-th one is written to the test
    list; all others go to the train list. Both list files are created or
    truncated, and contain one image path per line.

    NOTE(review): only .jpg files directly inside `image_dir` are matched,
    not files in its sub-folders -- confirm this is the intended layout.

    Args:
        image_dir: Folder to scan for .jpg files.
        percentage_test: Percentage of images to be used for the test set.
        train_list: Path of the train list file to write.
        test_list: Path of the test list file to write.

    Returns:
        A ``(n_train, n_test)`` tuple with the number of paths written to
        each list.

    Raises:
        ZeroDivisionError: If `percentage_test` is 0.
    """
    index_test = round(100 / percentage_test)
    counter = 1
    n_train = 0
    n_test = 0
    # Context managers make sure both lists are flushed and closed.
    with open(train_list, 'w') as file_train, open(test_list, 'w') as file_test:
        pattern = os.path.join(image_dir, "*.jpg")
        # Sorted so the split does not depend on filesystem listing order.
        for pathAndFilename in sorted(glob.iglob(pattern)):
            # os.path.join avoids the doubled separator that plain string
            # concatenation produces when image_dir already ends in "/".
            entry = os.path.join(image_dir, os.path.basename(pathAndFilename)) + "\n"
            if counter == index_test:
                counter = 1
                file_test.write(entry)
                n_test += 1
            else:
                file_train.write(entry)
                counter = counter + 1
                n_train += 1
    return n_train, n_test


# Current working directory (a notebook has no __file__ to derive it from).
print(os.getcwd())
current_dir = 'Data/'
# Percentage of images to be used for the test set
percentage_test = 20
# Create and/or truncate train.txt and test.txt, then populate them.
n_train, n_test = write_split_lists(current_dir, percentage_test)

In [21]:
def _bytes_feature(value):
  """Wrap a single string / bytes value in a bytes_list `tf.train.Feature`."""
  eager_tensor_type = type(tf.constant(0))
  if isinstance(value, eager_tensor_type):
    # BytesList won't unpack a string from an EagerTensor, so take the
    # tensor's underlying value first.
    value = value.numpy()
  wrapped = tf.train.BytesList(value=[value])
  return tf.train.Feature(bytes_list=wrapped)

def _float_feature(value):
  """Wrap a single float / double value in a float_list `tf.train.Feature`."""
  wrapped = tf.train.FloatList(value=[value])
  return tf.train.Feature(float_list=wrapped)

def _int64_feature(value):
  """Wrap a single bool / enum / int / uint value in an int64_list `tf.train.Feature`."""
  wrapped = tf.train.Int64List(value=[value])
  return tf.train.Feature(int64_list=wrapped)

In [22]:

# Raw (still encoded) bytes and label of the first image, used as a sample.
# The context manager closes the file handle once the bytes are read.
with open(all_image_paths[0], 'rb') as image_file:
    image_string = image_file.read()
label = all_image_labels[0]

def image_example(image_string, label):
    """Build a `tf.train.Example` for one encoded image and its label.

    The image is decoded with `tf.image.decode_jpeg` only to read its shape;
    the bytes stored in the Example are the original `image_string`.

    Args:
        image_string: Encoded image bytes.
        label: Integer class label.

    Returns:
        A `tf.train.Example` with the features 'height', 'width', 'depth',
        'label' and 'image_raw'.
    """
    decoded_shape = tf.image.decode_jpeg(image_string).shape
    height = decoded_shape[0]
    width = decoded_shape[1]
    depth = decoded_shape[2]

    feature = {}
    feature['height'] = _int64_feature(height)
    feature['width'] = _int64_feature(width)
    feature['depth'] = _int64_feature(depth)
    feature['label'] = _int64_feature(label)
    feature['image_raw'] = _bytes_feature(image_string)

    features = tf.train.Features(feature=feature)
    return tf.train.Example(features=features)
    
# Preview: only the first 15 lines of the Example's text representation.
example_text = str(image_example(image_string, label))
print('\n'.join(example_text.split('\n')[:15]))
print('...')

features {
  feature {
    key: "depth"
    value {
      int64_list {
        value: 3
      }
    }
  }
  feature {
    key: "height"
    value {
      int64_list {
        value: 307
      }
...


In [8]:

# Hold out 10% of the (path, label) pairs for validation; the fixed
# random_state makes the split repeatable for a given input order.
X_train, X_val, y_train, y_val = train_test_split(
    all_image_paths,
    all_image_labels,
    test_size=0.1,
    random_state=3,
)

# Show the container type of each split.
for split in (X_train, y_train, X_val, y_val):
    print(type(split))

<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>


In [9]:
# Display the label index of the first training sample.
y_train[0]

8

In [10]:
# Write the training paths to train.txt and the validation paths to
# test.txt, one path per line.
# Each file is opened once and closed by the context manager (previously a
# new handle was opened for every path and never closed). 'w' instead of 'a'
# keeps the cell idempotent: re-running it rewrites the lists rather than
# appending a duplicate copy of every path.
for list_path, names in (('train.txt', X_train), ('test.txt', X_val)):
    with open(list_path, 'w') as f:
        for name in names:
            f.write(name)
            f.write('\n')

In [29]:
# Serialise every training image, together with its label, into one
# TFRecord file.
train_file = 'train_images.tfrecords'
with tf.io.TFRecordWriter(train_file) as writer:
    for filename, label_index in zip(X_train, y_train):
        # Close each image file as soon as its bytes are read, so handles
        # do not accumulate over the whole dataset.
        with open(filename, 'rb') as image_file:
            image_string = image_file.read()
        tf_example = image_example(image_string, label_index)
        writer.write(tf_example.SerializeToString())

In [30]:
# Serialise every validation image, together with its label, into one
# TFRecord file.
val_file = 'val_images.tfrecords'
with tf.io.TFRecordWriter(val_file) as writer:
    for filename, label_index in zip(X_val, y_val):
        # Close each image file as soon as its bytes are read, so handles
        # do not accumulate over the whole dataset.
        with open(filename, 'rb') as image_file:
            image_string = image_file.read()
        tf_example = image_example(image_string, label_index)
        writer.write(tf_example.SerializeToString())