<a href="https://colab.research.google.com/github/SonOf1998/ProblemSet4/blob/main/ps4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

First, install the `openimages` package, which makes it easy to download the required images.

In [1]:
!pip install openimages

Collecting openimages
  Downloading https://files.pythonhosted.org/packages/49/ba/587944c183999aa9a0416d6979739b78adfe021eee74aa9db78f0beaea06/openimages-0.0.1-py2.py3-none-any.whl
Collecting boto3
[?25l  Downloading https://files.pythonhosted.org/packages/85/54/099a2ea5d4b2d5931a26f280a7585f613b1fafaac9189e489a9e25004a01/boto3-1.16.13-py2.py3-none-any.whl (129kB)
[K     |████████████████████████████████| 133kB 5.3MB/s 
Collecting cvdata
[?25l  Downloading https://files.pythonhosted.org/packages/47/e5/5361375b284ac1da759cf78329f8484cb33c039c4c91e38862ca4cba2ae6/cvdata-0.0.7-py2.py3-none-any.whl (49kB)
[K     |████████████████████████████████| 51kB 4.9MB/s 
Collecting jmespath<1.0.0,>=0.7.1
  Downloading https://files.pythonhosted.org/packages/07/cb/5f001272b6faeb23c1c9e0acc04d48eaaf5c862c17709d20e3469c6e0139/jmespath-0.10.0-py2.py3-none-any.whl
Collecting s3transfer<0.4.0,>=0.3.0
[?25l  Downloading https://files.pythonhosted.org/packages/69/79/e6afb3d8b0b4e96cefbdc690f741d7dd24547

In [2]:
from openimages.download import download_images
import os
import shutil

In [3]:
# wipes all sub-directories (but no plain files) that sit directly in `workdir`
def clear_workdir(workdir):
  """Recursively delete every directory located directly inside `workdir`.

  Regular files in `workdir` are left untouched.
  """
  entries = (os.path.join(workdir, name) for name in os.listdir(workdir))
  for subdir in filter(os.path.isdir, entries):
    shutil.rmtree(subdir)

# creates the directory of one data set (training, validation or testing)
# together with one sub-directory per class
def make_set_directory(set_name, classes):
  """Create `set_name` and a sub-directory for every entry of `classes`.

  Directories that already exist are reused instead of raising
  FileExistsError, so the cell calling this function can be re-run safely.
  Missing parent directories of `set_name` are created as well.

  Args:
    set_name: path of the data set directory to create.
    classes: iterable of class names; each becomes a sub-directory.
  """
  os.makedirs(set_name, exist_ok=True)
  for cls in classes:
    os.makedirs(os.path.join(set_name, cls), exist_ok=True)

In [4]:
workdir = os.getcwd()
# WARNING: removes every sub-directory of the current working directory so
# that this cell always starts from a clean state.
clear_workdir(workdir)

# Number of images per class that go into each split.
nb_training = 400
nb_validation = 100
nb_testing = 100

# These are the classes I selected for the exercise.
# For whatever reason download_images() fails if the class names
# do not start with an upper-case letter.
classes = ["Car", "Bus", "Train"]
# Download exactly as many images per class as the three splits need.
download_images(workdir, classes, exclusions_path=None,
                limit=nb_training + nb_validation + nb_testing)

# download_images() makes its directories with lower-case names only,
# so use the lower-case class names from here on.
classes = [cls.lower() for cls in classes]

set_dirs = ["training", "validation", "testing"]
for set_dir in set_dirs:
  make_set_directory(set_dir, classes)

for cls in classes:
  path_to_class = os.path.join(workdir, cls, "images")
  # os.listdir() returns entries in arbitrary order; sorting makes the
  # train/validation/test assignment reproducible between runs.
  for i, filename in enumerate(sorted(os.listdir(path_to_class))):
    full_path_to_pic = os.path.join(path_to_class, filename)
    if i < nb_training:
      target_set = set_dirs[0]
    elif i < nb_training + nb_validation:
      target_set = set_dirs[1]
    else:
      # everything that is left goes to the testing set
      target_set = set_dirs[2]
    shutil.move(full_path_to_pic, os.path.join(workdir, target_set, cls, filename))

  # we moved every picture to our train/valid/test set
  # so we can delete the now empty download directory
  shutil.rmtree(os.path.join(workdir, cls))



2020-11-08  16:10:12 INFO NumExpr defaulting to 2 threads.
2020-11-08  16:10:15 INFO Downloading 600 train images for class 'car'
100%|██████████| 600/600 [00:13<00:00, 43.66it/s]
2020-11-08  16:10:29 INFO Downloading 600 train images for class 'bus'
100%|██████████| 600/600 [00:14<00:00, 41.70it/s]
2020-11-08  16:10:43 INFO Downloading 600 train images for class 'train'
100%|██████████| 600/600 [00:13<00:00, 43.20it/s]


In [5]:
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.callbacks import ModelCheckpoint, History, EarlyStopping
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Input
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [6]:
img_height = 256     # Input image height
img_width  = 256     # Input image width
batch_size = 32
class_mode = 'categorical'
color_mode = 'rgb'

# Training images are randomly shifted, zoomed and flipped (data augmentation)
# after the InceptionV3-specific preprocessing.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

# Validation and test images only get the InceptionV3 preprocessing:
# no augmentation, so the reported metrics describe the unmodified images.
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_iter = train_datagen.flow_from_directory(
    os.path.join(workdir, set_dirs[0]),
    target_size=(img_height, img_width),
    batch_size = batch_size,
    class_mode = class_mode,
    color_mode = color_mode
)

# Uses test_datagen (not train_datagen) so that validation is not augmented.
validation_iter = test_datagen.flow_from_directory(
    os.path.join(workdir, set_dirs[1]),
    target_size=(img_height, img_width),
    batch_size = batch_size,
    class_mode = class_mode,
    color_mode = color_mode
)

test_iter = test_datagen.flow_from_directory(
    os.path.join(workdir, set_dirs[2]),
    target_size=(img_height, img_width),
    batch_size = 1,
    class_mode = class_mode,
    color_mode = color_mode
)

Found 1200 images belonging to 3 classes.
Found 300 images belonging to 3 classes.
Found 300 images belonging to 3 classes.


In [7]:
# InceptionV3 convolutional base with ImageNet weights and without its
# classification head. The `classes` argument is not passed because it only
# applies when include_top=True.
base_model = InceptionV3(input_shape=(img_height, img_width, 3),
                    weights="imagenet",
                    include_top=False)

inputs = Input(shape=(img_height, img_width, 3))
# training=False keeps the base model in inference mode when it is called here.
x = base_model(inputs, training=False)
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation="relu")(x)
x = Dense(512, activation="relu")(x)
x = Dense(256, activation="relu")(x)
# one softmax output per class
outputs = Dense(len(classes), activation="softmax")(x)

model = Model(inputs, outputs)

# The model is compiled with metrics=["acc"], so the validation accuracy is
# logged as "val_acc"; monitoring "val_accuracy" would never find a value and
# the best-only checkpoint would never be written.
checkpoint = ModelCheckpoint("chk.chk", save_weights_only=True, save_best_only=True, monitor="val_acc", verbose=1)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [8]:
# Stage 1: freeze the pre-trained base and train only the new dense layers.
base_model.trainable = False
# The model ends in a softmax and the generators use class_mode='categorical',
# so categorical cross-entropy (not mean squared error) is the matching loss.
model.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["acc"])

model.fit(
    train_iter,
    epochs=30,
    validation_data=validation_iter,
    callbacks=[checkpoint])

Epoch 1/30



Epoch 2/30

KeyboardInterrupt: ignored

In [None]:
# Stage 2: unfreeze the base model and fine-tune the whole network.
base_model.trainable = True
# Re-compile so the change of `trainable` takes effect. A very low learning
# rate is used so the pre-trained weights are only adjusted slightly, and the
# loss matches the softmax / one-hot label setup.
model.compile(optimizer=RMSprop(learning_rate=1e-5), loss="categorical_crossentropy", metrics=["acc"])

model.fit(
    train_iter,
    epochs=30,
    validation_data=validation_iter,
    callbacks=[checkpoint])

In [None]:
# Evaluate the model on the testing iterator; evaluate() yields the loss
# followed by the compiled metric. Only the metric value is printed.
loss, acc = model.evaluate(test_iter)
print(acc)