In [None]:
# Colab magic: select TensorFlow 2.x for this runtime (placed ahead of the `import tensorflow` cell)
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
from keras.preprocessing.image import image
from keras.applications.resnet_v2 import preprocess_input
from tensorflow.keras.applications import ResNet50V2
import os
from shutil import copy2
from random import sample, seed
import gc

from google.colab import drive

Using TensorFlow backend.


In [None]:
# Names of the ImageNet classes that are treated as "car".
# Not referenced by the visible cells; the filter itself works on CAR_IDX.
CAR_CLASSES = ['minivan', 'limousine', 'sports_car', 'convertible', 'cab', 'racer', 'passenger_car',
               'recreational_vehicle', 'pickup', 'police_van', 'minibus', 'moving_van', 'tow_truck', 'jeep',
               'landrover', 'beach_wagon']

# Column indices into the ImageNet prediction vector whose probabilities are summed as the car score.
# NOTE(review): CAR_CLASSES lists 16 names but CAR_IDX holds 15 indices - presumably 'jeep' and
# 'landrover' map to a single ImageNet class; confirm the name/index mapping.
CAR_IDX = [656, 627, 817, 511, 468, 751, 705, 757, 717, 734, 654, 675, 864, 609, 436]

In [None]:
# Mount Google Drive at /content/drive; the dataset archive is copied from it in the next cell.
# force_remount=True is passed so the mount is redone when this cell is re-run.
drive.mount('/content/drive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [None]:
!mkdir data
!mkdir data/raw
!cp drive/My\ Drive/CarClassification/dataset.zip data

In [None]:
!mkdir data/filtered

!unzip -q data/dataset.zip -d data/raw

In [None]:
# Accumulated car-class probability above which an image is flagged as a car.
# Author's note: values below 0.2 work well; higher thresholds filter out too many car images.
THRESH = 0.1
IMAGE_DIR = 'data/raw/'  # Directory the raw images are read from
# NOTE(review): the setup cell creates data/filtered, while this points at filtered/ - confirm which is intended.
STORAGE_DIR = 'filtered/'  # Base directory for results (the flags CSV is written beneath it)

In [None]:
def is_car_acc_prob(predictions, thresh=THRESH, car_idx=CAR_IDX):
    """
    Flag images as cars by summing the probability mass assigned to the car classes.

    Args:
        predictions: (?, 1000) matrix of class probabilities, e.g. from ResNet with imagenet weights
        thresh: an image counts as a car when its summed car probability is strictly greater than this
        car_idx: column indices of the car classes

    Returns:
        np.array of booleans, one per row of predictions, True where a car is detected
    """
    probs = np.array(predictions, dtype=float)
    # Keep only the car-class columns, then collapse them to a single score per image
    car_score = probs[:, car_idx].sum(axis=1)
    return np.greater(car_score, thresh)

In [None]:
def load_images(filepath, filenames):
    """
    Load the given image files, resize them to 224x224 and run them through preprocess_input.

    Prints one '#' per 1000 files as a progress indicator.

    Args:
        filepath: directory the files are stored in (with or without a trailing separator)
        filenames: filenames, relative to filepath, of the images to load

    Returns:
        img_array: preprocess_input applied to the stacked array of loaded images
    """
    img_array = []
    for i, file in enumerate(filenames):
        if i % 1000 == 0:
            print("#", end="")
        # os.path.join only inserts a separator when one is missing, so the path is
        # correct whether or not filepath ends with a slash
        img = image.load_img(os.path.join(filepath, file), target_size=(224, 224))
        img_array.append(image.img_to_array(img))

    return preprocess_input(np.asarray(img_array))

In [None]:
# Filenames of all images. os.listdir returns entries in arbitrary order, so sort them
# to get the same chunking and the same CSV row order on every run.
files = sorted(os.listdir(IMAGE_DIR))

# For a quick trial run, process a reproducible random subset instead:
# seed(32)
# files = sample(files, 5000)

total_files = len(files)

print("There are {} files to be processed...".format(total_files))

There are 64467 files to be processed...


In [None]:
# Initialize ResNet50V2 with pretrained ImageNet weights; its predictions feed is_car_acc_prob below
model = ResNet50V2(weights='imagenet')

Downloading data from https://github.com/keras-team/keras-applications/releases/download/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels.h5


In [None]:
# Predict in chunks of n images, freeing each chunk explicitly before the next one is loaded
gc.collect()
n = 7000
pred_list = []
for chunk_start in range(0, len(files), n):
    print(chunk_start)
    # Slicing clamps at the end of the list, so the final chunk is simply shorter
    chunk = load_images(IMAGE_DIR, files[chunk_start:chunk_start + n])
    pred_list.append(model.predict(chunk))
    # Drop the decoded images and collect garbage right away to keep memory bounded
    del chunk
    gc.collect()

0
#######7000
#######14000
#######21000
#######28000
#######35000
#######42000
#######49000
#######56000
#######63000
##

In [None]:
# Stack the per-chunk prediction arrays along the first axis into a single array
preds = np.concatenate(pred_list, axis=0)

In [None]:
# Every input file must have exactly one prediction row
assert preds.shape[0] == len(files), \
    "Got {} prediction rows for {} files".format(preds.shape[0], len(files))

In [None]:
# Boolean car flag per prediction row, using the default threshold and car indices
res = is_car_acc_prob(preds)

# Index the flags by filename; note that `df` is a pandas Series, not a DataFrame
df = pd.Series(res, index=files)

In [None]:
# Number of images flagged as cars (each True counts as 1)
df.sum()

53783

In [None]:
# Output directory is named after the threshold, e.g. THRESH = 0.1 -> 'thresh0_1/'
storage_dir_ext = STORAGE_DIR + 'thresh' + str(THRESH).replace('.', '_') + '/'

# to_csv does not create missing directories, so make sure the target exists first
os.makedirs(storage_dir_ext, exist_ok=True)

df.to_csv(storage_dir_ext + 'filenames_with_car_flags_bw_added.csv')

  This is separate from the ipykernel package so we can avoid doing imports until
