In [1]:
import tensorflow as tf

In [2]:
# notice I have fewer layers and dropout layers after each maxpool layer
model = tf.keras.models.Sequential([
    # Note the input shape is the desired size of the image 200x200 with 3 bytes color
    # This is the first convolution
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(200, 200, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Dropout(0.25),
    # The second convolution
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Dropout(0.25),
    # The third convolution
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Dropout(0.25),
    # Flatten the results to feed into a DNN
    tf.keras.layers.Flatten(),
    # 512 neuron hidden layer
    tf.keras.layers.Dense(512, activation='relu'),
    # Only 1 output neuron. It will contain a value from 0-1 
    tf.keras.layers.Dense(1, activation='sigmoid')])

In [3]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 198, 198, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 99, 99, 32)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 99, 99, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 97, 97, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 48, 48, 32)       0         
 2D)                                                             
                                                                 
 dropout_1 (Dropout)         (None, 48, 48, 32)        0

In [4]:
RMSprop = tf.keras.dtensor.experimental.optimizers.RMSprop
# could also use Adam here 
model.compile(loss='binary_crossentropy',optimizer=RMSprop(),metrics='accuracy')


In [20]:
training_set = tf.keras.utils.image_dataset_from_directory(
  'birds',
  seed=123,
  image_size=(200, 200),
  subset='training',
  validation_split=0.3,
  batch_size=5)

validation_set = tf.keras.utils.image_dataset_from_directory(
  'birds',
  shuffle=True,
  seed=17,
  image_size=(200, 200),
  validation_split=0.3,
  subset='validation',
  batch_size=5)
 
# predictions and labels only seem to line up if shuffle is false ... 
# so you can make a test folder and grab all the images in that folder 
# or you can get every image from 'birds' in order and then cull the images in the training set
# for a holdout set as I do below

holdout_set_all = tf.keras.utils.image_dataset_from_directory(
  'birds',
  shuffle=False,
  seed=17,
  image_size=(200, 200),
  batch_size=1) # batch size has to be one for this set

train_file_paths = training_set.file_paths
validation_file_paths = validation_set.file_paths
holdout_file_paths = holdout_set_all.file_paths

Found 118 files belonging to 2 classes.
Using 83 files for training.
Found 118 files belonging to 2 classes.
Using 35 files for validation.
Found 118 files belonging to 2 classes.


In [22]:
images = []
labels = []
for e, image_label in enumerate(holdout_set_all):
    f = holdout_file_paths[e]
    if f in validation_file_paths:
        images.append(image_label[0].numpy())
        labels.append(image_label[1].numpy())
len(validation_file_paths), len(images), len(labels)

(35, 35, 35)

In [23]:
training_set.class_names, validation_set.class_names

(['001.Black_footed_Albatross', '141.Artic_Tern'],
 ['001.Black_footed_Albatross', '141.Artic_Tern'])

In [24]:
history = model.fit(training_set,
      epochs=9,
      verbose=1,
      validation_data = validation_set)

Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9


In [25]:
model.evaluate(validation_set)



[0.8106783032417297, 0.6000000238418579]

In [27]:
img_arrays = []
for i in images:
    img_arrays.append(i)

test_dataset = tf.data.Dataset.from_tensor_slices(img_arrays)
preds = model.predict(test_dataset)



In [28]:
preds[0]

array([0.5464277], dtype=float32)

In [33]:
import pandas as pd
from collections import Counter
c = Counter([i[0] for i in labels])
c

Counter({0: 20, 1: 15})

In [34]:
df = pd.DataFrame()
df['true_label'] = [i[0] for i in labels]
df['predict_probability'] = [i[0] for i in preds]
df = df.sort_values(by='predict_probability')
inferred_labels = [0 for i in range(c[0])] + [1 for i in range(c[1])]
df['predicted_label'] = inferred_labels 
df['compare'] = df['true_label'] == df['predicted_label']
df.groupby(['true_label', 'compare']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,predict_probability,predicted_label
true_label,compare,Unnamed: 2_level_1,Unnamed: 3_level_1
0,False,4,4
0,True,16,16
1,False,4,4
1,True,11,11


In [35]:
len(df.loc[df['compare'] == True])/len(df)

0.7714285714285715