<a href="https://colab.research.google.com/github/Matthew-Montalbano/automotive_ethics/blob/main/Object_recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Placeholder dataset: https://www.kaggle.com/veeralakrishna/butterfly-dataset
Download the "images" folder from the link above and upload it to your Google Drive.

In [None]:
# Attach Google Drive to the Colab runtime so the dataset folder can be read from /content/drive/
from google.colab import drive
drive.mount(mountpoint='/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense

# Other Modules
import numpy as np
import os
import glob
import sys
import shutil

# Images, plots, display, and visualization
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Root folder of the dataset on the mounted Drive.
# Edit this path if your copy of the "images" folder lives somewhere else.
base_directory = '/content/drive/My Drive/images'

The scientific (Latin) names of the butterfly categories are: 

001: Danaus plexippus	
002: Heliconius charitonius	
003: Heliconius erato	
004: Junonia coenia	
005: Lycaena phlaeas

006: Nymphalis antiopa	
007: Papilio cresphontes	
008: Pieris rapae	
009: Vanessa atalanta	
010: Vanessa cardui

In [None]:
# Class labels are the three-digit category codes '001' ... '010'.
# Species names in the same order, for reference:
#classes = ['Danaus plexippus', 'Heliconius charitonius', 'Heliconius erato', 'Junonia coenia', 'Lycaena phlaeas', 'Nymphalis antiopa', 'Papilio cresphontes', 'Pieris rapae','Vanessa atalanta', 'Vanessa cardui' ]
classes = ['{:03d}'.format(class_id) for class_id in range(1, 11)]

In [None]:
#One-time setup: copy every image whose file name starts with a class code into the folder
#named after that code (base_directory/<code>). Uncomment, run once, then comment out again.
#NOTE(review): shutil.copy treats a destination that does not exist as a file name, so the class
#folders presumably have to be created beforehand -- confirm, or add os.makedirs(..., exist_ok=True).
#NOTE(review): the first glob result below is reassigned inside the loop before it is ever used.
#images = glob.glob((base_directory) + '/*.png')
#for cl in classes:
  #images = glob.glob((base_directory) + '/' + cl + '*.png')
  #print("{}: {} Images".format(cl, len(images)))
  #print(os.path.join(base_directory, cl, cl))
  #for i in images:
    #shutil.copy(i, os.path.join(base_directory, cl))


In [None]:
#One-time setup: split each class folder into train / val / test copies (70% / 15% / 15%).
#Uncomment, run once, then comment out again.
#The split slices the glob result as-is: the first 70% of the paths go to train, the next 15% to val,
#and the remainder to test.
#NOTE(review): the list is not shuffled before slicing -- confirm that the glob order is acceptable.
#NOTE(review): the destination folders (base_directory/<split>/<class>) presumably must exist already,
#because shutil.copy treats a missing destination as a file name -- confirm or add os.makedirs.
#for cl in classes:
  #print(cl)
  #image_path = os.path.join(base_directory,cl)
  #print(image_path)
  #use glob extension to loop through image files
  #images = glob.glob(image_path + '/*.png')
  #print("{}: {} Images".format(cl, len(images)))
  #from 0 - .7 of images, from .7 - .7+.15 of images, from .7+.15-100 of images
  #train,val,test = images[:round(len(images)*0.7)], images[round(len(images)*0.7):round(len(images)*0.85)], images[round(len(images)*.85):]
  #print(len(val))
  #print(os.path.join(base_directory, 'val', cl))
  #for v in val:
     #shutil.copy(v, os.path.join(base_directory, 'val', cl))
  #print("done with " + cl + " val" )
  #for ts in test:
     #shutil.copy(ts, os.path.join(base_directory, 'test', cl))
  #print("done with " + cl + " test" )
  #for t in train:
     #shutil.copy(t,os.path.join(base_directory, 'train', cl))
  #print("done with " + cl + " train" )

In [None]:
# Each split lives in its own sub-folder of base_directory: train/, val/ and test/
train_dir, val_dir, test_dir = (
    os.path.join(base_directory, split_name)
    for split_name in ('train', 'val', 'test')
)

In [None]:
# Settings shared by the data generators and the training loop
IMG_SHAPE = 150   # images are resized to IMG_SHAPE x IMG_SHAPE pixels when they are loaded
BATCH_SIZE = 100  # number of images processed per batch, i.e. per update of the model's variables

In [None]:
# Two independent generators that only rescale RGB values from 0-255 to 0-1 (no augmentation).
# The cells below build and use image_gen_train / image_gen_val instead, so these two
# objects are not referenced again in this notebook.
train_image_generator, validation_image_generator = (
    ImageDataGenerator(rescale=1./255) for _ in range(2)
)

In [None]:
# Training-time data augmentation.
# Besides rescaling pixels to 0-1, every generated batch is randomly rotated, shifted,
# zoomed and mirrored, so the network rarely sees exactly the same picture twice.
# This is done to reduce overfitting.

image_gen_train = ImageDataGenerator(
    rescale=1.0 / 255,
    horizontal_flip=True,
    rotation_range=45,
    zoom_range=0.5,
    width_shift_range=0.15,
    height_shift_range=0.15,
)

In [None]:
# Stream shuffled, augmented training batches from the class sub-folders of train_dir.
# class_mode='sparse' produces one integer label per image.
train_data_gen = image_gen_train.flow_from_directory(
    train_dir,
    target_size=(IMG_SHAPE, IMG_SHAPE),
    batch_size=BATCH_SIZE,
    class_mode='sparse',
    shuffle=True,
)

Found 582 images belonging to 10 classes.


In [None]:
# Validation images are only rescaled to 0-1 -- no augmentation is applied to them.
image_gen_val = ImageDataGenerator(rescale=1.0 / 255)

# Batches of validation images with integer labels, read from the class sub-folders of val_dir
val_data_gen = image_gen_val.flow_from_directory(
    val_dir,
    target_size=(IMG_SHAPE, IMG_SHAPE),
    batch_size=BATCH_SIZE,
    class_mode='sparse',
)

Found 125 images belonging to 10 classes.


In [None]:
# CNN classifier: three convolution + max-pooling stages followed by a dense head.
# Everything is taken from tf.keras (layers imported at the top of the notebook, regularizers via
# tf.keras.regularizers) so the model is not built from a mix of the standalone `keras` package
# and `tf.keras`.
# The input size and the number of outputs follow IMG_SHAPE and `classes` instead of repeating
# the literals 150 and 10.

model = tf.keras.models.Sequential([
    # 16 filters of size 3x3 applied to the RGB input image
    Conv2D(16, 3, padding='same', activation='relu', input_shape=(IMG_SHAPE, IMG_SHAPE, 3)),
    # halve the spatial resolution
    MaxPooling2D(2, 2),
    # 32 filters of size 3x3, with an L2 penalty on the kernel weights
    Conv2D(32, 3, kernel_regularizer=tf.keras.regularizers.l2(0.01), padding='same', activation='relu'),
    # halve the spatial resolution
    MaxPooling2D(2, 2),
    # 64 filters of size 3x3
    Conv2D(64, 3, padding='same', activation='relu'),
    # halve the spatial resolution
    MaxPooling2D(2, 2),

    # flatten the feature maps into one vector per image
    Flatten(),
    # randomly drop 20% of the activations during training to reduce overfitting
    Dropout(0.2),

    # fully connected layer, also L2-regularised
    Dense(512, kernel_regularizer=tf.keras.regularizers.l2(0.01), activation='relu'),
    Dropout(0.2),
    # one softmax probability per butterfly class
    Dense(len(classes), activation='softmax')
])

In [None]:
# Configure training: Adam optimiser, cross-entropy on integer labels
# (matching class_mode='sparse' in the generators), and accuracy as the reported metric.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Show the layers with their output shapes and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 150, 150, 16)      448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 75, 75, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 75, 75, 32)        4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 37, 37, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 37, 37, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 18, 18, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 20736)             0

In [None]:
# Training callbacks, imported from tf.keras so they match the tf.keras model defined above.
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop training once val_loss has not improved for 3 consecutive epochs.
# restore_best_weights=True rolls the model back to the weights of its best epoch, so the
# in-memory model used afterwards (saving, evaluation) really is the "best model so far";
# without it the model keeps the weights of the last epoch that ran.
early_stopping_monitor = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
# Additionally write the best model seen so far (lowest val_loss) to best_model.h5
model_checkpoint_monitor = ModelCheckpoint(filepath='best_model.h5', monitor='val_loss', save_best_only=True)

# NOTE: the original cell called model.reset_states() here under the comment "reset variable
# weights". That call does not re-initialise weights (it only clears the state of stateful
# layers, of which this model has none), so it was dropped.
# To train from scratch, re-run the cell that builds the model.

In [None]:
#Optional sanity check: uncomment to verify that every file under train_dir can be opened.
#For each class sub-folder it opens every file with PIL, converts it to RGB, and collects the
#second dot-separated part of each file name in `extensions`.
#NOTE(review): filee.split('.')[1] is only the extension when the name contains exactly one dot;
#os.path.splitext would be safer -- confirm the file naming before relying on `extensions`.
#import os
#from PIL import Image
#folder_path = train_dir
#extensions = []
#for fldr in os.listdir(folder_path):
    #sub_folder_path = os.path.join(folder_path, fldr)
    #for filee in os.listdir(sub_folder_path):
        #file_path = os.path.join(sub_folder_path, filee)
        #print('** Path: {}  **'.format(file_path), end="\r", flush=True)
        #im = Image.open(file_path)
        #rgb_im = im.convert('RGB')
        #if filee.split('.')[1] not in extensions:
            #extensions.append(filee.split('.')[1])

In [None]:
#TRAIN MODEL
from PIL import Image  # kept from the original cell; not referenced directly in this cell
epochs = 30  # upper bound -- early stopping usually ends training sooner

# Number of batches needed to go through every image of a split once
train_steps = int(np.ceil(train_data_gen.n / float(BATCH_SIZE)))
val_steps = int(np.ceil(val_data_gen.n / float(BATCH_SIZE)))

# model.fit accepts data generators directly; fit_generator is deprecated in TensorFlow 2.x
# and removed in newer releases, so it is no longer used here.
history = model.fit(
    train_data_gen,
    steps_per_epoch=train_steps,
    epochs=epochs,
    callbacks=[early_stopping_monitor, model_checkpoint_monitor],  # early stopping + best-model checkpoint
    validation_data=val_data_gen,
    validation_steps=val_steps
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30


In [None]:
# Persist the trained model under the path 'saved_model'
model.save(filepath='saved_model')

NameError: ignored

In [None]:
#LOAD SAVED MODEL
# load_model is a function of tf.keras.models, not a method of a model instance, so the
# original `model.load_model(...)` could not work (and needed `model` to exist already).
model = tf.keras.models.load_model('saved_model')

NameError: ignored

In [None]:
# Pull the per-epoch training and validation curves out of the History object returned by training
acc, val_acc, loss, val_loss = (
    history.history[metric_name]
    for metric_name in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
)

NameError: ignored

In [None]:
# Plot the training curves.
# The x axis is built from the number of recorded epochs: early stopping can end training
# before `epochs` is reached, so len(acc) is the number of epochs that actually ran.
# (The original used the undefined name `epochs_range` and the scalar
# early_stopping_monitor.stopped_epoch as x values.)
epochs_range = range(1, len(acc) + 1)

fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(8, 8))

acc_ax.plot(epochs_range, acc, label='Training Accuracy')
acc_ax.plot(epochs_range, val_acc, label='Validation Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(epochs_range, loss, label='Training Loss')
loss_ax.plot(epochs_range, val_loss, label='Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_title('Training and Validation Loss')
plt.show()

In [None]:
#TEST DIRECTORY LOSS AND ACCURACY
test_datagen = ImageDataGenerator(rescale=1./255)
# shuffle=False keeps the images in a fixed order, so row i of `predictions` below always
# refers to the same test image.
test_generator = test_datagen.flow_from_directory(
    directory=test_dir,
    target_size=(IMG_SHAPE, IMG_SHAPE),
    class_mode='sparse',
    shuffle=False
)
# model.evaluate accepts generators directly; evaluate_generator is deprecated.
test_loss, test_accuracy = model.evaluate(test_generator)
print(test_loss)
print(test_accuracy)

# Class probabilities for the test images.
# (The original called model.predict(img_tensor) here, but img_tensor is only created in a
# later cell, so this cell failed on a fresh run.)
test_generator.reset()  # start again from the first batch
predictions = model.predict(test_generator)
print(predictions[:5])  # only the first few rows, to keep the output short
np.argmax(predictions[0])  # predicted class index of the first test image

In [None]:
from keras.preprocessing import image

# load_img needs the path of an image file, not of a directory (the original passed test_dir
# itself). Use the first PNG found in the class sub-folders of the test directory.
sample_paths = sorted(glob.glob(os.path.join(test_dir, '*', '*.png')))
if not sample_paths:
    raise FileNotFoundError("No .png images found under " + test_dir)

img = image.load_img(sample_paths[0], target_size=(IMG_SHAPE, IMG_SHAPE))
img_tensor = image.img_to_array(img)                    # (height, width, channels)
img_tensor = np.expand_dims(img_tensor, axis=0)         # (1, height, width, channels): the model expects a batch dimension
img_tensor = img_tensor / 255.0                         # same 0-1 rescaling the data generators apply

# Predict and report the most likely class index for this one image
single_prediction = model.predict(img_tensor)
print(sample_paths[0])
print(np.argmax(single_prediction[0]))

# Values are in 0-1 now, which is the range imshow expects for float images
plt.imshow(img_tensor[0])
plt.axis('off')
plt.show()