# Preprocessing for training, validation, and testing data

In [None]:
def splitfiles(folder, train_folder, val_folder):
    """Splits data into 80% training, 20% validation.

    Every entry of ``folder`` is *moved* (not copied): the first 80% of the
    name-sorted listing (rounded down) goes to ``train_folder`` and the rest
    to ``val_folder``.

    Parameters
    ----------
    folder - directory whose entries are split
    train_folder - destination for the training share (created if missing)
    val_folder - destination for the validation share (created if missing)

    Returns
    -------
    The confirmation string '<folder> split.'
    """
    import os
    from shutil import move

    # Sorted so the split is reproducible; os.listdir order is arbitrary.
    # Listed before the destinations are created so that a destination nested
    # inside `folder` is not picked up as an item to move.
    all_files = sorted(os.listdir(folder))
    train = int(0.8 * len(all_files))

    # move() fails on a missing destination directory, so create both up front.
    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(val_folder, exist_ok=True)

    for i, file in enumerate(all_files):
        destination = train_folder if i < train else val_folder
        move(os.path.join(folder, file), os.path.join(destination, file))
    return f'{folder} split.'

In [None]:
def unzip(folder):
    """
    Unzips a zip file

    Parameters
    ----------
    folder - path of the zip archive

    Returns
    -------
    Path of the directory the archive was extracted into: the archive path
    with its '.zip' extension removed.
    """
    import os
    from zipfile import ZipFile

    # str.rstrip(".zip") strips any trailing '.', 'z', 'i', 'p' characters
    # (e.g. 'tulip.zip' -> 'tul'), so remove the extension explicitly instead.
    root, ext = os.path.splitext(folder)
    name = root if ext.lower() == '.zip' else folder
    with ZipFile(folder, 'r') as zObject:
        zObject.extractall(path=name)
    return name

In [None]:
import cv2
import imghdr
import os
    
def image_clear(data_dir):
    """removes unwanted images

    Deletes, directly inside ``data_dir``:
    - every file smaller than 10 KiB, and
    - every file whose header is not recognised by ``imghdr`` as
      jpeg / png / bmp.

    Sub-directories are left untouched. Files that cannot be inspected or
    deleted are reported and skipped (best effort).

    Parameters
    ----------
    data_dir - directory containing the image files to filter
    """
    # imghdr reports JPEG files as 'jpeg'; 'jpg' is kept from the original list.
    img_ext = ['jpeg', 'jpg', 'png', 'bmp']
    ten_kib = 10240

    for file in os.listdir(data_dir):
        filepath = os.path.join(data_dir, file)

        # os.remove cannot delete directories, so skip anything that is not a file.
        if not os.path.isfile(filepath):
            continue

        if os.path.getsize(filepath) < ten_kib:
            print(f'{filepath} removed.')
            os.remove(filepath)
            # The file is gone; there is nothing left to type-check.
            continue

        # NOTE(review): the previous version also called cv2.imread here but
        # never used the result, so the call was dropped.
        try:
            tip = imghdr.what(filepath)
            if tip not in img_ext:
                print(f'{filepath} removed.')
                os.remove(filepath)
        except OSError as e:
            # Unreadable / undeletable file: report it instead of hiding it.
            print(f'{filepath} skipped: {e}')

In [None]:
import requests, csv, re, os, shutil

# Builds bird_url.csv: one row [bird name, url, 0] for every bird on the
# Singapore bird list page whose lower-cased name matches a folder in ttv/train.
csvarr = []

#regex on webpage
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops us from silently parsing an HTTP error page.
response = requests.get("https://singaporebirds.com/singapore-bird-list/", timeout=30)
response.raise_for_status()
html = response.text
# Non-greedy groups: with greedy '.*' two links on the same line would be
# merged into a single (wrong) match.
pattern = re.compile(r'<td><a href="(.*?)" target="_blank" rel="noopener">(.*?)</a></td>')
tup = pattern.findall(html)

birds = os.listdir('ttv/train')
birds = [bird.lower() for bird in birds]
known_birds = set(birds)  # constant-time, case-insensitive membership test

for url, bird in tup:
    if bird.lower() in known_birds:
        csvarr.append([url, bird])

#writing into csv
with open('bird_url.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    for url, bird in csvarr:
        arr = [bird, url, 0]
        print(arr)
        writer.writerow(arr)

#creation of test folder in path 'static'
# Copies the first listed image of every class folder to static/test/<label>.jpg
train_path = input('Train folder path?: ')
labels = os.listdir(train_path)
target = 'static/test'
# Without an existing directory, shutil.copy would either fail or create a
# plain *file* called 'static/test'.
os.makedirs(target, exist_ok=True)
for label in labels:
    allfiles = os.path.join(train_path, label)
    if not os.path.isdir(allfiles):
        # stray files next to the class folders are not labels
        continue
    images = os.listdir(allfiles)
    if not images:
        print(f'{label} skipped: no images')
        continue
    img = images[0]
    imgpath = os.path.join(allfiles, img)
    # Copy straight to the final name instead of copy-then-rename.
    # NOTE(review): the '.jpg' suffix is applied regardless of the source
    # file's real extension, as before.
    mypath = os.path.join(target, label+'.jpg')
    shutil.copy(imgpath, mypath)
    print(f'{img} copied from {label}')

# Training of deep learning model using InceptionV3 Transfer Learning

In [None]:
#remember to use tf environment
from keras.layers import Dense, Flatten, Dropout
from keras.models import Model #create and save model
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing.image import ImageDataGenerator #generates many images from the same image
from keras.optimizers import Adam
from keras import callbacks
import numpy as np
import matplotlib.pyplot as plt
import os

In [None]:
# Spatial size (in pixels) of the network input; concatenated with [3] colour
# channels to form the InceptionV3 input_shape.
img_size = [224, 224]

In [None]:
# Dataset roots read by flow_from_directory for the training and validation
# generators; train_dir is also listed to count the class folders.
train_dir = 'ttv/train'
val_dir = 'ttv/validation'

In [None]:
# ImageNet-pretrained InceptionV3 without its classification top.
inceptionv3 = InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=img_size + [3],
)

# Freeze all pretrained layers so that training does not update them.
for layer in inceptionv3.layers:
    layer.trainable = False

In [None]:
import os
# One sub-directory per class. Only directories are counted: a stray file such
# as .DS_Store would otherwise inflate len(folders) beyond the number of
# classes flow_from_directory discovers. Sorted for a stable order.
folders = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)

In [None]:
# Classification head stacked on the InceptionV3 feature maps.
x = Flatten()(inceptionv3.output)
# NOTE(review): no activation is passed, so this Dense layer is linear;
# consider activation='relu' when retraining from scratch.
x = Dense(1024)(x)
# softmax (not sigmoid): each image has exactly one class and the model is
# trained with categorical_crossentropy, which expects a probability
# distribution over the classes. The argmax prediction is unaffected.
predictions = Dense(len(folders), activation='softmax')(x)

In [None]:
# Full network: InceptionV3 input through to the `predictions` output tensor.
model = Model(outputs=predictions, inputs=inceptionv3.input)
model.summary()

# Adam with learning rate 3e-4; accuracy is tracked alongside the loss.
model.compile(
    optimizer=Adam(learning_rate=3e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Training images are rescaled to [0, 1] and randomly augmented.
train_datagen = ImageDataGenerator(rescale = 1./255,
                                   rotation_range = 30,
                                   width_shift_range = 0.2,
                                   height_shift_range = 0.2,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   horizontal_flip=True)
# Validation images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale = 1./255)

# target_size is derived from img_size so the generators can never drift away
# from the input_shape the model was built with.
training_set = train_datagen.flow_from_directory(train_dir,
                                                target_size = tuple(img_size),
                                                batch_size = 64,
                                                class_mode = 'categorical')

validation_set = test_datagen.flow_from_directory(val_dir,
                                               target_size = tuple(img_size),
                                               batch_size = 5,
                                               class_mode = 'categorical')

In [None]:
# Checkpoint callback: writes the weights (only) to 'model' whenever
# val_accuracy reaches a new maximum.
from keras import callbacks

callback = callbacks.ModelCheckpoint(
    filepath='model',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

In [None]:
# Train for 10 epochs, validating after each one; `r.history` holds the
# per-epoch metrics. (A missing comma after `callbacks=[callback]` previously
# made this cell a SyntaxError.)
r = model.fit(
    training_set,
    steps_per_epoch=len(training_set),
    callbacks=[callback],
    epochs=10,
    validation_data=validation_set,
    validation_steps=len(validation_set),
)

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch, taken from the fit history.
for metric, legend_name in (('accuracy', 'train_acc'), ('val_accuracy', 'val_accuracy')):
    plt.plot(r.history[metric], label=legend_name)
plt.legend()
plt.show()

In [None]:
# model.save('model/in_model_adam.h5') #important to save model!

In [None]:
# class_indices maps class name -> index; invert it so a predicted index can
# be turned back into its class name.
label = {index: name for name, index in training_set.class_indices.items()}
print(label)

In [None]:
from keras.models import load_model
from keras.utils import load_img, img_to_array

def output(loc, my_model):
    """
    Predicts the class name of a single image.

    Parameters
    ----------
    loc - path of test image
    my_model - loaded deep learning model (an object exposing ``predict``),
               not a file path

    Returns
    -------
    The entry of the module-level ``label`` dict (class index -> class name)
    for the highest-scoring class.
    """
    # load_img documents target_size as (height, width); the channel count
    # does not belong in it.
    img = load_img(loc, target_size=(224, 224))
    img = img_to_array(img)
    img = img/255  # same [0, 1] rescaling the data generators apply
    img = np.expand_dims(img, axis=0)  # add the batch dimension
    answer = my_model.predict(img)
    y_class = answer.argmax(axis=-1)
    return label[y_class[0]]

In [None]:
# Classifies every file found in the 'testtest' folder and prints the result.
# To test e.g. purple_heron.jpg, place it at testtest/purple_heron.jpg;
# images from the ttv/test folders can be used.
import os
model = load_model('in_model_adam.h5', compile=False)
tests = os.listdir('testtest')
for test in tests:
    res = output(os.path.join('testtest', test), model)
    print(f'{test} ------> {res}')

Summary steps to take:
1. Preprocess dataset
2. Separate dataset into 0.7 train, 0.2 valid, 0.1 test
3. Use InceptionV3 (pretrained on ImageNet, without its top layers) as the frozen base model
4. Use ImageDataGenerator
5. Add a Dense output layer with one unit per class folder
6. Compile model
7. Train model (peak performance 8 epochs)
8. Plot results of training (optional)
9. Load model
10. Output function for test results