# In [None]:
# Import the necessary libraries
import numpy as np
import pandas as pd
import os , sys,cv2
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input
from keras.preprocessing import image
import csv
import os
from PIL import Image
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import shutil

# Locations of the two labelled training folders
inlier_path = "./inlier_train/"
outlier_path = "./outlier_train/"
# Bare filenames (no directory part) found in each folder, in the order
# os.listdir returns them
inlier_files = os.listdir(inlier_path)
outlier_files = os.listdir(outlier_path)
# Make sure both working folders exist:
#   ./augmented - augmented copies of the outlier images
#   ./train     - final training set (inliers + outliers + augmented images)
# A folder left over from an earlier run is reused as-is.
for folder in ("./augmented", "./train"):
    try:
        os.mkdir(folder)
    except FileExistsError:
        pass
# Data augmentation settings: random rotation, zoom and horizontal flip.
# featurewise_center / featurewise_std_normalization were removed: they only
# take effect after datagen.fit(...) has computed dataset statistics, which this
# script never calls, so Keras merely warned on every batch and left the pixels
# untouched. zca_epsilon was removed because it is only used by zca_whitening.
datagen = ImageDataGenerator(
        rotation_range=120,
        zoom_range=0.4,
        horizontal_flip=True,
        fill_mode='nearest')
# Number of augmented copies written for each outlier image
AUG_PER_IMAGE = 11
# Create augmented images for each outlier image
for imagefile in outlier_files:
    name = './outlier_train/' + imagefile
    img = load_img(name, target_size=(224, 224))
    x = img_to_array(img)
    x = np.expand_dims(x, axis=0)  # flow() takes a batch, so add a leading batch axis
    # Keras builds each saved filename from the prefix, the sample index (always 0
    # for a one-image batch) and a random number. Putting the source image's stem
    # into the prefix keeps copies of different outliers from overwriting each other.
    flow = datagen.flow(x, batch_size=1, save_to_dir='./augmented/',
                        save_prefix='aug_' + os.path.splitext(imagefile)[0],
                        save_format='jpeg')
    # flow() yields forever; each batch drawn writes one image to ./augmented/
    for count, _ in enumerate(flow, start=1):
        if count >= AUG_PER_IMAGE:
            break
# Filenames of every augmented image currently in ./augmented/
aug_path = "./augmented/"
aug_files = os.listdir(aug_path)
# Original outlier images: label = 1
df_o = pd.DataFrame(outlier_files, columns=['filename']).assign(label=1)
# Augmented outlier images: label = 1
df_a = pd.DataFrame(aug_files, columns=['filename']).assign(label=1)
# Inlier images: label = 0
df_i = pd.DataFrame(inlier_files, columns=['filename']).assign(label=0)
# Stack the three frames (augmented, then outlier, then inlier) into a single
# filename/label table with a fresh 0..n-1 index
df_train = pd.concat([df_a, df_o, df_i], axis=0, ignore_index=True)
# Copy every image into the shared train folder: augmented images first, then
# the original outliers, then the inliers. The source folders are left intact.
# A file copied later replaces an earlier one of the same name in ./train/.
dest1 = './train/'
for source in ('./augmented/', './outlier_train/', './inlier_train/'):
    files = os.listdir(source)
    for f in files:
        shutil.copy(source + f, dest1)
# Pull the two columns of df_train out as parallel arrays:
# filename[k] is the image name and label[k] is its class (1 = outlier, 0 = inlier)
filename = df_train['filename'].values
label = df_train['label'].values

# Size lookup for files in the train image directory
def file_size_train(file_name):
    """Return the size in bytes of ``train/<file_name>``.

    The path is relative to the current working directory. ``file_name`` is
    converted with ``str()`` before being appended to the folder name.
    Raises ``OSError`` (e.g. ``FileNotFoundError``) if the file cannot be stat'ed.
    """
    return os.path.getsize("train/" + str(file_name))

# Feature extraction for the training images with a pre-trained Keras ResNet50
def resnet_model_train():
    """Append one flattened ResNet50 feature vector per training image to ``train_features``.

    Reads the module-level ``filename`` sequence, loads each named image from
    ``./train`` resized to 224x224, and runs it through an ImageNet-weighted
    ResNet50 without its classification head (``include_top=False``,
    ``pooling='avg'``). Files whose size on disk is zero are skipped, so
    ``train_features`` can end up shorter than ``filename``.

    Prints each processed filename followed by a running count.
    Returns nothing; the result is the mutation of the global ``train_features``.
    """
    images_path = "./train"
    model = ResNet50(weights='imagenet', include_top=False, pooling='avg')
    processed = 0
    for img_path in filename:
        # Zero-byte files are skipped before any attempt to load them
        if file_size_train(img_path) == 0:
            continue
        print(str(img_path))
        processed += 1
        print(processed)
        # Load the image at the 224x224 size requested from load_img
        img = image.load_img(images_path + "/" + str(img_path), target_size=(224, 224))
        # Add a leading batch axis, then apply the ResNet50 preprocessing
        batch = np.expand_dims(image.img_to_array(img), axis=0)
        batch = preprocess_input(batch)
        # Run the network and flatten its output to a 1-D vector
        features = np.array(model.predict(batch))
        train_features.append(features.flatten())

# Size lookup for files in the test image directory
def file_size_test(file_name):
    """Return the size in bytes of ``test/<file_name>``.

    The path is relative to the current working directory. ``file_name`` is
    converted with ``str()`` before being appended to the folder name.
    Raises ``OSError`` (e.g. ``FileNotFoundError``) if the file cannot be stat'ed.
    """
    return os.path.getsize("test/" + str(file_name))

# Feature extraction for the test images with a pre-trained Keras ResNet50
def resnet_model_test():
    """Append one flattened ResNet50 feature vector per test image to ``test_features``.

    First extends the module-level ``test_files`` list with every entry of
    ``./test`` (in ``os.listdir`` order), then loads each listed image resized
    to 224x224 and runs it through an ImageNet-weighted ResNet50 without its
    classification head (``include_top=False``, ``pooling='avg'``). Files whose
    size on disk is zero are skipped, so ``test_features`` can end up shorter
    than ``test_files``.

    Prints each processed filename followed by a running count.
    Returns nothing; the results are the mutations of the globals
    ``test_files`` and ``test_features``.
    """
    images_path = "./test"
    model = ResNet50(weights='imagenet', include_top=False, pooling='avg')
    processed = 0
    # Record every directory entry so the iteration below follows the same order
    test_files.extend(os.listdir(images_path))
    for img_path in test_files:
        # Zero-byte files are skipped before any attempt to load them
        if file_size_test(img_path) == 0:
            continue
        print(str(img_path))
        processed += 1
        print(processed)
        # Load the image at the 224x224 size requested from load_img
        img = image.load_img("./test/" + str(img_path), target_size=(224, 224))
        # Add a leading batch axis, then apply the ResNet50 preprocessing
        batch = np.expand_dims(image.img_to_array(img), axis=0)
        batch = preprocess_input(batch)
        # Run the network and flatten its output to a 1-D vector
        features = np.array(model.predict(batch))
        test_features.append(features.flatten())

# ---- Training features -------------------------------------------------------
train_features = []
resnet_model_train()  # Fills train_features with one vector per non-empty training image

# resnet_model_train() skips zero-byte files, so the labels belonging to those
# files have to be dropped too. Without this the label array is longer than the
# feature matrix and fit() either fails or pairs features with the wrong labels.
non_empty = np.array([file_size_train(name) != 0 for name in filename], dtype=bool)
train_labels = np.asarray(label)[non_empty]

ss = StandardScaler()
train_features = ss.fit_transform(train_features)  # Fit the scaler on the training data only

# ---- Test features -----------------------------------------------------------
test_features = []
test_files = []
resnet_model_test()  # Fills test_files and test_features
test_features = ss.transform(test_features)  # Reuse the training-set scaling

# ---- KNN classification ------------------------------------------------------
# NOTE(review): n_neighbors is how many neighbours vote on each prediction; it is
# not related to the number of classes. The value 2 is kept so existing results do
# not change, but an even k allows 1-1 ties - consider an odd value.
model_knn = KNeighborsClassifier(n_neighbors=2)
model_knn.fit(train_features, train_labels)  # Train the model
pred_knn = model_knn.predict(test_features)  # Predict the labels for the test image features

# ---- Output ------------------------------------------------------------------
# Write the results to a CSV with columns ID, Result. ID is the 0-based row
# position of the prediction, i.e. its position among the non-empty test files in
# os.listdir order - it is not derived from the file name.
result = pd.DataFrame(pred_knn, columns=['Result'])
result.reset_index(inplace=True)
result.columns = ['ID', 'Result']
result.to_csv('output.csv', header=True, index=False)