In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns

import PIL
from tqdm import tqdm
from PIL import Image

from matplotlib import rcParams

from keras.models import Sequential
from keras.callbacks import ModelCheckpoint
from tensorflow import keras

import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.imagenet_utils import preprocess_input

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

from keras.utils import np_utils


In [None]:
#If running on Colab: mount Google Drive at /content/drive.
#The import lives in this cell because google.colab is only needed for this mount step.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#Project locations -- change `root` as required; every other path is derived from it
root = '/content/drive/MyDrive/ML_Project'
train_csv = f'{root}/train.csv'          #training labels
train_images = f'{root}/train_images'    #directory of training images
test_images = f'{root}/test_images'      #directory of test images
model_path = f'{root}/models'            #directory holding saved models

In [None]:
#Read in the training data for the purpose of getting class labels
train_df = pd.read_csv(train_csv)

In [None]:
#OHE the target data
def labels(y):
    """One-hot encode a sequence of class labels.

    Parameters
    ----------
    y : array-like
        Class label of every sample (for example a pandas Series of ids).

    Returns
    -------
    y : numpy.ndarray
        Dense float64 array of shape (n_samples, n_classes). Column j is 1.0
        for the samples whose label is ``label_encoder.classes_[j]``.
    label_encoder : LabelEncoder
        The fitted encoder; its ``classes_`` attribute maps column positions
        back to the original labels.
    """
    values = np.array(y)
    label_encoder = LabelEncoder()
    label_encoder = label_encoder.fit(values)
    lEncoded = label_encoder.transform(values)

    #The encoded labels are already the integers 0..n_classes-1, so the one-hot
    #matrix can be filled in directly. This avoids OneHotEncoder's `sparse`
    #keyword, which newer scikit-learn releases no longer accept.
    n_samples = len(lEncoded)
    n_classes = len(label_encoder.classes_)
    OHEncoded = np.zeros((n_samples, n_classes))
    OHEncoded[np.arange(n_samples), lEncoded] = 1.0

    y = OHEncoded
    return y, label_encoder

In [None]:
#OHE the training target data to get the class labels.
#The fitted encoder is kept so that class names can be read from it afterwards.
ytrain = train_df['individual_id']
Ytrain, encoder = labels(ytrain)
print(Ytrain)

In [None]:
#Class labels known to the fitted encoder; these correlate to the columns of the OHE array
classes = encoder.classes_
print(classes)

In [None]:
def imageLoad(paths, target_size = (56, 80)):
  """Load, resize and preprocess a collection of images into one array.

  Parameters
  ----------
  paths : iterable of str
      Image file paths. Any iterable works (list, pandas Series, ...); the
      images are stored in iteration order, independent of a Series' index.
  target_size : tuple of int, optional
      (height, width) every image is resized to. Defaults to (56, 80).

  Returns
  -------
  numpy.ndarray
      Array of shape (len(paths), height, width, 3) holding the images after
      `preprocess_input` has been applied.
  """
  #Materialise once so that len() and positional iteration work for any iterable;
  #indexing a pandas Series with paths[i] is a label lookup and breaks on a non-default index
  paths = list(paths)
  height, width = target_size

  Xout = np.zeros((len(paths), height, width, 3))
  for i, path in enumerate(tqdm(paths)):

    #Open image from path
    img = image.load_img(path, target_size = target_size, color_mode = 'rgb')
    #Convert to array
    x = image.img_to_array(img)
    #Expand dimensions so the image is handed over as a batch of one
    x = np.expand_dims(x, axis = 0)
    #Preprocess input for keras model
    x = preprocess_input(x)
    #Drop the batch dimension again when storing the result
    Xout[i] = x[0]
  return Xout

In [None]:
#If the test_images.csv file does not already exist (it was not given) uncomment and run this cell
#Creates a test_images.csv file for future use

# test = os.listdir(test_images)
# col = ['image']
# test_df = pd.DataFrame(test, columns=col)
# test_df['predictions'] = ''
# test_df['image_path'] = test_images + '/'+ test_df['image']
# pd.set_option('display.max_colwidth', None)
# print(test_df)
# test_df.to_csv(root + '/test_images.csv')

In [None]:
#Read in data from the created test_images.csv, rebuild the full image paths
#and keep only the columns used further down
test_df = (
    pd.read_csv(root + '/test_images.csv')
    .assign(image_path = lambda frame: test_images + '/' + frame['image'])
    .loc[:, ['image', 'predictions', 'image_path']]
)

#Show complete paths instead of truncated ones when printing
pd.set_option('display.max_colwidth', None)

print(test_df)

In [None]:
#Load and rescale all test images
#NOTE(review): imageLoad already applies preprocess_input to every image; the extra
#division by 255 presumably mirrors what was done at training time -- confirm.
Xtest = imageLoad(test_df['image_path'])
Xtest /= 255

In [None]:
#Load the saved model from the models directory. Update the model name as needed
model = keras.models.load_model(model_path + '/Model_CNN_Validation')

In [None]:
#Get predictions for all test images, in batches of 32 with a progress display
predict=model.predict(Xtest, verbose = 1, batch_size = 32)

In [None]:
#Function for determining the indices and probabilities of the top 5 predictions
def maxIndices(predict, top_k = 5):
  """Return the top-k class indices and their probabilities for every row.

  Parameters
  ----------
  predict : array-like
      2-D array of scores with one row per sample and one column per class.
  top_k : int, optional
      Number of best classes to keep per row (default 5). If a row has fewer
      classes than `top_k`, all of them are returned.

  Returns
  -------
  Indices_df : pandas.DataFrame
      Class indices per row, ordered from most to least probable.
  Probs_df : pandas.DataFrame
      The matching scores, in the same order.
  """
  predict = np.asarray(predict)
  Indices = []
  Probs = []
  #Rows are handled one at a time so that only a single row is negated and partitioned at once
  for i in tqdm(range(len(predict))):
    row = predict[i]
    #Never ask for more entries than there are classes
    k = min(top_k, row.shape[0])
    if k < 1:
      best = np.arange(0)
    else:
      #Partition off the top k predictions; argpartition leaves them in arbitrary order
      best = np.argpartition(-row, k - 1)[:k]
      #Rank them from most to least probable, because the order of the predictions matters
      best = best[np.argsort(-row[best], kind = 'stable')]
    Indices.append(best)
    Probs.append(row[best])
  Indices_df = pd.DataFrame(Indices)
  Probs_df = pd.DataFrame(Probs)
  return Indices_df, Probs_df

In [None]:
#Get the prediction indices and probabilities for every test image
Predict_df, prob_df = maxIndices(predict[:])

In [None]:
#Inspect the probabilities returned by maxIndices
print(prob_df)

In [None]:
#for each probability below the cutoff, change the value to new_individual
cutoff = 0.6
for i in range(5):
    #After a first run the column holds strings, so convert back to numbers before comparing;
    #entries that were already replaced become NaN here and are handled by `already_new`
    numeric_probs = pd.to_numeric(prob_df[i], errors='coerce')
    already_new = prob_df[i] == 'new_individual'
    #Re-running this cell therefore leaves prob_df unchanged instead of raising a TypeError
    prob_df[i] = np.where((numeric_probs <= cutoff) | already_new, 'new_individual', prob_df[i])
prob_df.head()

In [None]:
#Match up the prediction indices with locations where the prediction was switched to new_individual

def _rows_marked_new(column):
    """Return the row labels of prob_df whose entry in `column` is 'new_individual'."""
    is_new = prob_df[column] == 'new_individual'
    return prob_df[is_new].index.values

#get the indices where new_individual are, one array per prediction column
col0, col1, col2, col3, col4 = (_rows_marked_new(column) for column in range(5))

#create a series containing "new_individual" in the specified indices;
#each series is named after the prediction column it belongs to
strs0 = ["new_individual"] * len(col0)
new0 = pd.Series(strs0, index = col0, name = '0')

strs1 = ["new_individual"] * len(col1)
new1 = pd.Series(strs1, index = col1, name = '1')

strs2 = ["new_individual"] * len(col2)
new2 = pd.Series(strs2, index = col2, name = '2')

strs3 = ["new_individual"] * len(col3)
new3 = pd.Series(strs3, index = col3, name = '3')

strs4 = ["new_individual"] * len(col4)
new4 = pd.Series(strs4, index = col4, name = '4')

In [None]:
#Sanity checks: compare the sizes of the prediction table and the class list ...
print(Predict_df.shape)
print(classes.shape)

#... and show the rows holding the largest and the smallest class index of each column
rows_with_max = Predict_df.idxmax()
print(Predict_df.iloc[rows_with_max])
rows_with_min = Predict_df.idxmin()
print(Predict_df.iloc[rows_with_min])

In [None]:
#Create a data frame holding the String class names, looked up with the indices from the predictions.
#Column '0' .. '4' of the result corresponds to column 0 .. 4 of Predict_df.
newPred_df = pd.DataFrame(
    {str(position): classes[Predict_df.iloc[:, position]] for position in range(5)}
)
newPred_df.head(10)


In [None]:
#update the predicted labels with the "new individual" labels, one prediction column at a time
for new_labels in (new0, new1, new2, new3, new4):
    newPred_df.update(new_labels)

In [None]:
#Combine all predictions into one column to match the Kaggle submission format.
#Only the five prediction columns are joined: selecting every column would also pick up
#'Combined' itself when this cell is run a second time and duplicate the predictions.
prediction_columns = ['0', '1', '2', '3', '4']
newPred_df['Combined'] = newPred_df[prediction_columns].apply(lambda row: ' '.join(row.values.astype(str)), axis=1)

In [None]:
#Assemble the submission: one row per test image with its space-separated predictions
kaggleOut = test_df[['image']].assign(predictions = newPred_df['Combined'])

print(kaggleOut)

#Write the submission file without the row index
kaggleOut.to_csv(f'{root}/output.csv', index = False)