In [1]:
import numpy as np
from numpy import asarray

from PIL import Image

import os

import glob

import keras
from keras.layers import Dense, Softmax, Conv2D, Input, MaxPooling2D, Flatten, RandomContrast
from keras.models import Sequential, load_model
from keras.utils import to_categorical
from keras.preprocessing.image import img_to_array, load_img

from sklearn.ensemble import RandomForestClassifier
from skimage.transform import resize
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.preprocessing import LabelEncoder

import torch
from torch.utils.data import Dataset, DataLoader

from joblib import dump

from matplotlib import pyplot as plt



# Load the data

In [2]:
'''

CREATE THE .NPZ FILE

'''
# folder_path = './data/MixedTrainingNumpy/'

# data = {}

# for file in os.listdir(folder_path):
#     if file.endswith('.npy'):

#         file_path = os.path.join(folder_path, file)
#         array = np.load(file_path)

#         label = file[0]

#         data[label] = array

# np.savez('./data/Training.npz', **data)

'\n\nCREATE THE .NPZ FILE\n\n'

In [3]:
'''

LOADS AND "FORMALISES" DATA SO CAN BE PASSED INTO PREPROCESSING

'''

def loadData(filePath):
    images = []
    labels = []

    for filename in os.listdir(filePath):
        if filename.endswith('.npy'):

            img = np.load(os.path.join(filePath, filename))

            img = resize(img, (128, 128), anti_aliasing=True) / 255 # TODO Try normalise between -1 and 1

            images.append(img)
            
            label = filename[0]
            labels.append(label)

    imagesNP = np.array(images)
    labelsNP = np.array(labels)

    # Adjust this if your labels are not numeric
    encoder = LabelEncoder()
    intLabels = encoder.fit_transform(labelsNP)
    intLabels = to_categorical(intLabels)

    # Split the data into training and validation sets
    return imagesNP, intLabels

In [4]:
def featureExtractionModel():
    model = Sequential()
    model.add(Input(shape=(128,128,3))) # Images are 100 by 100 and RGB

    model.add(Conv2D(filters=64, kernel_size=(3,3), activation='relu'))#Break the image into separate sub-image
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Conv2D(filters=32, kernel_size=(3,3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Flatten())

    # Softmax Regression
    model.add(Dense(units=128, activation='relu'))
    model.add(Dense(units=4, activation='softmax'))
    return model

In [8]:
def main():
    filePath = './data/MixedTrainingNumpy/'
    imagesNP, labelsNP = loadData(filePath)
    print("Done")

    imTrain, imTest, labTrain, labTest = train_test_split(imagesNP, labelsNP, test_size=0.2, random_state=42)

    print(imTrain.shape)
    print(labTrain.shape)
    print(imTest.shape)
    print(labTest.shape)

    # kFold = KFold(n_splits=5, shuffle=True, random_state=42)

    # foldN = 1
    # bestLoss = 10

    # for train, test in kFold.split(imTrain, labTrain):
    #     model = featureExtractionModel()

    #     #optimiser = keras.optimizers.Adam(learning_rate=0.001)
    #     model.compile(loss='categorical_crossentropy', metrics=['AUC', 'accuracy'])

    #     print(f'Training for fold {foldN}...')
    #     model.fit(imTrain[train], labTrain[train], epochs=15, batch_size=4, validation_data=(imTrain[test], labTrain[test]))

    #     foldN += 1
    #     score = model.evaluate(imTrain[test], labTrain[test], verbose=0)
    #     print(f'Score for fold {foldN}: {model.metrics_names[0]} of {score[0]}; {model.metrics_names[1]} of {score[1]*100}%')

    #     if score[0] < bestLoss:
    #         bestLoss = score[0]
    #         bestModel = model

    # bestModel.save("./CNN.h5")


    # test_score = model.evaluate(imTest, labTest, verbose=0)
    # print(f'Test Score: Loss = {test_score[0]}; AUC = {test_score[1]*100}%; Accuracy = {test_score[2]*100}%')

    model = load_model("./CNN.h5")

    featureModel = keras.Model(inputs=model.inputs, outputs=model.layers[-3].output)

    featureTrain = featureModel.predict(imTrain)
    print(featureTrain.shape)
    featureTest = featureModel.predict(imTest)
    print(featureTest.shape)

    randForest = RandomForestClassifier(n_estimators=1000, random_state=42, max_depth=20, max_features='sqrt', min_samples_leaf=1, min_samples_split=2, bootstrap=False)
    randForest.fit(featureTrain, np.argmax(labTrain, axis=1))

    rfPred = randForest.predict(featureTest)

    accuracy = accuracy_score(np.argmax(labTest, axis=1), rfPred)
    print(f'Random Forest Classifier Accuracy: {accuracy * 100}%')


main()

Done




(2289, 128, 128, 3)
(2289, 4)
(573, 128, 128, 3)
(573, 4)
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 37ms/step
(2289, 28800)
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step
(573, 28800)
Random Forest Classifier Accuracy: 91.44851657940663%
