## Model accuracy on test data
*Split from the main notebooks to limit notebook size and for organisational purposes.*

Now that we have trained and tuned two models, we have to pick the better one to deploy.

In [72]:
# Imports

import os

import numpy as np
import onnx
import onnxruntime as rt
import pandas as pd
from PIL import Image

In [73]:
# Class names; model_test_run indexes this list with the argmax of the model output.
# NOTE(review): the order presumably has to match the class order used when the
# models were trained/exported — confirm.
labels = ['Coast', 'Desert', 'Forest', 'Glacier', 'Mountain']

In [74]:
# Establishing data paths and lists

data_path = "./data/Testing Data"


def _list_file_names(class_dir: str) -> list:
    """Return the names of the regular files directly inside *class_dir*.

    Sub-directories are skipped. The order is whatever ``os.listdir`` yields.
    """
    return [
        name
        for name in os.listdir(class_dir)
        if os.path.isfile(os.path.join(class_dir, name))
    ]


# One list of file names per class folder under data_path.
coast_names = _list_file_names(data_path + "/Coast")
desert_names = _list_file_names(data_path + "/Desert")
forest_names = _list_file_names(data_path + "/Forest")
glacier_names = _list_file_names(data_path + "/Glacier")
mountain_names = _list_file_names(data_path + "/Mountain")

# NOTE: all_names is a one-element list wrapping the flat list of every file
# name. The accuracy cells read the total image count as len(all_names[0]),
# so the nesting is kept as it is.
all_names = [coast_names + desert_names + forest_names + glacier_names + mountain_names]

In [75]:
# Helper function to transform image data into the form the model can accept

def image_transformer(path: str, size: int) -> np.ndarray:
    """Load an image file and turn it into a single-image float32 batch.

    The image is converted to RGB, resized to ``size`` x ``size``, moved from
    height-width-channel to channel-height-width order, divided by 255 and
    given a leading axis of length 1.

    Args:
        path: Location of the image file to open.
        size: Edge length, in pixels, of the square the image is resized to.

    Returns:
        Array of shape ``(1, 3, size, size)`` with dtype ``float32``.
    """
    # The context manager releases the file handle that Image.open holds.
    with Image.open(path) as source:
        # Force three channels: grayscale/palette images would otherwise give
        # a 2-D array (the transpose below fails) and RGBA images would give
        # four channels instead of three.
        image = source.convert("RGB").resize((size, size))

    pixels = np.array(image)
    pixels = pixels.transpose(2, 0, 1).astype(np.float32)
    # NOTE(review): only a 1/255 scaling is applied here; confirm the exported
    # models do not additionally expect mean/std normalisation.
    pixels /= 255

    # Add the batch axis in front.
    return pixels[None, ...]

(Yes, this is probably the most inelegant way to do this. It needs optimising; I will do that later.)

(Ideally, this sort of testing should have been done with data loaders before saving and exporting the model.)

In [76]:
# A function that runs the model over every test image and counts the hits

def model_test_run(session, inputs, outputs, size=224):
    """Count how many test images the model classifies correctly.

    Every file listed in the per-class name lists (``coast_names``,
    ``desert_names``, ``forest_names``, ``glacier_names``, ``mountain_names``)
    is loaded from ``{data_path}/<Class>/<file>`` with ``image_transformer``,
    passed through ``session``, and the entry of ``labels`` at the argmax of
    the output is compared with the folder's class name.

    Args:
        session: Object whose ``run([output_name], {input_name: array})``
            returns a sequence; its first element is taken as the scores.
        inputs: Name of the model input, used as the feed-dictionary key.
        outputs: Name of the model output to fetch.
        size: Edge length the images are resized to before inference.
            Defaults to 224, the value that used to be hard-coded.

    Returns:
        The number of correctly classified images (an ``int``), not a ratio.
    """
    # (class folder / expected label, file names in that folder)
    names_by_class = (
        ("Coast", coast_names),
        ("Desert", desert_names),
        ("Forest", forest_names),
        ("Glacier", glacier_names),
        ("Mountain", mountain_names),
    )

    correct = 0
    for class_name, file_names in names_by_class:
        for file_name in file_names:
            batch = image_transformer(f"{data_path}/{class_name}/{file_name}", size)
            scores = session.run([outputs], {inputs: batch})[0]
            if labels[np.argmax(scores)] == class_name:
                correct += 1

    return correct


In [77]:
# Load the exported resnet34 model and run ONNX's model checker on it

res34_model_path = "models/landscape_model_resnet34.onnx"
model_res34 = onnx.load(res34_model_path)
onnx.checker.check_model(model_res34)

# printable_graph works fine, but despite its name the output is not very
# human-readable, so it is left commented out. Uncomment to inspect the graph.
# onnx.helper.printable_graph(model_res34.graph)

In [78]:
# Load the exported resnet50 model and run ONNX's model checker on it

res50_model_path = "models/landscape_model_resnet50.onnx"
model_res50 = onnx.load(res50_model_path)
onnx.checker.check_model(model_res50)

# Uncomment to dump the (not very readable) graph.
# onnx.helper.printable_graph(model_res50.graph)


In [79]:
# Starting inference session for resnet34 model

# Forward slashes are used on purpose: a backslash before "l" is an invalid
# string escape and only acts as a path separator on Windows. This spelling
# also matches the path passed to onnx.load for the same file.
res34_session = rt.InferenceSession("models/landscape_model_resnet34.onnx")

# Name of the first input / first output, as needed by session.run.
res34_inputs = res34_session.get_inputs()[0].name
res34_outputs = res34_session.get_outputs()[0].name
# Declared shape of the first input.
dims = res34_session.get_inputs()[0].shape

# Bare expression so the notebook displays the values.
res34_inputs, res34_outputs, dims

('input', 'output', [1, 3, 224, 224])

In [80]:
# Resnet34 model: number of correct predictions on the test data, then accuracy

resnet_34_preds = model_test_run(res34_session, res34_inputs, res34_outputs)
# all_names wraps one flat list of every test file name, so its first
# element's length is the total number of test images.
res34_total_images = len(all_names[0])
accuracy_res34 = resnet_34_preds / res34_total_images

In [81]:
# Starting inference session for resnet50 model

# Forward slashes are used on purpose: a backslash before "l" is an invalid
# string escape and only acts as a path separator on Windows. This spelling
# also matches the path passed to onnx.load for the same file.
res50_session = rt.InferenceSession("models/landscape_model_resnet50.onnx")

# Name of the first input / first output, as needed by session.run.
res50_inputs = res50_session.get_inputs()[0].name
res50_outputs = res50_session.get_outputs()[0].name
# Declared shape of the first input.
res50_dims = res50_session.get_inputs()[0].shape

# Display this session's own input shape (res50_dims), not the value left
# over in `dims` from the resnet34 cell.
res50_inputs, res50_outputs, res50_dims

('input', 'output', [1, 3, 224, 224])

In [82]:
# Resnet50 model: number of correct predictions on the test data, then accuracy

resnet_50_preds = model_test_run(res50_session, res50_inputs, res50_outputs)
# all_names wraps one flat list of every test file name, so its first
# element's length is the total number of test images.
res50_total_images = len(all_names[0])
accuracy_res50 = resnet_50_preds / res50_total_images

In [83]:
# Report both test-set accuracies, one per line.
print(
    f"Accuracy of resnet34 model on test dataset: {accuracy_res34}",
    f"Accuracy of resnet50 model on test dataset: {accuracy_res50}",
    sep="\n",
)

Accuracy of resnet34 model on test dataset: 0.352
Accuracy of resnet50 model on test dataset: 0.968


For some reason, the resnet34 model loses a great deal of accuracy on the test data. Given its high validation accuracy, I suspect an error in exporting or saving the model, but I don't have the time to go back and check. As I intended to use the resnet50 model anyway (it has higher validation accuracy and received more tuning), we will go ahead with that model as planned.