# Model Evaluation

In [1]:
import time
import itertools

from tqdm import tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

from sklearn.metrics import classification_report, confusion_matrix

import tensorflow as tf
import tensorflow_hub as hub

from src import utils#, eval_utils, rgbd_model, rgb_model, depth_model, hha_model, rgb_hha_model

## Load dataset(s)

In [2]:
# Start the clock before anything else so the reported duration covers the whole cell.
start_time = time.time()
print('loading rgbd train and test datasets')

# Both split folders sit under the same dataset root; only the trailing folder differs.
train_data_path, test_data_path = (
    'C:/Users/awnya/Documents/Projects/RGBD Object Classification/RGBD_dataset' + split_dir
    for split_dir in ('/train', '/test')
)

# Only the test split is built here; building the train split is currently disabled.
# rgbd_train_dataset = utils.create_rgbd_dataset(train_data_path, azure=False)
rgbd_test_dataset = utils.create_rgbd_dataset(test_data_path, azure=False)

print(f'done! took {time.time()-start_time:.2f} seconds')

loading rgbd train and test datasets
done! took 34.23 seconds


In [3]:
# start_time = time.time()

# print('loading rgb train and test datasets')
# train_data_path = 'C:/Users/awnya/Documents/Projects/RGBD Object Classification/RGBD_dataset' + '/train'
# test_data_path = 'C:/Users/awnya/Documents/Projects/RGBD Object Classification/RGBD_dataset' + '/test'

# rgb_train_dataset = utils.create_rgb_dataset(train_data_path, azure=False)
# rgb_test_dataset = utils.create_rgb_dataset(test_data_path, azure=False)

# print(f'done! took {time.time()-start_time:.2f} seconds')

In [4]:
# start_time = time.time()

# print('loading depth train and test datasets')
# train_data_path = 'C:/Users/awnya/Documents/Projects/RGBD Object Classification/RGBD_dataset' + '/train'
# test_data_path = 'C:/Users/awnya/Documents/Projects/RGBD Object Classification/RGBD_dataset' + '/test'

# depth_train_dataset = utils.create_depth_dataset(train_data_path, azure=False)
# depth_test_dataset = utils.create_depth_dataset(test_data_path, azure=False)

# print(f'done! took {time.time()-start_time:.2f} seconds')

In [5]:
# Class names, taken from the keys of the test dataset's label -> integer mapping.
# NOTE(review): the confusion-matrix cells use this list as axis labels, so it presumably
# has to be in ascending integer-label order — confirm against label_int_dict.
classes = [*rgbd_test_dataset.label_int_dict.keys()]

# Reverse lookup: integer label -> class name.
ind_class_dict = dict(
    (index, name) for name, index in rgbd_test_dataset.label_int_dict.items()
)

## RGB* Model

In [6]:
# Saved RGB predictions and their matching true labels, read from text and cast to ints.
rgb_pred = np.loadtxt('preds/rgb_test_pred.txt').astype(int)
rgb_true = np.loadtxt('preds/rgb_test_true.txt').astype(int)

# Accuracy is the fraction of positions where prediction and label agree.
acc = np.mean(rgb_pred == rgb_true)
print(f'accuracy: {acc}')

accuracy: 0.803758889265154


In [7]:
# Keep the report as a DataFrame as well; transposing turns each report entry into a row.
clf_report_df = pd.DataFrame(
    classification_report(rgb_true, rgb_pred, output_dict=True)
).transpose()

# Plain-text version of the same report for the cell output.
print(classification_report(rgb_true, rgb_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       629
           1       1.00      0.84      0.91       763
           2       1.00      0.99      1.00       687
           3       1.00      0.42      0.59       618
           4       1.00      0.67      0.80       727
           5       1.00      0.21      0.35       782
           6       0.98      1.00      0.99       557
           7       0.00      0.00      0.00       529
           8       0.99      1.00      1.00       635
           9       0.69      0.26      0.37       545
          10       1.00      0.99      0.99       542
          11       0.82      0.05      0.09       786
          12       1.00      1.00      1.00       584
          13       0.00      0.00      0.00       533
          14       1.00      0.09      0.17       583
          15       0.98      1.00      0.99       772
          16       0.99      1.00      1.00       781
          17       0.98    

In [8]:
# Raw confusion matrix: rows are true classes, columns are predicted classes.
cm = confusion_matrix(rgb_true, rgb_pred)

# Normalise every row by its own total so the diagonal is per-class recall.
# keepdims=True keeps the totals as an (n, 1) column; a flat (n,) vector would broadcast
# along the last axis and divide each column by a different row's total.
# Clamping the totals to 1 leaves an all-zero row as zeros instead of NaN.
cm_normalized = cm / np.maximum(cm.sum(axis=1, keepdims=True), 1)

px.imshow(cm_normalized, x=classes, y=classes,
    labels={'x': 'Predicted', 'y': 'True', 'color': 'Recall'}, title='RGB Confusion Matrix',
    color_continuous_scale='blues', width=1000, height=800)

## Depth

In [9]:
# Saved depth predictions and their matching true labels, read from text and cast to ints.
depth_pred = np.loadtxt('preds/depth_test_pred.txt').astype(int)
depth_true = np.loadtxt('preds/depth_test_true.txt').astype(int)

# Accuracy is the fraction of positions where prediction and label agree.
acc = np.mean(depth_pred == depth_true)
print(f'accuracy: {acc}')

accuracy: 0.40543515069420927


In [10]:
# Keep the report as a DataFrame as well; transposing turns each report entry into a row.
clf_report_df = pd.DataFrame(
    classification_report(depth_true, depth_pred, output_dict=True)
).transpose()

# Plain-text version of the same report for the cell output.
print(classification_report(depth_true, depth_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       629
           1       0.03      0.00      0.00       763
           2       0.00      0.00      0.00       687
           3       0.00      0.00      0.00       618
           4       0.32      0.05      0.08       727
           5       0.01      0.00      0.00       782
           6       0.00      0.00      0.00       557
           7       0.00      0.00      0.00       529
           8       0.50      0.00      0.00       635
           9       0.00      0.00      0.00       545
          10       0.94      0.31      0.47       542
          11       0.01      0.00      0.00       786
          12       1.00      0.04      0.08       584
          13       0.00      0.00      0.00       533
          14       0.00      0.00      0.00       583
          15       0.33      0.02      0.04       772
          16       0.69      0.54      0.60       781
          17       0.69    

In [11]:
# Raw confusion matrix: rows are true classes, columns are predicted classes.
cm = confusion_matrix(depth_true, depth_pred)

# Normalise every row by its own total so the diagonal is per-class recall.
# keepdims=True keeps the totals as an (n, 1) column; a flat (n,) vector would broadcast
# along the last axis and divide each column by a different row's total.
# Clamping the totals to 1 leaves an all-zero row as zeros instead of NaN.
cm_normalized = cm / np.maximum(cm.sum(axis=1, keepdims=True), 1)

px.imshow(cm_normalized, x=classes, y=classes,
    labels={'x': 'Predicted', 'y': 'True', 'color': 'Recall'}, title='Depth Confusion Matrix',
    color_continuous_scale='blues', width=1000, height=800)

## HHA

In [12]:
# Saved HHA predictions and their matching true labels, read from text and cast to ints.
hha_pred = np.loadtxt('preds/hha_test_pred.txt').astype(int)
hha_true = np.loadtxt('preds/hha_test_true.txt').astype(int)

# Accuracy is the fraction of positions where prediction and label agree.
acc = np.mean(hha_pred == hha_true)
print(f'accuracy: {acc}')

accuracy: 0.48018963765662037


In [13]:
# Keep the report as a DataFrame as well; transposing turns each report entry into a row.
clf_report_df = pd.DataFrame(
    classification_report(hha_true, hha_pred, output_dict=True)
).transpose()

# Plain-text version of the same report for the cell output.
print(classification_report(hha_true, hha_pred))

              precision    recall  f1-score   support

           0       0.88      0.01      0.02       629
           1       0.50      0.00      0.00       763
           2       0.94      0.59      0.72       687
           3       0.10      0.01      0.01       618
           4       0.29      0.01      0.01       727
           5       1.00      0.11      0.20       782
           6       0.47      0.03      0.06       557
           7       0.00      0.00      0.00       529
           8       0.33      0.01      0.01       635
           9       0.00      0.00      0.00       545
          10       0.70      0.13      0.22       542
          11       0.00      0.00      0.00       786
          12       0.00      0.00      0.00       584
          13       0.00      0.00      0.00       533
          14       0.00      0.00      0.00       583
          15       0.74      0.29      0.42       772
          16       0.93      0.37      0.53       781
          17       0.94    

In [14]:
# Raw confusion matrix: rows are true classes, columns are predicted classes.
cm = confusion_matrix(hha_true, hha_pred)

# Normalise every row by its own total so the diagonal is per-class recall.
# keepdims=True keeps the totals as an (n, 1) column; a flat (n,) vector would broadcast
# along the last axis and divide each column by a different row's total.
# Clamping the totals to 1 leaves an all-zero row as zeros instead of NaN.
cm_normalized = cm / np.maximum(cm.sum(axis=1, keepdims=True), 1)

px.imshow(cm_normalized, x=classes, y=classes,
    labels={'x': 'Predicted', 'y': 'True', 'color': 'Recall'}, title='HHA Confusion Matrix',
    color_continuous_scale='blues', width=1000, height=800)

## RGB-D

In [15]:
# Saved RGB-D predictions and their matching true labels, read from text and cast to ints.
rgbd_pred = np.loadtxt('preds/rgbd_test_pred.txt').astype(int)
rgbd_true = np.loadtxt('preds/rgbd_test_true.txt').astype(int)

# Accuracy is the fraction of positions where prediction and label agree.
acc = np.mean(rgbd_pred == rgbd_true)
print(f'accuracy: {acc}')

accuracy: 0.5474658539338526


In [16]:
# Keep the report as a DataFrame as well; transposing turns each report entry into a row.
clf_report_df = pd.DataFrame(
    classification_report(rgbd_true, rgbd_pred, output_dict=True)
).transpose()

# Plain-text version of the same report for the cell output.
print(classification_report(rgbd_true, rgbd_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       629
           1       0.04      0.00      0.00       763
           2       0.99      0.81      0.89       687
           3       0.99      0.13      0.22       618
           4       0.00      0.00      0.00       727
           5       0.00      0.00      0.00       782
           6       0.95      0.53      0.68       557
           7       0.00      0.00      0.00       529
           8       0.79      0.05      0.09       635
           9       0.00      0.00      0.00       545
          10       0.99      0.19      0.32       542
          11       0.00      0.00      0.00       786
          12       0.00      0.00      0.00       584
          13       0.00      0.00      0.00       533
          14       1.00      0.53      0.69       583
          15       1.00      0.84      0.91       772
          16       0.65      0.82      0.73       781
          17       0.00    

In [17]:
# Raw confusion matrix: rows are true classes, columns are predicted classes.
cm = confusion_matrix(rgbd_true, rgbd_pred)

# Normalise every row by its own total so the diagonal is per-class recall.
# keepdims=True keeps the totals as an (n, 1) column; a flat (n,) vector would broadcast
# along the last axis and divide each column by a different row's total.
# Clamping the totals to 1 leaves an all-zero row as zeros instead of NaN.
cm_normalized = cm / np.maximum(cm.sum(axis=1, keepdims=True), 1)

px.imshow(cm_normalized, x=classes, y=classes,
    labels={'x': 'Predicted', 'y': 'True', 'color': 'Recall'}, title='RGB-D Confusion Matrix',
    color_continuous_scale='blues', width=1000, height=800)

## RGB-HHA

In [18]:
# rgb_hha_pred = np.loadtxt('preds/rgb_hha_test_pred.txt').astype(int)
# rgb_hha_true = np.loadtxt('preds/rgb_hha_test_true.txt').astype(int)

# acc = np.average(rgb_hha_pred==rgb_hha_true)
# print(f'accuracy: {acc}')

In [19]:
# clf_report_df = pd.DataFrame(classification_report(rgb_hha_true, rgb_hha_pred, output_dict=True)).T
# print(classification_report(rgb_hha_true, rgb_hha_pred))

In [20]:
# cm = confusion_matrix(rgb_hha_true, rgb_hha_pred)
# cm_normalized = cm / np.maximum(cm.sum(axis=1, keepdims=True), 1)

# px.imshow(cm_normalized, x=classes, y=classes, 
#     labels={'x': 'Predicted', 'y': 'True', 'color': 'Recall'}, title='RGB-HHA Confusion Matrix',  
#     color_continuous_scale='blues',width=1000, height=800)