In [1]:
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow import keras
import os
import cv2
import numpy as np

In [2]:
print(tf.__version__)

2.8.0


In [3]:
physical_devices = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [4]:
dataPath = "../../data/OCT/OCT2017/"

labels_available = os.listdir(os.path.join(dataPath, "train"))
print("Total Number of Classes Detected :",len(labels_available))

labels_list = ['NORMAL',"CNV","DME","DRUSEN"]

x_tst=[]
y_tst=[]
for x in labels_list:
    xPath = os.path.join(dataPath, "test", x)
    myPicList = os.listdir(xPath)
    for y in myPicList:
        x_tst.append(cv2.imread(os.path.join(xPath, y)))
        y_tst.append(labels_list.index(x))
    print(x ,end=" ")

Total Number of Classes Detected : 4
NORMAL CNV DME DRUSEN 

In [5]:
def resizeIm(im, size):
    if im.shape[2] == 1:
        im = cv2.cvtColor(im,cv2.COLOR_GRAY2RGB)
    return cv2.resize(im, size)

def resizeIms(x, size):
    return np.array(list(map(lambda im: resizeIm(im, size), x)))

In [48]:
class MetricsCalculator():
    def __init__(self, Y_labels, Y_probabilityPreds, labels_names=None):
        import numpy as np
        import sklearn.metrics as metrics
        self._Y = Y_labels # correct labels
        self._probabilityPredictions = Y_probabilityPreds # predictions with probability distribution
        self._unique_labels = np.unique(self._Y)
        self._labelPredictions = tf.math.argmax(self._probabilityPredictions, axis=1).numpy()
        self._confusion_matrix = metrics.confusion_matrix(self._Y, self._labelPredictions)
        self._withFigures = False # if figures will be generated
        self._figuresPath = None # non given path
        self.labels_names = labels_names
    
    
    # If is set, images will be generated
    def SetFiguresOn(self, path=''):
        self._figuresPath = path
        self._withFigures = True
        
    def SetFiguresOff(self):
        self._withFigures = False
    
    
    # Compute confusion matrix for specific label if given
    def Confusion_matrix(self, target_label_index=None, return_type="matrix"):
        """Computes a binary confusion matrix for `target_label` if given.
        Otherwise returns the multi-class confusion matrix.
        Parameters
        ----------
        target_label_index : int, default=None
            Label index whom to compare with other classes 
            to perform confusion matrix.
        return_type : str, default="matrix"
            If binary (or target_label_index is specified) classification problem, 
            determines the return format. Acceptable values are "matrix" and "dict".
            
            If "matrix" is given, following return format will be followed:
            
                     labels
                p  -----------
                r  | TP | FP |
                e  -----------
                d  | FN | TN |
                s  -----------
            
            Otherwise a dictionary with the corresponding keys will be returned.
        Returns
        -------
        confusion_matrix : array-like, dict
            Confusion matrix from labels with TP, TN, FP and TN values.
        """
        import sklearn.metrics as metrics
        cm = self._confusion_matrix
        nLabels = len(self._unique_labels) # number of labels to predict
        if return_type not in ["matrix", "dict"]:
            raise ValueError(f'{return_type} is an invalid `return_type` value. Acceptable values are "matrix" and "dict".')
        
        # Save heatmap
        if self._withFigures:
            from os import path, mkdir
            import seaborn as sns
            import matplotlib.pyplot as plt
            
            ax = sns.heatmap(cm, annot=True, cmap='Blues')
            ax.set_title('Confusion Matrix\n\n');
            ax.set_xlabel('\nActual Values');
            ax.set_ylabel('Predicted Values ');

            if self.labels_names:
                ## Ticket labels - List must be in alphabetical order
                ax.xaxis.set_ticklabels(self.labels_names)
                ax.yaxis.set_ticklabels(self.labels_names)

            # Create directory if it does not exist yet
            if not path.isdir(self._figuresPath):
                mkdir(self._figuresPath)
            ## Display the visualization of the Confusion Matrix
            plt.savefig(f"{os.path.join(self._figuresPath, 'confusionMatrix.png')}", dpi=600)
            plt.cla() # Clear axes
        
        # If not particular label, return complete confusion matrix
        if target_label_index == None:
            # return sklearn matrix
            if return_type == "matrix":
                return cm
            # create dict from values
            elif return_type == "dict" and nLabels == 2:
                return {
                    "TP": cm[0][0],
                    "TN": cm[0][1],
                    "FP": cm[1][0],
                    "TN": cm[1][1]
                }
        # if invalid input fiven
        if target_label_index < 0 or type(target_label_index) != int:
            raise ValueError(f"`target_label_index` must be a positive integer value, {target_label_index} given")
        if target_label_index > nLabels:
            raise ValueError(f"Index {target_label_index} is greater that the available number of classes")
        
        i = target_label_index # rename index of desired label
        TP = cm[i][i] # correct label predictions
        FP = np.sum(cm[i]) - TP # incorrect label predictions
        FN = np.sum(cm, axis=0)[i] - TP # incorrect other labels predictions
        TN = np.sum(cm) - TP - FP - FN # correct other labels predictions
        
        if return_type == "matrix":
            return [[TP, FP], [FN, TN]]
        elif return_type == "dict":
            return {
                "TP": TP,
                "FN": FN,
                "FP": FP,
                "TN": TN
            }
    
    # Compute accuracy over results
    def Accuracy(self, target_label_index=None, confusion_matrix=None, confusion_dict=None):
        """Measures accuracy.
        Parameters
        ----------
        target_label_index : int, default=None
            Label index whom to compare with other classes 
            to perform confusion matrix.
        confusion_matrix (optional) : array-like
            Confusion matrix where to take model performance and
            measure accuracy.
        Returns
        -------
        accuracy : float
            Accuracy measure.
        """
        import sklearn.metrics as metrics
        # If not confusion matrix was given, compute it from scratch
        if not confusion_matrix or confusion_dict:
            confusion_matrix = self.Confusion_matrix(return_type="matrix") if not target_label_index else self.Confusion_matrix(return_type="matrix", target_label_index=target_label_index)
        # If confusion matrix is given, convert into dictionary
        if confusion_dict:
            confusion_matrix = [[confusion_dict["TP"], confusion_dict["FP"]], [confusion_dict["FN"], confusion_dict["TN"]]]
        return np.sum([confusion_matrix[i][i] for i in range(len(confusion_matrix))]) / np.sum(confusion_matrix) # compute overall correct predictions
    
    
    # Measure unbalanced metrics
    def _UnbalancedMetric(self, metric, options=None):
        if options:
            r = options
        else:
            r = {"micro": None, "macro": None, "weighted": None}
        for k in r.keys():
            r[k] = metric(self._Y, self._labelPredictions, average=k)
        return r
    
    
    # Measures the AUC of the given index
    def AUC(self):
        raise NotImplementedError("TODO")
        
    
    # Measures the IoU of the given index
    def IoU(self):
        import sklearn.metrics as metrics
        return self._UnbalancedMetric(metrics.jaccard_score)
        
        
    # Compute recall (sensitivity)
    def Recall(self):
        import sklearn.metrics as metrics
        return self._UnbalancedMetric(metrics.recall_score)
    
    
    # Compute precision
    def Precision(self):
        import sklearn.metrics as metrics
        return self._UnbalancedMetric(metrics.precision_score)
    

    # Compute F1-Score
    def F1score(self):
        import sklearn.metrics as metrics
        return self._UnbalancedMetric(metrics.f1_score)
    
    
    # Compute CohenKappa
    def CohenKappa(self):
        import sklearn.metrics as metrics
        return metrics.cohen_kappa_score(self._Y, self._labelPredictions)
    
    
    # Compute and display ROC Curves
    def ROCCurve(self, type_multi_class="ovo"):
        import sklearn.metrics as metrics
        return roc_auc_score(self._Y, self._probabilityPredictions, multi_class=type_multi_class)
    
    
    # Compute all metrics
    def MetricsReport(self):
        metrics_options = {
            "accuracy": self.Accuracy, 
            "IoU": self.IoU, 
            "recall": self.Recall, 
            "precision": self.Precision, 
            "f1-score": self.F1score, 
            "CohenKappa": self.CohenKappa,
        }
        r = dict()
        for metric, method in metrics_options.items():
            r[metric] = method()
        return r
    
        
    
    ##### AUC
    ##### IoU
    ##### Recall (sensitivity)
    ##### Precision
    ##### Specificity
    ##### F1 Score
    ##### CohenKappa
    ##### ROC Curve figure
    ##### Confusion matrix (figure)


class ModelMetricsCalculator(MetricsCalculator):
    """Compute most common performance metrics for a binary
    or a multiple classification model.
    Parameters
    ----------
    classifier : tensorflow classification model
        Trained classifier model to be used for predictions.
    X : array-like, or tensor
        Inputs to be passed to the classifier.
    Y : array-like, or tensor
        Target labels to be predicted by the classifier.
    """
    
    def __init__(self, classifier, X, Y, labels_names=None):
        import sklearn.metrics as metrics
        import numpy as np
        classifier.trainable = False # freeze layer weights
        probabilityPredictions = model.predict(X)
        super().__init__(Y, probabilityPredictions, labels_names) # init from parent class

In [49]:
model = keras.models.load_model(f"../octnet/octnet_30epochs_83484_images_True_newWeights_False_lastLayerOnly_zerocenter_normalization")
X_test = np.array(resizeIms(x_tst, (227, 227)))
Y_test = np.array(y_tst)
modelMetrics = ModelMetricsCalculator(model, X_test, Y_test, labels_names=labels_list)
#print(modelMetrics.Confusion_matrix(target_label_index=1))
modelMetrics.SetFiguresOn(path="ResultsFigures/testing")
print(modelMetrics.MetricsReport())

{'accuracy': 0.9865702479338843, 'IoU': {'micro': 0.9734964322120285, 'macro': 0.9735883885234534, 'weighted': 0.9735883885234534}, 'recall': {'micro': 0.9865702479338843, 'macro': 0.9865702479338844, 'weighted': 0.9865702479338843}, 'precision': {'micro': 0.9865702479338843, 'macro': 0.9870181405895692, 'weighted': 0.9870181405895692}, 'f1-score': {'micro': 0.9865702479338843, 'macro': 0.9865790547487024, 'weighted': 0.9865790547487024}, 'CohenKappa': 0.9820936639118457}


In [10]:
import pandas as pd
results = pd.DataFrame(columns=[
    "model", "train set images", "accuracy", "iou", "recall", "precision",
    "f1-score", "cohen kappa"
])
print(results)

Empty DataFrame
Columns: [model, train set images, accuracy, iou, recall, precision, f1-score, cohen kappa]
Index: []


In [11]:
t_sizes = [(835, 834.84), (2087, 2087.1), (4174, 4174.2), (6261, 62613.0), (7514, 75135.6), (8348, 83484.0), (20871, 20871.0), (33394, 33393.6), (41742, 41742.0), (50090, 50090.4), (62613, 62613.0), (75136, 75135.6), (83484, 83484.0)]
models = {
    "resnet": {"size": (224, 224), "norm": "zerocenter"}, 
    "xception": {"size": (299, 299), "norm": "rescale-symmetric"},
    "octnet": {"size": (227, 227), "norm": "zerocenter"},
    "opticnet": {"size": (224, 224), "norm": "rescale-symmetric"}
}

import pandas as pd
results = pd.DataFrame(columns=[
    "model", "train set images", "pretrained", "trained layers", "accuracy", "iou", "recall", "precision",
    "f1-score", "cohen kappa"
])

# Check all models
for m, args in models.items():
    # resize images
    X_test = np.array(resizeIms(x_tst, args["size"]))
    Y_test = np.array(y_tst)
    
    # Check for each train set size
    for t in t_sizes:
        for pretrained in [False, True]:
            lLayer = [True, False] if pretrained else [False]
            for lastLayer in lLayer:
                try:
                    model = keras.models.load_model(f"../{m}/{m}_30epochs_{t[0]}_images_{not pretrained}_newWeights_{lastLayer}_lastLayerOnly_{args['norm']}_normalization")
                except:
                    model = keras.models.load_model(f"../{m}/{m}_30epochs_{t[1]}_images_{not pretrained}_newWeights_{lastLayer}_lastLayerOnly_{args['norm']}_normalization")
                modelMetrics = ModelMetricsCalculator(model, X_test, Y_test)
                # save images from metrics
                modelMetrics.SetFiguresOn(path=f"ResultsFigures/{m}/30epochs_{t[0]}_images_{not pretrained}_newWeights_{lastLayer}_lastLayerOnly")
                mets = modelMetrics.MetricsReport() # compute all metrics
                results = results.append({
                    "model": m,
                    "train set images": t[0], 
                    "pretrained": pretrained, 
                    "trained layers": lastLayer,
                    "accuracy": mets["accuracy"],
                    "iou": mets ["IoU"]["weighted"],
                    "recall": mets["recall"]["weighted"],
                    "precision": mets["precision"]["weighted"],
                    "f1-score": mets["f1-score"]["weighted"],
                    "cohen kappa": mets["CohenKappa"]
                }, ignore_index=True)
    
    del X_test
    del Y_test
        
print(results)



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


        model  train set images pretrained trained layers  accuracy       iou  \
0      resnet     (835, 834.84)      False          False  0.250000  0.062500   
1      resnet     (835, 834.84)       True           True  0.310950  0.171539   
2      resnet     (835, 834.84)       True          False  0.498967  0.320204   
3      resnet    (2087, 2087.1)      False          False  0.250000  0.062500   
4      resnet    (2087, 2087.1)       True           True  0.278926  0.153608   
..        ...               ...        ...            ...       ...       ...   
151  opticnet  (75136, 75135.6)       True           True  0.250000  0.062500   
152  opticnet  (75136, 75135.6)       True          False  0.996901  0.993840   
153  opticnet  (83484, 83484.0)      False          False  0.998967  0.997938   
154  opticnet  (83484, 83484.0)       True           True  0.250000  0.062500   
155  opticnet  (83484, 83484.0)       True          False  0.995868  0.991803   

       recall  precision  f

In [12]:
results.to_csv("Metrics.csv", index=False)

In [168]:
predictions = model.predict(X_test)
y_test = Y_test
y_pred = tf.math.argmax(predictions, axis=1).numpy()

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
print('\nAccuracy: {}\n'.format(accuracy_score(y_test, y_pred)))


Accuracy: 0.9865702479338843



In [41]:
nClasses = len(labels_list)

acc = tf.keras.metrics.Accuracy() # accuracy
auc = tf.keras.metrics.AUC() # Area under the curve
iou = tf.keras.metrics.IoU(num_classes=nClasses, target_class_ids=[0, 1, 2, 3]) # INtersection over union
recall = tf.keras.metrics.Recall() # Recall (sensitivity)
chkp = tfa.metrics.CohenKappa(num_classes=nClasses, sparse_labels=True) # Cohen Kappa
# Specificity
# ROC figures
# COnfusion matrix figures

In [77]:
model = keras.models.load_model(f"../octnet/octnet_30epochs_83484_images_True_newWeights_False_lastLayerOnly_zerocenter_normalization")
model.trainable = False
predictions = model.predict(X_test)
y_test = Y_test
y_pred = tf.math.argmax(predictions, axis=1).numpy()

#importing confusion matrix
from sklearn.metrics import confusion_matrix
confusion = confusion_matrix(y_test, y_pred)
print('Confusion Matrix\n')
print(confusion)

#importing accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
print('\nAccuracy: {:.2f}\n'.format(accuracy_score(y_test, y_pred)))

print('Micro Precision: {:.2f}'.format(precision_score(y_test, y_pred, average='micro')))
print('Micro Recall: {:.2f}'.format(recall_score(y_test, y_pred, average='micro')))
print('Micro F1-score: {:.2f}\n'.format(f1_score(y_test, y_pred, average='micro')))

print('Macro Precision: {:.2f}'.format(precision_score(y_test, y_pred, average='macro')))
print('Macro Recall: {:.2f}'.format(recall_score(y_test, y_pred, average='macro')))
print('Macro F1-score: {:.2f}\n'.format(f1_score(y_test, y_pred, average='macro')))

print('Weighted Precision: {:.2f}'.format(precision_score(y_test, y_pred, average='weighted')))
print('Weighted Recall: {:.2f}'.format(recall_score(y_test, y_pred, average='weighted')))
print('Weighted F1-score: {:.2f}'.format(f1_score(y_test, y_pred, average='weighted')))

from sklearn.metrics import classification_report
print('\nClassification Report\n')
print(classification_report(y_test, y_pred, target_names=labels_list))

Confusion Matrix

[[242   0   0   0]
 [  0 242   0   0]
 [  3   6 233   0]
 [  0   4   0 238]]

Accuracy: 0.99

Micro Precision: 0.99
Micro Recall: 0.99
Micro F1-score: 0.99

Macro Precision: 0.99
Macro Recall: 0.99
Macro F1-score: 0.99

Weighted Precision: 0.99
Weighted Recall: 0.99
Weighted F1-score: 0.99

Classification Report

              precision    recall  f1-score   support

      NORMAL       0.99      1.00      0.99       242
         CNV       0.96      1.00      0.98       242
         DME       1.00      0.96      0.98       242
      DRUSEN       1.00      0.98      0.99       242

    accuracy                           0.99       968
   macro avg       0.99      0.99      0.99       968
weighted avg       0.99      0.99      0.99       968



In [81]:
import matplotlib.pyplot as plt

def plot_roc_curve(fpr,tpr): 
    plt.plot(fpr,tpr) 
    plt.axis([0,1,0,1]) 
    plt.xlabel('False Positive Rate') 
    plt.ylabel('True Positive Rate') 
    plt.show()

from sklearn.metrics import roc_auc_score, roc_curve
fpr, tpr, thresholds = roc_curve(y_test, predictions)
plot_roc_curve (fpr,tpr)

print(f"\nAUC: {roc_auc_score(y_test, predictions, multi_class='ovo')}")

from sklearn.metrics import jaccard_score
print(f"\nIoU: {jaccard_score(y_test, y_pred, average='macro')}")

ValueError: multiclass format is not supported

In [45]:
model = keras.models.load_model(f"../octnet/octnet_30epochs_83484_images_False_newWeights_False_lastLayerOnly_zerocenter_normalization")
model.trainable = False
prediction = model.predict(X_test)
#DME_predictions = prediction.where()
Y_pred = tf.math.argmax(prediction, axis=1).numpy()
acc.reset_state()
acc.update_state(Y_test, Y_pred)
#auc.reset_state()
#auc.update_state(Y_test, Y_pred)
tf.math.confusion_matrix(Y_test , Y_pred)



<tf.Tensor: shape=(4, 4), dtype=int32, numpy=
array([[241,   0,   1,   0],
       [  0, 242,   0,   0],
       [  2,   3, 237,   0],
       [  0,   2,   0, 240]], dtype=int32)>

In [28]:
for t in [83484]:
    model = keras.models.load_model(f"../octnet/octnet_30epochs_{t}_images_False_newWeights_False_lastLayerOnly_zerocenter_normalization")
    model.trainable = False
    Y_pred = tf.math.argmax(model.predict(X_test), axis=1).numpy()
    
    print(f"Size: {Y_test.shape} and {Y_pred.shape}")
    
    acc.reset_state()
    acc.update_state(Y_test, Y_pred)
    acc.result().numpy()
    
    #spec.reset_state()
    spec.update_state(Y_test, Y_pred)
    spec.result().numpy()
    
    auc.reset_state()
    auc.update_state(Y_test, Y_pred)
    auc.result().numpy()
    
    iou.reset_state()
    iou.update_state(Y_test, Y_pred)
    iou.result().numpy()
    
    recall.reset_state()
    recall.update_state(Y_test, Y_pred)
    recall.result().numpy()
    
    chkp.reset_state()
    chkp.update_state(Y_test, Y_pred)
    chkp.result().numpy()
    
    print(f"For size: {t} Acc: {acc.result().numpy()} Spec: {spec.result.numpy()}")

Size: (968,) and (968,)


InvalidArgumentError: predictions must be <= 1
Condition x <= y did not hold.
First 3 elements of x:
[0. 0. 0.]
First 1 elements of y:
[1.]

In [9]:
nClasses = len(labels_list)
octnet.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=[
        #tf.keras.metrics.SensitivityAtSpecificity(0.5),
        #tf.keras.metrics.AUC(),
        #tf.keras.metrics.MeanIoU(num_classes=nClasses),
        #tf.keras.metrics.Recall(),
        #tfa.metrics.CohenKappa(num_classes=nClasses, sparse_labels=True),
        tf.keras.metrics.Accuracy()
    ])


#octnet.predict(X_test)
octnet.evaluate(X_test, Y_test, batch_size=50)

ValueError: in user code:

    /anaconda/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1233 test_function  *
        return step_function(self, iterator)
    /anaconda/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1224 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /anaconda/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /anaconda/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /anaconda/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /anaconda/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1217 run_step  **
        outputs = model.test_step(data)
    /anaconda/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1185 test_step
        self.compiled_loss(
    /anaconda/lib/python3.8/site-packages/tensorflow/python/keras/engine/compile_utils.py:203 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    /anaconda/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:152 __call__
        losses = call_fn(y_true, y_pred)
    /anaconda/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:256 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    /anaconda/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /anaconda/lib/python3.8/site-packages/tensorflow/python/keras/losses.py:1537 categorical_crossentropy
        return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
    /anaconda/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /anaconda/lib/python3.8/site-packages/tensorflow/python/keras/backend.py:4833 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    /anaconda/lib/python3.8/site-packages/tensorflow/python/framework/tensor_shape.py:1134 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (None, 1) and (None, 4) are incompatible
