# PowerAI Vision Model Validation

A PowerAI Vision model is built with testing and training within the PowerAI Vision UI. Additional manual testing can be done from within the UI.
This notebook demonstrates a variety of metrics that can be collected using Python code and the API endpoint of a deployed model.  The test images
are read from local directories and the results are shown in this notebook and also exported to CSV files.

![](../doc/source/images/confusion_matrix.png)

## Required Setup

Edit the following cell to provide the API endpoint of your deployed PowerAI Vision model in the URL variable.
Customize additional constants (e.g. set your input directory) as needed.

In [None]:
# Copy/paste API endpoint from your PowerAI Vision deployed model.
# The first assignment only illustrates the expected shape of the value; the
# second assignment overrides it and is the endpoint that is actually used.
URL = 'https://url/guid'
URL = 'https://ny1.ptopenlab.com/powerai-vision-ny/api/dlapis/1f18a4a8-dc13-4d2e-a23a-a3fd2ad26206'

# Provide an input directory. Its subdirectories contain test images.
INPUT_DIR = '../test_images'  # Use subdir names as ground truth class names (TODO: Allow map)
OUTPUT_PREFIX = 'result'   # For output CSV file(s)

# WARNING: If False, for convenience, we are not validating the certificate when using the PowerAI API endpoint.
VERIFY_CERT = False

## Python Modules

In [None]:
!pip install --user pandas_ml==0.6.1

## Imports

In [None]:
from __future__ import print_function
import collections
import json
import os
import re
import requests
import time
import logging
import csv

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn
import pandas
from pandas_ml import ConfusionMatrix
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, matthews_corrcoef

## Logging

In [None]:
# Use logger to control debug output levels.
logging.basicConfig()
logger = logging.getLogger("logger")
# Set desired logging to ERROR, INFO, WARNING, or DEBUG.
logger.setLevel("ERROR")
# Emit one message at several levels so the output shows which levels are enabled.
logger.debug("Debug logging is enabled.")
logger.info("Info logging is enabled.")
logger.error("Error logging is enabled.")
# Turn off urllib3 warnings.
# NOTE(review): presumably this silences the insecure-request warnings raised
# when certificate verification is disabled -- confirm this is intended.
requests.packages.urllib3.disable_warnings()

## Inference Functions

In [None]:
# Use regular expression to only classify JPG or PNG images.
# The pattern requires a literal dot before the extension so that names which
# merely end in those letters (e.g. "notes_png") are not treated as images.
# Matching is case-insensitive and anchored at the end of the file name.
file_matcher = re.compile(r"\.(?:jpg|jpeg|png)$", re.IGNORECASE)

#------------------------------------
# classify files in directory and save results in dict
# returns dict upon completion
def classifyFiles(directory, normalize):
    """Classify the image files found in a directory with the deployed model.

    Every directory entry whose name matches ``file_matcher`` is POSTed to
    ``URL`` and the outcome is stored in a dict keyed by file name. Entries
    that do not match are counted as skipped.

    Args:
        directory: Path of the directory whose entries are classified.
        normalize: If true, a "negative" classification is recorded as
            "unclassified" with a confidence of 0.

    Returns:
        dict mapping file name to a dict with the keys "filename",
        "classification", "confidence" and "duration" (request time in
        milliseconds). A response without a "classified" entry (including
        a body that is not JSON) is recorded with classification "None"
        and an empty confidence. Files for which the server reported an
        error have no entry.
    """
    results = {}
    totalFiles = 0
    skippedFiles = 0
    processedFiles = 0
    classifiedFiles = 0
    unclassifiedFiles = 0
    problemFiles = 0

    # To help track millisecond time for each inference
    current_milli_time = lambda: int(round(time.time() * 1000))

    logger.info("Classifying directory: {}".format(directory))
    for filename in os.listdir(directory):
        totalFiles += 1
        if not file_matcher.search(filename):
            skippedFiles += 1
            logger.info("skipping non-image file {}".format(filename))
            continue

        logger.debug("classifying {}".format(filename))
        processedFiles += 1
        filepath = os.path.join(directory, filename)

        # The with-block closes the image handle as soon as the upload is
        # done, instead of leaking one open file per classified image.
        with open(filepath, 'rb') as image:
            startMilli = current_milli_time()
            # Honor the notebook-level VERIFY_CERT setting rather than
            # unconditionally disabling certificate validation.
            rsp = requests.post(URL, verify=VERIFY_CERT, auth=None, files={'files': image})
            stopMilli = current_milli_time()
        duration = stopMilli - startMilli
        logger.debug("classified {} in {} millis".format(filename, duration))

        if not rspOk(rsp):
            logger.error("ERROR")
            problemFiles += 1
            logger.error("Error result from server for {}".format(filename))
            continue

        try:
            result = rsp.json()
        except ValueError:
            # rspOk() accepts a good status code even when the body is not
            # JSON; treat that as "no classification" instead of crashing.
            result = {}
        logger.debug("json={}".format(json.dumps(result)))

        if "classified" not in result:
            problemFiles += 1
            results[filename] = {"filename": filename,
                                 "classification": "None",
                                 "confidence": "",
                                 "duration": duration}
            logger.warning("No classification for {}".format(filename))
            continue

        classification = result["classified"]
        logger.debug("classification = {}".format(json.dumps(classification)))
        # If several classes are returned, each one is counted and the last
        # one iterated is the entry kept for this file.
        for classname, confidence in classification.items():
            if classname == "negative":
                unclassifiedFiles += 1
                if normalize:
                    # translate 'negative' to 'unclassified' for better clarity
                    classname = "unclassified"
                    confidence = 0
            else:
                classifiedFiles += 1

            logger.debug("Saving result {}, {}, {}".format(filename, classname, confidence))
            results[filename] = {"filename": filename,
                                 "classification": classname,
                                 "confidence": confidence,
                                 "duration": duration}

    logger.info("TotalFiles: {}, ProcessedFiles: {}, classifiedFiles: {}, unclassifiedFiles: {}, skippedFiles: {}, problemFiles: {}".
                 format(totalFiles, processedFiles, classifiedFiles, unclassifiedFiles, skippedFiles, problemFiles))
    return results

# ------------------------------------
# Checks if result from Vision API succeeded
# (Current API returns failure indication in the JSON body)
def rspOk(rsp):
    """Report whether a Vision API response counts as a success.

    Args:
        rsp: Response object exposing ``status_code``, ``ok`` and ``json()``.

    Returns:
        False when ``rsp.ok`` is false, or when the JSON body carries
        ``"result": "fail"``. True otherwise, including the case where the
        status is good but the body cannot be parsed as JSON.
    """
    logger.debug("status_code: {}, OK={}.".format(rsp.status_code, rsp.ok))

    if not rsp.ok:
        return False

    try:
        body = rsp.json()
    except ValueError:
        # A good status without a JSON body is still reported as success.
        logger.error("good status_code, but no data")
        return True

    # The API signals failure inside the JSON body rather than via the status.
    failed = ("result" in body) and (body["result"] == "fail")
    if failed:
        logger.debug(json.dumps(body, indent=2))
    return not failed

## Main

The main logic starts here and continues below.

* Loop over the `INPUT_DIR` sub-directories.
* Call the inference function on each image file.
* Build the `truth` and `predicted` lists, which are used by the rest of the notebook (below).

  * "truth" lists the classification we want for the image.
  * "predicted" lists the classification we got from the inference call.

In [None]:
#
# MAIN
#

truth = []
predicted = []

for dirname in os.listdir(INPUT_DIR):
    class_dir = os.path.join(INPUT_DIR, dirname)
    if os.path.isdir(class_dir):
        result_dict = classifyFiles(class_dir, False)
        logger.debug(result_dict)

        # Every recorded file contributes one pair: the sub-directory name
        # as ground truth and the classification returned for the file.
        predictions = [entry['classification'] for entry in result_dict.values()]
        truth.extend([dirname] * len(predictions))
        predicted.extend(predictions)

confusion_matrix = ConfusionMatrix(truth, predicted)

## Plot the confusion matrix

Using matplotlib and seaborn, we can graphically show the confusion matrix.

In [None]:
# Plot the confusion matrix using the seaborn backend and the 'Blues' colormap.
# NOTE(review): the trailing semicolon presumably keeps the notebook from
# echoing the call's return value -- confirm.
confusion_matrix.plot(backend='seaborn', annot=True, linewidth=5, cmap='Blues');

## Plot the normalized confusion matrix

Setting `normalized=True` gives us values from 0 to 1. This may be a better representation when the weights are uneven.

In [None]:
# Same plot as before, but with normalized=True and the 'Greens' colormap.
# NOTE(review): the trailing semicolon presumably keeps the notebook from
# echoing the call's return value -- confirm.
confusion_matrix.plot(normalized=True, backend='seaborn', annot=True, linewidth=5, cmap='Greens');

## Classification metrics

In [None]:
# The handiest way to see the stats is with print_stats().
# But since we're coding for CSV output we have other ways
# to show the data below.

# confusion_matrix.print_stats()

### Collect the classification metrics by class and write them to a CSV

In [None]:
stats = confusion_matrix.stats()

class_csv = OUTPUT_PREFIX + "_class.csv"
logger.info("Writing stats by class to: " + class_csv)

with open(class_csv, "w") as outfile:
    csvwriter = csv.writer(outfile)

    # One CSV row per class. The header row is written just before the first
    # class row, using that class's metric names.
    for position, (classification, metrics) in enumerate(stats['class'].items()):
        pairs = list(metrics.items())
        if position == 0:
            csvwriter.writerow(['class'] + [name for name, _ in pairs])
        csvwriter.writerow([classification] + [value for _, value in pairs])

# Load the file that was just written and preview it.
df = pandas.read_csv(class_csv)
df.style.hide_index()  # Preview here. To see the whole thing open the CSV file.

In [None]:
# Show the per-class data frame transposed.
df.T  # Same data as above, but transposed for a better view of the metrics (perhaps).

## Calculate the overall metrics

The TP, TN, FP, and FN can be summed using the confusion matrix data frame.

In [None]:
# Get total TP, TN, FP, FN.
# The keys are column names of df (read from the per-class CSV). Each is
# summed over all class rows; the totals are later read back as
# tp_tn_fp_fn[<column>]['sum'].
tp_tn_fp_fn = df.agg({'TP: True Positive': ['sum'],
                      'TN: True Negative': ['sum'],
                      'FP: False Positive': ['sum'],
                      'FN: False Negative': ['sum']})
tp_tn_fp_fn

## Use count to calculate the number of classes

In [None]:
# Get total class count.
# Counts the entries of the 'class' column, i.e. one per class row in df.
class_count = df.agg({'class': ['count']})
class_count

## Use max to calculate the number of images classified

In [None]:
# Get total population.
# Takes the maximum of the 'Population' column.
# NOTE(review): presumably every class row carries the same population value,
# so max simply picks it -- confirm against the per-class CSV.
population = df.agg({'Population': ['max']})
population

## pandas-ml provides some overall metrics in a classification report

This is the easy way, but it only offers weighted metrics and does not include MCC.

In [None]:
# Weighted classification report
# classification_report is read as an attribute of the confusion matrix here
# (it is not called).
classification_report = confusion_matrix.classification_report
classification_report

## Use sklearn.metrics directly to get more metrics

This gives us MCC and accuracy and also allows us to specify "macro" or "weighted" where applicable.

In [None]:
# Overall metrics computed from the truth/predicted lists built in the main loop.
accuracy = accuracy_score(truth, predicted)

# The same averaging mode is passed to precision, recall and F1 below.
average = 'weighted'  # Use 'macro' or 'weighted'
precision = precision_score(truth, predicted, average=average)
recall = recall_score(truth, predicted, average=average)
f1 = f1_score(truth, predicted, average=average)
# MCC takes no averaging argument here.
mcc = matthews_corrcoef(truth, predicted)

## Combine the overall metrics and write them to a CSV file

In [None]:
summary_csv = OUTPUT_PREFIX + "_summary.csv"
logger.info("Writing stats summary to: " + summary_csv)

# Pull some numbers out of the dataframes shown earlier
# (each aggregate frame is read at its last row / last column).
images = population.iat[-1, -1]
classes = class_count.iat[-1, -1]
tp = tp_tn_fp_fn['TP: True Positive']['sum']
tn = tp_tn_fp_fn['TN: True Negative']['sum']
fp = tp_tn_fp_fn['FP: False Positive']['sum']
fn = tp_tn_fp_fn['FN: False Negative']['sum']

# Combine the metrics in a CSV with a header.
# The file has exactly one header row and one data row, so no "first row"
# flag is needed.
with open(summary_csv, "w") as outfile:
    csvwriter = csv.writer(outfile)

    header = ['# of Classes', '# of Images', 'TP', 'TN', 'FP', 'FN', 'Precision', 'Recall', 'Accuracy', 'F1', 'MCC']
    row = [classes, images, tp, tn, fp, fn, precision, recall, accuracy, f1, mcc]
    csvwriter.writerow(header)
    csvwriter.writerow(row)

# Read and show the CSV file.
summary = pandas.read_csv(summary_csv) # Preview the CSV file here
summary.style.hide_index()

<font size=-1 color=gray>
<hr>
&copy; Copyright 2003,2016,2019 IBM Corp. All Rights Reserved.
<p>
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
except in compliance with the License. You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
express or implied. See the License for the specific language governing permissions and
limitations under the License.
</p></font>
