In [None]:
import os
import pandas as pd
from IPython.display import clear_output

import numpy as np
import holoviews as hv
from holoviews import streams
hv.extension('bokeh')

import boonsdk
from boonlab.display import show_asset, show_thumbnails
from boonlab.pandas import search_to_df
from boonlab.proxies import download_proxy
from boonsdk import app_from_env


def get_labels_agg_for_pred(pred, model, minscore, maxscore):
    '''Return the label buckets of the assets that received one prediction.

    Searches (through the notebook-level ``app`` client) for assets whose
    ``<model.namespace>.label`` equals ``pred`` and whose
    ``<model.namespace>.score`` lies in ``[minscore, maxscore]``, then
    aggregates the ``labels.label`` values of the nested ``labels`` entries
    whose ``labels.modelId`` equals ``model.id``.

    Args:
        pred: prediction value to filter on.
        model: object exposing ``namespace`` and ``id`` attributes.
        minscore: inclusive lower bound on the prediction score.
        maxscore: inclusive upper bound on the prediction score.

    Returns:
        The ``buckets`` entry of the terms aggregation, as returned by the
        search backend (at most 1000 buckets are requested).
    '''
    label_field = model.namespace + ".label"
    score_field = model.namespace + ".score"

    # Restrict the search to assets carrying this prediction in the score window.
    asset_filters = [
        {"term": {label_field: pred}},
        {"range": {score_field: {"gte": minscore, "lte": maxscore}}},
    ]

    # Inside the nested "labels" documents keep only this model's labels and
    # count them per label value.
    per_label_counts = {
        "filter": {"term": {"labels.modelId": model.id}},
        "aggs": {
            "nested_names": {
                "terms": {"field": "labels.label", "size": 1000}
            }
        },
    }

    q = {
        "size": 0,  # only the aggregation is wanted, not the hits
        "query": {"bool": {"filter": asset_filters}},
        "aggs": {
            "names": {
                "nested": {"path": "labels"},
                "aggs": {"modelId": per_label_counts},
            }
        },
    }

    node = app.assets.search(q).aggregations()
    # Response keys are prefixed with the aggregation type by the backend.
    for key in ('nested#names', 'filter#modelId', 'sterms#nested_names', 'buckets'):
        node = node[key]
    return node

def get_preds_agg(model):
    '''Return the buckets of values a classifier has predicted.

    Runs a terms aggregation on ``<model.namespace>.label`` through the
    notebook-level ``app`` client, asking for up to 1000 values ordered by
    descending document count.

    Args:
        model: object exposing a ``namespace`` attribute.

    Returns:
        The ``buckets`` entry of the ``names`` terms aggregation.
    '''
    label_field = model.namespace + '.label'

    by_prediction = {
        "field": label_field,
        "size": 1000,
        "order": {"_count": "desc"},
    }

    # NOTE(review): the top-level "nested" clause is sent as-is; whether the
    # backend uses or ignores it cannot be told from this file - confirm.
    q = {
        "nested": {"path": label_field},
        "aggs": {"names": {"terms": by_prediction}},
    }

    aggregations = app.assets.search(q).aggregations()
    return aggregations['sterms#names']['buckets']


In [None]:
%env BOONAI_SERVER https://dev.boonai.app/
%env BOONAI_APIKEY "eyJhY2Nlc3NLZXkiOiJET0JyUUE0bmI0VDMwOG9XbENmSlRRIiwic2VjcmV0S2V5IjoieGVsWXVxd2lpUWpWemI0c3Y2QXBPdyJ9"
app = app_from_env()


In [None]:
# Full confusion matrix
#
# For every value the classifier predicted, count how the matching assets were
# actually labelled, then plot the real-vs-predicted counts as a heat map.

model = app.models.find_one_model(name='knn')

# Score window passed to the per-prediction label aggregation.
minscore = 0.0
maxscore = 1.0

preds = get_preds_agg(model)
allpreds = [p['key'] for p in preds]

# One (real label, predicted label, asset count) record per returned bucket.
data = [
    (bucket['key'], predicted, bucket['doc_count'])
    for predicted in allpreds
    for bucket in get_labels_agg_for_pred(predicted, model, minscore, maxscore)
]

df = pd.DataFrame(data, columns=['Real', 'Predicted', 'number'])

# np.unique returns the sorted distinct labels, which fixes the category
# order on each axis.
real_dim = hv.Dimension("Real", values=np.unique(df["Real"]))
pred_dim = hv.Dimension("Predicted", values=np.unique(df["Predicted"]))

# All plot options in one place (previously split between a %opts magic, whose
# 750x700 size was silently overridden, and a later .opts() call).
heatmap = hv.HeatMap(df, kdims=[real_dim, pred_dim]).opts(
    width=900,
    height=900,
    logz=True,  # logarithmic colour scale
    cmap='Blues',
    tools=['hover'],
    xrotation=90,
    fontsize={'xticks': '6pt'},
)

# Last expression of the cell: rendered through the notebook's rich display.
heatmap


In [None]:
# The (Real, Predicted, number) records behind the heat map above; the cells
# below reuse this frame.

df

In [None]:
# Turn the aggregated counts into a confusion-matrix array.
# Each row of df is one (real, predicted) pair; its count is supplied as the
# sample weight so the pair is counted "number" times.
# Reference: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html

from sklearn.metrics import confusion_matrix

true, pred, num = (list(df[column]) for column in ('Real', 'Predicted', 'number'))
confusion_matrix(true, pred, sample_weight=num, normalize=None)

In [None]:
# Plot the confusion matrix with matplotlib.
# Note: the axes are swapped relative to the HoloViews heat map above - here
# rows (y axis) are the real labels and columns (x axis) the predicted ones,
# following scikit-learn's confusion_matrix convention.

import matplotlib.pyplot as plt 

cm = confusion_matrix(true, pred, sample_weight=num, normalize=None)
plt.figure(figsize=(8, 6))
# Keep the image handle: a later cell rasterises it directly.
im = plt.imshow(cm, cmap=plt.cm.Blues)

In [None]:
# Finally, rasterise the whole matrix at 2x magnification. "x" is the rendered
# image as an array (presumably RGBA, as produced by matplotlib - confirm),
# which OpenCV can consume.
import cv2

# Take the renderer from the figure the image belongs to instead of
# plt.gcf(): with the inline backend figures are closed at the end of each
# cell, so plt.gcf() here would create (and display) a new, empty figure.
r = im.figure.canvas.get_renderer()
x = im.make_image(r, magnification=2.0)[0]

In [None]:
# Preview the rasterised matrix image.
plt.imshow(x)
plt.show()

In [None]:
# To save the image with OpenCV the red and blue channels must be swapped:
# cv2.imwrite expects BGR channel order, while "x" was rendered by matplotlib
# (presumably RGB(A) order - confirm). COLOR_BGR2RGB performs that swap.

y = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)
cv2.imwrite('conf.jpg', y)