# Environment

In [1]:
# Enable the autoreload extension so edits to imported modules are picked up live.
%load_ext autoreload
%autoreload 2

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode, iplot
from plotly.graph_objs import *
from plotly.subplots import make_subplots
import plotly.figure_factory as ff

In [3]:
# Basic
import os
import gc
import re
import sys
import glob
import numpy as np
import pandas as pd
from pathlib import Path
from pprint import pprint
import datetime

from collections import defaultdict
from typing import (List, Dict, Any, NoReturn, 
                    Tuple, Optional, Union)

from tqdm import tqdm, tqdm_notebook
import warnings
warnings.filterwarnings('ignore')

In [4]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve, auc

In [5]:
sys.path.insert(0,"..")

from config import init_config, config
from helpers import read_json, write_json, update_parameters
from create_training_dataset import TrainDataset, RunDataset

In [7]:
# Display settings - widen pandas output so all columns fit (default width is 80).
# Each option is set exactly once; `display.max_rows` used to be assigned twice
# (999, then 1000) and only the last value (1000) ever took effect.
pd.set_option('display.width', 2500)
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 100)
sns.set_style("whitegrid")

## Define paths

In [8]:
# INI file handed to `init_config`; presumably it populates the shared
# `config` object read below - confirm against config.py.
CONFIG_FILE = "../set_locations.ini"
init_config(CONFIG_FILE)

# Read the three data locations from the [DataPaths] section, in a fixed order.
TRAIN_DIR, OWNER_DIR, RUN_DIR = (
    config.get("DataPaths", option)
    for option in ("train_data", "owner_data", "run_data")
)

### Confusion matrix
Each pair is (actual label, predicted label):

+ TP - (1, 1)
+ FN - (1, 0)
+ TN - (0, 0)
+ FP - (0, 1)

### ROC-AUC Curve

In [9]:
# Toy example with 13 samples, used to exercise the plots below.
# Ground-truth labels.
y_true = [0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]

# Predicted scores, one per sample (passed to `roc_curve` as the score argument).
y_pred_probas = [
    0.06938956528902054,
    0.4620834797620773,
    0.024973433651030046,
    0.6395360529422759,
    0.3332471370697014,
    0.5266118049621582,
    0.18668699413537976,
    0.17641305476427077,
    0.25996942520141586,
    0.40525578558444963,
    0.3049312561750412,
    0.42381909191608425,
    0.721104884147644,
]

# Hard predictions; these equal the scores above thresholded at 0.5.
y_pred = [0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1]

# Raw metrics for inspection in the notebook.
fpr, tpr, thresholds = roc_curve(y_true, y_pred_probas)
cc = confusion_matrix(y_true, y_pred)

### Visualize

In [18]:
def visualize_quality(y_true, y_pred, y_pred_probas):
    """Display the ROC curve followed by the confusion matrix.

    Args:
        y_true: Ground-truth labels, forwarded to both plots.
        y_pred: Hard class predictions, forwarded to the confusion matrix plot.
        y_pred_probas: Predicted scores, forwarded to the ROC curve plot.

    Returns:
        None. Both figures are shown as a side effect.
    """
    plot_roc_curve(y_true=y_true, y_pred_probas=y_pred_probas)
    plot_confusion_matrix(y_true=y_true, y_pred=y_pred)
    
def plot_roc_curve(y_true, y_pred_probas):
    """Show an interactive ROC curve with the AUC value in the title.

    Args:
        y_true: Ground-truth labels, passed to ``roc_curve`` as the labels.
        y_pred_probas: Predicted scores, passed to ``roc_curve`` as the scores.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # The thresholds returned by roc_curve are not needed for the plot.
    fpr, tpr, _ = roc_curve(y_true, y_pred_probas)

    fig = px.area(
        x=fpr, y=tpr,
        title=f'ROC Curve (AUC={auc(fpr, tpr):.4f})',
        labels=dict(x='False Positive Rate', y='True Positive Rate'),
    )

    # Dashed diagonal reference line from (0, 0) to (1, 1).
    fig.add_shape(
        type='line', line=dict(dash='dash'),
        x0=0, x1=1, y0=0, y1=1
    )

    # Keep both axes on the same scale so the plot area stays square.
    fig.update_yaxes(scaleanchor="x", scaleratio=1)
    fig.update_xaxes(constrain='domain')

    # Figure size is set in one place only. It used to be passed to px.area
    # as 700x500 and then overridden here, so these are the effective values.
    fig.update_layout(
        autosize=False,
        width=500,
        height=600,
        margin=dict(l=50, r=50, b=100, t=100, pad=4),
    )
    fig.show()
    
    
def plot_confusion_matrix(y_true, y_pred):
    """Show an annotated heatmap of the binary confusion matrix.

    Columns are predicted values and rows are real values. The axis tick
    labels are hard-coded for a two-class problem: the first class in
    ``confusion_matrix`` order is labelled 'IMPOSTORS' and the second
    'GENUINE USERS' (assumes labels 0 and 1 respectively - confirm with the
    training code).

    Args:
        y_true: Ground-truth labels.
        y_pred: Hard class predictions.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    cm = confusion_matrix(y_true, y_pred)
    if cm.shape[0] < 2:
        # Only one class occurs in the inputs, so the inferred matrix is
        # smaller than 2x2 and cannot match the two axis labels below.
        # Pin the label set; this assumes the classes are encoded as 0/1.
        cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

    # Reverse the row order; the y labels below are reversed to match.
    cm = cm[::-1]

    x_labels = ['IMPOSTORS', 'GENUINE USERS']
    y_labels = x_labels[::-1]  # slicing already yields a new list

    # Cell annotations must be strings.
    z_text = [[str(count) for count in row] for row in cm]

    # set up figure
    fig = ff.create_annotated_heatmap(
        cm, x=x_labels, y=y_labels, annotation_text=z_text, colorscale='Viridis'
    )

    # add custom xaxis title
    fig.add_annotation(dict(font=dict(color="black", size=14),
                            x=0.5, y=-0.15,
                            showarrow=False,
                            text="Predicted value",
                            xref="paper", yref="paper"))

    # add custom yaxis title
    fig.add_annotation(dict(font=dict(color="black", size=14),
                            x=-0.2, y=0.5,
                            showarrow=False,
                            text="Real value",
                            textangle=-90,
                            xref="paper", yref="paper"))

    # add colorbar
    fig['data'][0]['showscale'] = True

    # Title, size and margins are set in one place.
    # NOTE(review): the code used to set margin l=150/t=50 "to make room for
    # yaxis title" and then immediately override it with the values below.
    # The effective values are kept so the rendering is unchanged; confirm the
    # "Real value" annotation is not clipped with l=50.
    fig.update_layout(
        title_text='<i><b>Confusion matrix</b></i>',
        autosize=False,
        width=500,
        height=500,
        margin=dict(l=50, r=50, b=100, t=100, pad=4),
    )
    fig.show()

In [19]:
# Render both quality plots for the toy predictions defined above.
visualize_quality(y_true=y_true, y_pred=y_pred, y_pred_probas=y_pred_probas)