# Facets
Denis Rothman, 2020 <br>
Adapted from Notebook Reference:<br>
https://github.com/PAIR-code/facets/blob/master/colab_facets.ipynb

## Installing Facets

In [None]:
#@title Install the facets-overview pip package.
!pip install facets-overview

Collecting facets-overview
  Downloading https://files.pythonhosted.org/packages/df/8a/0042de5450dbd9e7e0773de93fe84c999b5b078b1f60b4c19ac76b5dd889/facets_overview-1.0.0-py2.py3-none-any.whl
Installing collected packages: facets-overview
Successfully installed facets-overview-1.0.0


In [None]:
#@title Importing data <br> Set repository to "github"(default) to read the data from GitHub <br> Set repository to "google" to read the data from Google {display-mode: "form"}
import os
from google.colab import drive

#Set repository to "github" to read the data from GitHub 
#Set repository to "google" to read the data from Google
repository="github"

if repository=="github":
  !curl -L https://raw.githubusercontent.com/PacktPublishing/Hands-On-Explainable-AI-XAI-with-Python/master/Chapter03/DLH_train.csv --output "DLH_train.csv"
  !curl -L https://raw.githubusercontent.com/PacktPublishing/Hands-On-Explainable-AI-XAI-with-Python/master/Chapter03/DLH_test.csv --output "DLH_test.csv"

  #Setting the path for each file
  dtrain="/content/DLH_train.csv"
  dtest="/content/DLH_test.csv"
  print(dtrain,dtest)  

if repository=="google":
  #Mounting the drive. If it is not mounted, a prompt will provide instructions.
  drive.mount('/content/drive')
  #Setting the path for each file
  dtrain='/content/drive/My Drive/XAI/Chapter03/DLH_Train.csv'
  dtest='/content/drive/My Drive/XAI/Chapter03/DLH_Train.csv'
  print(dtrain,dtest)

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  4922  100  4922    0     0  25502      0 --:--:-- --:--:-- --:--:-- 25502
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  5418  100  5418    0     0  24853      0 --:--:-- --:--:-- --:--:-- 24853
/content/DLH_train.csv /content/DLH_test.csv


# Facets Overview

## Loading the training and testing data

In [None]:
# Read the training and testing CSV files into pandas DataFrames.
import pandas as pd

# Column names applied to both files.
features = ["colored_sputum","cough","fever","headache","days","france","chicago","class"]

# Parser settings shared by both reads: fields are separated by a comma with
# optional surrounding whitespace (a regex, hence the python engine), and "?"
# is read as a missing value.
csv_options = dict(
    names=features,
    sep=r'\s*,\s*',
    engine='python',
    na_values="?",
)

train_data = pd.read_csv(dtrain, **csv_options)

# NOTE(review): only the test read skips row 0; confirm the test file really
# has a leading non-data line that the training file lacks.
test_data = pd.read_csv(dtest, skiprows=[0], **csv_options)

## Creating feature statistics for the datasets

In [None]:
# Build the feature statistics proto for both datasets and base64-encode it.
import base64
from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator

# One entry per dataset: a display name plus the DataFrame to summarise.
datasets = [
    {'name': 'train', 'table': train_data},
    {'name': 'test', 'table': test_data},
]

gfsg = GenericFeatureStatisticsGenerator()
proto = gfsg.ProtoFromDataFrames(datasets)

# Serialize the proto to bytes, then to base64 text so it can be placed
# inside the HTML of the visualization cell.
serialized = proto.SerializeToString()
protostr = base64.b64encode(serialized).decode("utf-8")
print(protostr)

CqQ0CgV0cmFpbhC4ARqiBwoOY29sb3JlZF9zcHV0dW0QARqNBwqzAgi4ARgBIAEtAACAPzKkAhobCQAAAAAAAPA/EQAAAAAAAPA/IWZmZmZmZjJAGhsJAAAAAAAA8D8RAAAAAAAA8D8hZmZmZmZmMkAaGwkAAAAAAADwPxEAAAAAAADwPyFmZmZmZmYyQBobCQAAAAAAAPA/EQAAAAAAAPA/IWZmZmZmZjJAGhsJAAAAAAAA8D8RAAAAAAAA8D8hZmZmZmZmMkAaGwkAAAAAAADwPxEAAAAAAADwPyFmZmZmZmYyQBobCQAAAAAAAPA/EQAAAAAAAPA/IWZmZmZmZjJAGhsJAAAAAAAA8D8RAAAAAAAA8D8hZmZmZmZmMkAaGwkAAAAAAADwPxEAAAAAAADwPyFmZmZmZmYyQBobCQAAAAAAAPA/EQAAAAAAAPA/IWZmZmZmZjJAIAERKh/G/bokA0AZv/vrHQiDAUAgDDEAAAAAAADwPzkzMzMzMzMbQEKQAhoSEcL1KFyPwuU/IQAAAAAAAEFAGhsJwvUoXI/C5T8RwvUoXI/C9T8hAAAAAACATUAaGwnC9Shcj8L1PxFSuB6F61EAQCEAAAAAAAA/QBoSCVK4HoXrUQBAEcL1KFyPwgVAGhsJwvUoXI/CBUARMjMzMzMzC0AhAAAAAAAAJkAaGwkyMzMzMzMLQBFSuB6F61EQQCEAAAAAAAAAQBobCVK4HoXrURBAEQrXo3A9ChNAIQAAAAAAAABAGhsJCtejcD0KE0ARwvUoXI/CFUAhAAAAAAAAFEAaGwnC9Shcj8IVQBF6FK5H4XoYQCEAAAAAAAA6QBobCXoUrkfhehhAETMzMzMzMxtAIQAAAAAAACxAQpsCGhIRmpmZmZmZyT8hZmZmZmZmMkAaGwmamZmZmZnJPxFmZmZmZmbmPyFmZmZmZmYyQBobCWZmZmZmZuY/EQAAAAAAAPA/IWZmZmZmZjJAGhsJAAAAAAAA

## Create HTML page for Facets Overview

In [None]:
# Display the Facets Overview visualization for this data
# display/HTML are imported from IPython.display, the public module; the
# IPython.core.display path is deprecated and fails on recent IPython releases.
from IPython.display import display, HTML

# Page fragment: loads the web-components polyfill and the Facets front-end,
# then hands the base64-encoded statistics to the <facets-overview> element.
HTML_TEMPLATE = """
        <script src="https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js"></script>
        <link rel="import" href="https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html" >
        <facets-overview id="elem"></facets-overview>
        <script>
          document.querySelector("#elem").protoInput = "{protostr}";
        </script>"""
# Substitute the statistics string into the template and render it.
html = HTML_TEMPLATE.format(protostr=protostr)
display(HTML(html))

In [None]:
#@title Relative entropy or Kullback-Leibler divergence example {display-mode: "form"}
from scipy.stats import entropy

# Two sequences of non-negative weights of equal length.
X = [10, 1, 1, 20, 1, 10, 4]
Y = [1, 2, 3, 4, 2, 2, 5]

# Passing a second sequence makes entropy() compute the relative entropy of
# X with respect to Y; the value is the cell's displayed result.
entropy(X, qk=Y)

0.5396425997525232

# Facets Dive

In [None]:
#@title Python to_json example {display-mode: "form"}
# display/HTML are imported from IPython.display, the public module; the
# IPython.core.display path is deprecated and fails on recent IPython releases.
from IPython.display import display, HTML

# Serialize the training DataFrame as a JSON array with one object per row.
jsonstr=train_data.to_json(orient='records')
# Last expression of the cell: shows the JSON string as the cell output.
jsonstr

'[{"colored_sputum":1.0,"cough":3.5,"fever":9.4,"headache":3.0,"days":3,"france":0,"chicago":1,"class":"flu"},{"colored_sputum":1.0,"cough":3.4,"fever":8.4,"headache":4.0,"days":2,"france":0,"chicago":1,"class":"flu"},{"colored_sputum":1.0,"cough":3.3,"fever":7.3,"headache":3.0,"days":4,"france":0,"chicago":1,"class":"flu"},{"colored_sputum":1.0,"cough":3.4,"fever":9.5,"headache":4.0,"days":2,"france":0,"chicago":1,"class":"flu"},{"colored_sputum":1.0,"cough":2.0,"fever":8.0,"headache":3.5,"days":1,"france":0,"chicago":1,"class":"flu"},{"colored_sputum":2.0,"cough":2.0,"fever":8.0,"headache":2.7,"days":3,"france":0,"chicago":1,"class":"flu"},{"colored_sputum":2.0,"cough":3.6,"fever":8.0,"headache":4.6,"days":3,"france":0,"chicago":1,"class":"flu"},{"colored_sputum":1.0,"cough":4.7,"fever":8.0,"headache":3.7,"days":2,"france":0,"chicago":1,"class":"flu"},{"colored_sputum":2.0,"cough":6.2,"fever":8.0,"headache":2.7,"days":1,"france":0,"chicago":1,"class":"flu"},{"colored_sputum":2.0,"cou

In [None]:
# Display the Dive visualization for the training data.
# display/HTML are imported from IPython.display, the public module; the
# IPython.core.display path is deprecated and fails on recent IPython releases.
from IPython.display import display, HTML

# Page fragment: loads the web-components polyfill and the Facets front-end,
# then assigns the JSON records to the <facets-dive> element.
HTML_TEMPLATE = """
        <script src="https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js"></script>
        <link rel="import" href="https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html">
        <facets-dive id="elem" height="600"></facets-dive>
        <script>
          var data = {jsonstr};
          document.querySelector("#elem").data = data;
        </script>"""
# jsonstr is inserted as a JavaScript literal, so it must already be valid JSON.
html = HTML_TEMPLATE.format(jsonstr=jsonstr)
display(HTML(html))