## References

* [Adult Data Set](https://archive.ics.uci.edu/ml/datasets/Adult) (UCI)
* [Intro to Feature Engineering with TensorFlow](https://www.youtube.com/watch?v=d12ra3b_M-0) (Josh Gordon, YouTube)

In [None]:
import os
import sys

# Make the Facets Overview Python helpers importable. This has to run before
# the generic_feature_statistics_generator import further down.
sys.path.append(os.path.join('facets', 'facets_overview', 'python'))

import base64
import pandas as pd

# `display` and `HTML` belong to the public IPython.display module; importing
# them through IPython.core.display is deprecated.
from IPython.display import display, HTML
from generic_feature_statistics_generator import GenericFeatureStatisticsGenerator

# Inject a CSS rule that widens the notebook container and an HTML import
# for the Facets Jupyter bundle.
# NOTE(review): assumes the facets-dist nbextension is installed and served
# under /nbextensions — confirm for the target environment.
display(HTML("""
<style>.container { width:100% !important; }</style>
<link rel='import' href='/nbextensions/facets-dist/facets-jupyter.html'>
"""))

In [None]:
# Relative path of the CSV file that a later cell loads with pd.read_csv.
path = 'data/minor.csv'

In [None]:
# Column labels, assigned positionally to the columns of the CSV.
feature_names = [
    'Age', 'Workclass', 'fnlwgt', 'Education', 'Education-Num',
    'Marital Status', 'Occupation', 'Relationship', 'Race', 'Sex',
    'Capital Gain', 'Capital Loss', 'Hours per week', 'Country', 'Target',
]

# Load the table into a DataFrame:
#   * the separator is a regular expression (a comma with optional
#     surrounding whitespace), which is why the Python parsing engine is
#     selected;
#   * the first line of the file is skipped and the labels above are used
#     instead (presumably that line is a header row — confirm);
#   * '?' entries are read as missing values.
data = pd.read_csv(
    path,
    names=feature_names,
    sep=r'\s*,\s*',
    engine='python',
    skiprows=[0],
    na_values='?',
)

In [None]:
# Build the feature-statistics proto for the DataFrame, registered under the
# dataset name 'adult'.
generator = GenericFeatureStatisticsGenerator()
datasets = [{'name': 'adult', 'table': data}]
proto = generator.ProtoFromDataFrames(datasets)

# Serialize the proto and base64-encode it so it can be embedded as text
# inside the HTML snippet below.
serialized = proto.SerializeToString()
protostr = base64.b64encode(serialized).decode('utf-8')

# '{data}' is filled in with the base64 string by str.format.
template = """
<facets-overview id='overview'></facets-overview>
<script>
document.querySelector('#overview').protoInput = '{data}';
</script>"""

overview_html = template.format(data=protostr)
display(HTML(overview_html))

In [None]:
# Serialize the DataFrame as a JSON array with one object per row.
jsonstr = data.to_json(orient='records')

# '{data}' is replaced with the JSON text, which the script then assigns to
# the <facets-dive> element's `data` property.
template = """
<facets-dive id='dive' height='600'></facets-dive>
<script>
var data = {data};
document.querySelector('#dive').data = data;
</script>
"""

dive_html = template.format(data=jsonstr)
display(HTML(dive_html))