**Testing Facets Data Visualisation**

In [5]:
# Load the Drive helper and mount
from google.colab import drive

# This will prompt for authorization.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# Load UCI census train and test data into dataframes.
import pandas as pd

train_data = pd.read_csv(
    '/content/drive/My Drive/HCC work/Bridges.csv',
    engine='python',
    na_values="?")


In [7]:
train_data.columns

Index(['Unnamed: 0', 'feature_id', 'Name', 'Structure Type', 'Status',
       'Easting', 'Northing', 'Maintaining Agent', 'Construction Year',
       'Hertitage', 'critical_bci', 'length', 'Carries', 'Crosses', 'Material',
       'HCC Assessment Required?', 'Historic Capacity', 'Assessment Found?',
       'Assessment Date', 'Length Score', 'Crosses Score', 'Carries Score',
       'Route Score', 'BCI Score', 'Year', 'Material Score', 'Importance',
       'Severity', 'Critical', 'Total Score', 'Radius', 'Latitude',
       'Longitude'],
      dtype='object')

In [0]:
features = ['feature_id', 'Name', 'Structure Type', 'Status',
       'Easting', 'Northing', 'Maintaining Agent',
       'Hertitage', 'critical_bci', 'length', 'Carries', 'Crosses', 'Material',
       'HCC Assessment Required?', 'Historic Capacity', 'Assessment Found?',
       'Length Score', 'Crosses Score', 'Carries Score',
       'Route Score', 'BCI Score', 'Year', 'Material Score', 'Importance',
       'Severity', 'Critical', 'Total Score', 'Latitude',
       'Longitude']

In [0]:
display_data = train_data[features]

In [22]:
display_data['Year'] = display_data['Year'].fillna(0).astype(int)
display_data['Year'] = display_data['Year'].astype('int32')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [23]:

# Display the Dive visualization for the training data.
from IPython.core.display import display, HTML

jsonstr = display_data.to_json(orient='records')
HTML_TEMPLATE = """<link rel="import" href="https://raw.githubusercontent.com/PAIR-code/facets/master/facets-dist/facets-jupyter.html">
        <facets-dive id="elem" height="600"></facets-dive>
        <script>
          var data = {jsonstr};
          document.querySelector("#elem").data = data;
        </script>"""
html = HTML_TEMPLATE.format(jsonstr=jsonstr)
display(HTML(html))

In [24]:
#!/usr/bin/env python
  
CSV_FILE_PATH = '/content/drive/My Drive/HCC work/Bridges.csv'
OUTPUT_FILE_PATH = '/content/drive/My Drive/HCC work/Bridges.html'
HTML_PAGE_TITLE = u'HCC Bridges Data'
  
# Number of lines from the CSV to be sub-sampled for the visualization.
# Set to None to disable sub-sampling.
SUBSAMPLE_SIZE = None
  
# Facets Dive settings. Inial layout of the visualized data.
# https://github.com/PAIR-code/facets/blob/master/facets_dive/README.md#interactive-properties
PRESETS = {
    u'verticalFacet': u'Critical',
    u'verticalBuckets': 10,
    u'horizontalFacet': u'Structure Type',
    u'horizontalBuckets': 14,
    u'colorBy': u'Material',
    u'imageFieldName': u'Name',
}
  
import json
import pandas as pd
  
df = pd.DataFrame.from_csv(CSV_FILE_PATH)
  
facets_dive_html_template = u"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8" />
    <title>%(title)s</title>
    <script>
        window.addEventListener('DOMContentLoaded', function() {
            var link = document.createElement('link');
            link.rel = "import";
            link.href = "https://raw.githubusercontent.com/PAIR-code/facets/master/facets-dist/facets-jupyter.html";
            link.onload = function() {
                var dive = document.createElement('facets-dive');
                dive.crossOrigin = "anonymous";
                dive.data = %(data)s;
                var presets = %(presets)s;
                for (var key in presets) {
                    if (presets.hasOwnProperty(key))
                        dive[key] = presets[key];
                }
                document.body.appendChild(dive);
            }
            document.head.appendChild(link);
        });
    </script>
    <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/0.7.24/webcomponents-lite.js"></script>
    <style>body, html { height: 100%%; margin: 0; padding: 0; width: 100%%; }</style>
</head>
<body></body>
</html>
""".strip()
  
if SUBSAMPLE_SIZE:
    df = df.sample(SUBSAMPLE_SIZE)
  
with open(OUTPUT_FILE_PATH, "wb") as f:
    rendered_template = facets_dive_html_template % {
        'title': HTML_PAGE_TITLE,
        'data': df.to_json(orient='records'),
        'presets': json.dumps(PRESETS)
    }
    f.write(rendered_template.encode('utf-8'))



In [0]:
# Clone the facets github repo to get access to the python feature stats generation code
!git clone https://github.com/pair-code/facets.git

fatal: destination path 'facets' already exists and is not an empty directory.


In [0]:
# Add the path to the feature stats generation code.
import sys
sys.path.insert(0, '/content/facets/facets_overview/python/')

# Create the feature stats for the datasets and stringify it.
import base64
from generic_feature_statistics_generator import GenericFeatureStatisticsGenerator

gfsg = GenericFeatureStatisticsGenerator()
proto = gfsg.ProtoFromDataFrames([{'name': 'train', 'table': display_data}])
protostr = base64.b64encode(proto.SerializeToString()).decode("utf-8")

In [26]:
# Display the facets overview visualization for this data
from IPython.core.display import display, HTML

HTML_TEMPLATE = """<link rel="import" href="https://raw.githubusercontent.com/PAIR-code/facets/master/facets-dist/facets-jupyter.html" >
        <facets-overview id="elem"></facets-overview>
        <script>
          document.querySelector("#elem").protoInput = "{protostr}";
        </script>"""
html = HTML_TEMPLATE.format(protostr=protostr)
display(HTML(html))

In [0]:
Html_file= open('/content/drive/My Drive/HCC work/Bridge_stats.html',"w")
Html_file.write(html)
Html_file.close()