# Sample Notebook that creates a classification model from a Pandas DataFrame

## Start by importing pixiedust and tensorflow

In [21]:
import pixiedust
import tensorflow as tf

## Browse the PixieDust built-in DataSets

In [2]:
# Called with no argument, sampleData() lists the built-in sample datasets
# (the Id column of the listing is what gets passed to load one).
pixiedust.sampleData()

Id,Name,Topic,Publisher
1,Car performance data,Transportation,IBM
2,"Sample retail sales transactions, January 2009",Economy & Business,IBM Cloud Data Services
3,Total population by country,Society,IBM Cloud Data Services
4,GoSales Transactions for Naive Bayes Model,Leisure,IBM
5,Election results by County,Society,IBM
6,"Million dollar home sales in Massachusetts, USA Feb 2017 through Jan 2018",Economy & Business,Redfin.com
7,"Boston Crime data, 2-week sample",Society,City of Boston


## Load the Boston Crime dataset

In [3]:
import pixiedust
# Load sample dataset Id 7 ("Boston Crime data, 2-week sample" in the listing);
# per the download log it is returned as a pandas DataFrame.
crimes = pixiedust.sampleData(7)

Downloading 'Boston Crime data, 2-week sample' from https://raw.githubusercontent.com/ibm-watson-data-lab/open-data/master/crime/boston_crime_sample.csv
Downloaded 2986857 bytes
Creating pandas DataFrame for 'Boston Crime data, 2-week sample'. Please wait...
Loading file using 'pandas'
Successfully created pandas DataFrame for 'Boston Crime data, 2-week sample'


## Do some exploration using the "display()" API

In [None]:
# Explore the crimes DataFrame interactively with the display() API
# (presumably brought into scope by `import pixiedust` - confirm).
display(crimes)

## Implement the training function using the TensorFlow DNNClassifier

In [5]:
import tensorflow as tf

def do_training(train, train_labels, test, test_labels, num_classes,
                batch_size=100, steps=1000, hidden_units=None):
    """Train and evaluate a TensorFlow DNNClassifier.

    Args:
        train: Training features. Every key returned by ``train.keys()`` is
            turned into a numeric feature column (the PixieApp in this
            notebook passes a pandas DataFrame).
        train_labels: Labels aligned with ``train``.
        test: Evaluation features, with the same keys as ``train``.
        test_labels: Labels aligned with ``test``.
        num_classes: Number of target classes, forwarded as ``n_classes``.
        batch_size: Batch size used for both training and evaluation.
        steps: Number of training steps.
        hidden_units: Units per hidden layer; defaults to two hidden layers
            of 10 units each.

    Returns:
        A ``(classifier, eval_result)`` tuple: the trained estimator and the
        value returned by ``classifier.evaluate``.
    """
    if hidden_units is None:
        # Default topology: 2 hidden layers with 10, 10 units respectively.
        hidden_units = [10, 10]

    #set TensorFlow logging level to INFO
    tf.logging.set_verbosity(tf.logging.INFO)

    # Compute feature_columns from dataframe keys using list comprehension
    feature_columns = [
        tf.feature_column.numeric_column(key=key) for key in train.keys()
    ]

    classifier = tf.estimator.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=hidden_units,
        n_classes=num_classes)

    # Train the Model
    classifier.train(
        input_fn=lambda: train_input_fn(train, train_labels, batch_size),
        steps=steps
    )

    # Evaluate the model
    eval_result = classifier.evaluate(
        input_fn=lambda: eval_input_fn(test, test_labels, batch_size)
    )

    return (classifier, eval_result)

def input_fn(features, labels, batch_size, train):
    """Build the shuffled, batched tf.data.Dataset fed to the estimator.

    Args:
        features: Mapping-like object convertible with ``dict(features)``.
        labels: Labels aligned with ``features``.
        batch_size: Number of elements per batch.
        train: When truthy, the dataset is repeated (training only).

    Returns:
        The batched dataset.
    """
    # Slice the (features, label) pair element by element.
    slices = (dict(features), labels)
    pipeline = tf.data.Dataset.from_tensor_slices(slices)
    pipeline = pipeline.shuffle(1000)
    # Evaluation makes a single pass; only training repeats the data.
    pipeline = pipeline.repeat() if train else pipeline
    return pipeline.batch(batch_size)

def train_input_fn(features, labels, batch_size):
    """Return the input dataset for training (input_fn with train=True)."""
    training_dataset = input_fn(features, labels, batch_size, train=True)
    return training_dataset

def eval_input_fn(features, labels, batch_size):
    """Return the input dataset for evaluation (input_fn with train=False)."""
    evaluation_dataset = input_fn(features, labels, batch_size, train=False)
    return evaluation_dataset

## Implement the PixieApp for creating the classification model

In [20]:
from pixiedust.display.app import *

@PixieApp
class SimpleClassificationDNN():
    """PixieApp that trains a DNN classifier on a DataFrame's numeric columns.

    Flow: pick a predictor (label) column, review the class distribution of
    the train/test split, then train the model and show the evaluation
    metrics returned by do_training().
    """

    def _numeric_dataset(self):
        """Return the entity restricted to numeric columns without NaN values."""
        return self.pixieapp_entity.dropna(axis=1).select_dtypes(
            include=['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
        )

    @route()
    def main_screen(self):
        """Default route: let the user choose the predictor column."""
        # Only offer columns that survive the numeric/no-NaN filtering applied
        # in prepare_training; any other choice could not be used there.
        self.predictor_candidates = self._numeric_dataset().columns.values.tolist()
        return """

<h1 style="margin:40px">
    <center>The classification model will be trained on all the numeric columns of the dataset</center>
</h1>

<style>
    div.outer-wrapper {
        display: table;width:100%;height:300px;
    }
    div.inner-wrapper {
        display: table-cell;vertical-align: middle;height: 100%;width: 100%;
    }
</style>
<div class="outer-wrapper">
    <div class="inner-wrapper">
        <div class="col-sm-3"></div>
        <div class="input-group col-sm-6">
          <select id="cols{{prefix}}" style="width:100%;height:30px" pd_options="predictor=$val(cols{{prefix}})">
              <option value="0">Select a predictor column</option>
              {%for col in this.predictor_candidates%}
              <option value="{{col}}">{{col}}</option>
              {%endfor%}
          </select>
        </div>
    </div>
</div>     
        """

    @route(predictor="*")
    @templateArgs
    def prepare_training(self, predictor):
        """Split the data into train/test sets and show their class distribution.

        Args:
            predictor: Name of the column selected as the label.

        Sets self.dataset, self.num_classes, self.train_x/train_y,
        self.test_x/test_y and the full_train/full_test frames used by the
        charts. Returns an error fragment instead when the predictor cannot
        be used.
        """
        #select only numerical columns
        self.dataset = self._numeric_dataset()

        # Guard against the placeholder option ("0") and any column that was
        # filtered out above; groupby() would otherwise raise a KeyError.
        if predictor not in self.dataset.columns:
            return """
<div class="alert alert-warning" style="margin:20px">
    "{{predictor}}" cannot be used as the predictor: select a numeric column that has no missing values.
</div>
"""

        #Compute the number of classes by counting the groups
        self.num_classes = self.dataset.groupby(predictor).size().shape[0]
        if self.num_classes < 2:
            return """
<div class="alert alert-warning" style="margin:20px">
    "{{predictor}}" has fewer than 2 distinct values and cannot be used to train a classifier.
</div>
"""

        # The classifier expects integer class ids in [0, num_classes). Build
        # the category set from the whole dataset so train and test labels
        # share one encoding; values already in 0..n-1 are left unchanged.
        label_dtype = self.dataset[predictor].astype('category').dtype

        #Create the train and test feature and labels
        self.train_x = self.dataset.sample(frac=0.8)
        # Keep copies that still contain the predictor for the bar charts
        self.full_train = self.train_x.copy()
        self.train_y = self.train_x.pop(predictor).astype(label_dtype).cat.codes.astype('int64')
        self.test_x = self.dataset.drop(self.train_x.index)
        self.full_test = self.test_x.copy()
        self.test_y = self.test_x.pop(predictor).astype(label_dtype).cat.codes.astype('int64')

        # Read by the template below through @templateArgs
        bar_chart_options = {
          "rowCount": "100",
          "keyFields": predictor,
          "handlerId": "barChart",
          "noChartCache": "true"
        }

        return """
<div class="container" style="margin-top:20px">
    <div class="row">
        <div class="col-sm-5">
            <h3><center>Train set class distribution</center></h3>
            <div pd_entity="full_train" pd_render_onload>
                <pd_options>{{bar_chart_options|tojson}}</pd_options>
            </div>
        </div>
        <div class="col-sm-5">
            <h3><center>Test set class distribution</center></h3>
            <div pd_entity="full_test" pd_render_onload>
                <pd_options>{{bar_chart_options|tojson}}</pd_options>
            </div>
        </div>
    </div>
</div>

<div style="text-align:center">
    <button class="btn btn-default" type="submit" pd_options="do_training=true">
        Start Training
    </button>
</div>
"""

    @route(do_training="*")
    @captureOutput
    def do_training_screen(self):
        """Train the model on the prepared split and list the evaluation metrics."""
        self.classifier, self.eval_results = \
            do_training(self.train_x, self.train_y, self.test_x, self.test_y, self.num_classes)
        return """
<h2>Training completed successfully</h2>
<table>
    <thead>
        <th>Metric</th>
        <th>Value</th>
    </thead>
    <tbody>
{%for key,value in this.eval_results.items()%}
<tr>
    <td>{{key}}</td>
    <td>{{value}}</td>
</tr>
{%endfor%}
    </tbody>
</table>
        """
    
# Instantiate the PixieApp and run it on the crimes DataFrame; the class reads
# the object passed to run() as self.pixieapp_entity.
app = SimpleClassificationDNN()
app.run(crimes)