# TemplateDataProject - Basic Run

# 0. Initialization

In [3]:
#@title Connect to gDrive & Load Files
# Choose the working directory by runtime: the string form of the IPython shell
# contains 'google.colab' when the notebook is executing on Colab.
if 'google.colab' in str(get_ipython()):
    print('Running on CoLab')
    PROJECT_FOLDER="/content/drive/MyDrive/workspace/template-data-project" #@param {"type":"string"}
    #@markdown - Connect to gDrive Folder
    ## Load this for local development version of library.. Loads from GoogleDrive (changes persist)
    from google.colab import drive, widgets, data_table; 
    drive.mount('/content/drive');
    # Work from the project folder so the relative paths used by this cell
    # (requirements.txt, .env.yaml, ./src) resolve against it.
    %cd "$PROJECT_FOLDER"

    #@markdown - Install Requirements
    !pip install -qr requirements.txt

else:
    # NOTE(review): outside Colab the project is expected at /app — presumably a
    # container mount; confirm against the local setup.
    %cd /app
    print('Not running on CoLab')

    
#@markdown - Load .env
import re, os
# Copy flat "KEY: value" lines from .env.yaml into the process environment.
# This is a line-based reader, not a YAML parser: each line is split once on the
# first ':' (plus any following spaces) and the remainder is stored verbatim.
with open('.env.yaml', 'r') as env_file:  # context manager closes the handle
  lines = env_file.read().split('\n')
for l in lines:
  # Blank lines (including the empty string produced by a trailing newline) and
  # '#' comment lines carry no key/value pair; without this guard segs[1] raised
  # IndexError on them.
  if not l.strip() or l.lstrip().startswith('#'):
    continue
  segs = re.split(': *', l, 1)
  os.environ[segs[0]] = segs[1]

#@markdown - Import Utils .py FIles
import sys, os; from importlib import reload; sys.path.append(os.path.abspath("./src"))
from utils import notebook_helpers; reload(notebook_helpers)
!pwd

Running on CoLab
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/workspace/template-data-project
/content/drive/MyDrive/workspace/template-data-project


In [4]:
#@title Imports
from io import StringIO

import pandas as pd
import plotly.express as px


# 1. Fetch and Parse Data
src/data/load_data.py


In [5]:
from data import load_data; reload(load_data)
df = load_data.load_data()

# 2. Explore Data

In [6]:
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
0,5.1,3.5,1.4,0.2,setosa,1
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,1
3,4.6,3.1,1.5,0.2,setosa,1
4,5.0,3.6,1.4,0.2,setosa,1
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica,3
146,6.3,2.5,5.0,1.9,virginica,3
147,6.5,3.0,5.2,2.0,virginica,3
148,6.2,3.4,5.4,2.3,virginica,3


In [7]:
px.parallel_coordinates(df, color='species_id')

In [8]:
px.scatter_3d(df, 'sepal_length','petal_length','sepal_width',color='species')

# 3. Train Model
src/models/train_model.py

In [9]:
from models import train_model; reload(train_model)
# Column names handed to trainModelForDf: the four measurement columns as
# inputs and species_id as the target.
inputCols = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
# NOTE(review): targetCol is a one-element list. The recorded output of this
# cell shows a "column-vector y was passed when a 1d array was expected"
# warning, presumably because the list selects a 2-D frame inside
# trainModelForDf — confirm in src/models/train_model.py before changing it.
targetCol = ['species_id']

# Fit the model on the full frame; the result is reused by the prediction cell.
model = train_model.trainModelForDf(df, inputCols, targetCol)

model.score 0.98



A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().



# 4. Test & Visualize Results

In [10]:
from models import predict_model; reload(predict_model)
from sklearn.metrics import confusion_matrix

## Create Test Set
# random_state pins the 15 sampled rows so the confusion matrix below is the
# same on every Restart & Run All.
# NOTE(review): df is the same frame that was passed to trainModelForDf, so
# unless that function holds rows out internally this measures fit on data the
# model has already seen, not generalization.
df_test = df.sample(15, random_state=42)

## Predict
y_pred = predict_model.predict(model, df_test[inputCols])

## Display Results
# Rows are the true labels, columns the predicted labels.
pd.DataFrame(confusion_matrix(df_test[targetCol], y_pred))

Unnamed: 0,0,1,2
0,6,0,0
1,0,4,0
2,0,0,5


# 5. Interactive App

In [11]:
#@title Install jupyter_dash
!pip install -q jupyter-dash
import jupyter_dash
reload(jupyter_dash)

<module 'jupyter_dash' from '/usr/local/lib/python3.7/dist-packages/jupyter_dash/__init__.py'>

In [None]:
import json
from dash import dcc, html
# from dash.dependencies import Input, Output, State


from src.dash.apps import dash_global

# Smallest possible Dash app: one centred heading, served with mode='inline'.
app = jupyter_dash.JupyterDash(__name__)

app.layout = html.Div(
    children=[
        html.H1(children='Modeling', style={"textAlign": "center"}),
    ]
)

if __name__ == '__main__':
    # width/height are forwarded to run_server as plain keyword arguments.
    # (An external stylesheet could be supplied via assets_external_path, e.g.
    #  https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css)
    app.run_server(
        mode='inline',
        host="0.0.0.0",
        port=8052,
        debug=True,
        width='1200px',
        height='800px',
    )

In [None]:
#@title Dash Inline - DataGrid Filter & 3d Scatter

import json
from dash import html, dash_table, dcc
from dash.dependencies import Input, Output, State

app = jupyter_dash.JupyterDash(__name__)

# Rows shown per table page; passed to the DataTable below so the constant and
# the table cannot drift apart.
PAGE_SIZE = 20

# Layout: a paged/filterable table on the left, a container for the graph on
# the right, plus a dcc.Store that carries the filtered frame between callbacks.
app.layout = html.Div(
    # Dash style dictionaries use camelCased keys (flexDirection, like the
    # textAlign / overflowY keys used elsewhere in this notebook).
    style={'display': 'flex', 'flexDirection': 'row'},
    children=[
        html.Div(
            dash_table.DataTable(
                id='table-paging-with-graph',
                columns=[
                    {"name": i, "id": i} for i in sorted(df.columns)
                ],
                # 'custom' actions: paging, filtering and sorting are handled
                # by the callbacks in this cell rather than by the table itself.
                page_current=0, page_size=PAGE_SIZE, page_action='custom',
                filter_action='custom', filter_query='',
                sort_action='custom', sort_mode='multi', sort_by=[]
            ),
            style={'height': 750, 'overflowY': 'scroll', 'padding':10},
        ),
        html.Div(
            id='table-paging-with-graph-container',
        ),
        dcc.Store(id='data-filtered')   # https://dash.plotly.com/sharing-data-between-callbacks
    ]
)

# Filter-query operators. Each row lists the accepted spellings of one operator;
# the first spelling, stripped, is the canonical name split_filter_part returns.
# Word spellings carry a trailing space because the filter string needs one
# after them.
operators = [['ge ', '>='],
             ['le ', '<='],
             ['lt ', '<'],
             ['gt ', '>'],
             ['ne ', '!='],
             ['eq ', '='],
             ['contains '],
             ['datestartswith ']]


def split_filter_part(filter_part):
    """Parse one filter expression into ``(column, operator, value)``.

    The first operator spelling from ``operators`` found in ``filter_part``
    splits it into a name part and a value part. The column name is the text
    between the outermost ``{`` and ``}`` of the name part. A value wrapped in
    matching quotes (``'``, ``"`` or backtick) is unquoted and its escaped
    quotes are restored; otherwise the value is converted to ``float`` when
    possible and left as a string when not.

    Args:
        filter_part: a single expression such as ``"{sepal_length} ge 5"``.

    Returns:
        ``(name, operator, value)`` with the canonical operator name (for
        example ``'ge'``), or ``[None, None, None]`` when no operator is found
        or the operator has no operand yet.
    """
    for operator_type in operators:
        for operator in operator_type:
            if operator in filter_part:
                name_part, value_part = filter_part.split(operator, 1)
                name = name_part[name_part.find('{') + 1: name_part.rfind('}')]

                value_part = value_part.strip()
                if not value_part:
                    # Operator present but nothing after it (e.g. "{col} eq ").
                    # Indexing value_part[0] used to raise IndexError here;
                    # treat the incomplete expression as "no filter" instead.
                    return [None] * 3

                v0 = value_part[0]
                if (v0 == value_part[-1] and v0 in ("'", '"', '`')):
                    value = value_part[1: -1].replace('\\' + v0, v0)
                else:
                    try:
                        value = float(value_part)
                    except ValueError:
                        value = value_part

                # word operators need spaces after them in the filter string,
                # but we don't want these later
                return name, operator_type[0].strip(), value

    return [None] * 3

@app.callback(
    Output('data-filtered', "data"),
    Input('table-paging-with-graph', "sort_by"),
    Input('table-paging-with-graph', "filter_query"))
def update_table(sort_by, filter):
    """Apply the table's filter query and sort order to ``df``.

    Args:
        sort_by: list of dicts with 'column_id' and 'direction' keys.
        filter: filter query string; expressions are separated by ' && '.

    Returns:
        The filtered and sorted frame serialised with ``to_json(orient='split')``,
        which is written to the 'data-filtered' store.
    """
    # These operator names double as pandas Series comparison method names.
    comparison_ops = ('eq', 'ne', 'lt', 'le', 'gt', 'ge')

    dff = df
    for expression in filter.split(' && '):
        col_name, operator, filter_value = split_filter_part(expression)

        if operator in comparison_ops:
            row_mask = getattr(dff[col_name], operator)(filter_value)
        elif operator == 'contains':
            row_mask = dff[col_name].str.contains(filter_value)
        elif operator == 'datestartswith':
            # Simplified version of the front-end logic: only works with
            # complete fields in standard format.
            row_mask = dff[col_name].str.startswith(filter_value)
        else:
            # Expression without a recognised operator: nothing to apply.
            continue
        dff = dff.loc[row_mask]

    if len(sort_by):
        sort_columns = [col['column_id'] for col in sort_by]
        sort_ascending = [col['direction'] == 'asc' for col in sort_by]
        dff = dff.sort_values(sort_columns, ascending=sort_ascending)

    return dff.to_json(orient='split')


@app.callback(
    Output('table-paging-with-graph', "data"),
    Input('data-filtered', "data"),
    Input('table-paging-with-graph', "page_current"),
    Input('table-paging-with-graph', "page_size"))
def update_table(rows, page_current, page_size):
    """Return the records for the currently selected table page.

    NOTE(review): this def reuses the name ``update_table`` from the
    filter/sort callback earlier in the cell and rebinds it; presumably both
    callbacks stay registered because registration happens in the decorator —
    confirm, and consider giving the two functions distinct names.

    Args:
        rows: JSON string (``orient='split'``) read from the 'data-filtered' store.
        page_current: zero-based index of the page to show.
        page_size: number of rows per page.

    Returns:
        A list of row dicts (``to_dict('records')``) for the requested page.
    """
    # Wrap the payload in StringIO: passing a literal JSON string straight to
    # pd.read_json is deprecated in recent pandas releases.
    dff = pd.read_json(StringIO(rows), orient='split')
    first_row = page_current * page_size
    return dff.iloc[first_row: first_row + page_size].to_dict('records')


@app.callback(
    Output('table-paging-with-graph-container', "children"),
    Input('data-filtered', "data"))
def update_graph(rows):
    """Build the 3-D scatter plot for the currently filtered rows.

    Args:
        rows: JSON string (``orient='split'``) read from the 'data-filtered' store.

    Returns:
        A ``dcc.Graph`` plotting sepal_length, petal_length and sepal_width,
        coloured by species.
    """
    # Wrap the payload in StringIO: passing a literal JSON string straight to
    # pd.read_json is deprecated in recent pandas releases.
    dff = pd.read_json(StringIO(rows), orient='split')
    return dcc.Graph(
        figure=px.scatter_3d(dff, 'sepal_length','petal_length','sepal_width', color='species')
    )

if __name__ == '__main__':
    # Serve the table + graph app with mode='inline'; width/height are
    # forwarded to run_server as plain keyword arguments.
    # (An external stylesheet could be supplied via assets_external_path, e.g.
    #  https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css)
    app.run_server(
        mode='inline',
        host="0.0.0.0",
        port=8051,
        debug=True,
        width='1200px',
        height='800px',
    )

## External URL
Recommended: open a terminal (bottom-left) and run the following commands:
- `ngrok http 8051 --log=stdout > ngrok.log &`
- `cat ngrok.log`
- `python src/dash/index.py`

In [73]:
%%script false
#@title Skip.. Initialize NGROK
NGROK_AUTH="22ORETTuVHdmPUy8y4Vj4inqlQN_84tmp9jkzhKCJJ3kWGdK1" #@param
if 'NGROK_URL' not in globals():
  print('Loading ngrok..')
  !pip install pyngrok
  !ngrok authtoken NGROK_AUTH
  from pyngrok import ngrok
  NGROK_URL = ngrok.connect(addr=8051)
else : 
  print('Using Existing ngrok url..')

print(NGROK_URL)

Using Existing ngrok url..
NgrokTunnel: "http://cbb4-34-75-10-161.ngrok.io" -> "http://localhost:8051"


In [None]:
%%script false
#@title Skip.. Multi Page Dash App
from src.dash.apps import data_overview, modeling

# Multi-page app shell: a URL tracker, a row of navigation links, and an empty
# container whose children are filled in by the display_page callback.
app3 = jupyter_dash.JupyterDash(__name__)

app3.layout = html.Div([
    # Exposes the current path as the 'pathname' property that display_page reads.
    dcc.Location(id='url', refresh=False),
    html.Div([
        # The hrefs must match the paths compared in display_page.
        dcc.Link('Data Overview | ', href='/apps/data_overview'),
        dcc.Link('Modeling', href='/apps/modeling'),
    ], className="row"),
    # Output target of display_page.
    html.Div(id='page-content2', children=[])
])


@app3.callback(Output('page-content2', 'children'),
              [Input('url', 'pathname')])
def display_page(pathname):
    """Choose the content to show for the current URL path.

    Args:
        pathname: the 'pathname' property of the 'url' location component.

    Returns:
        The layout of the matching page module, or an error message string
        when the path matches neither page.
    """
    if pathname == '/apps/data_overview':
        page = data_overview.layout
    elif pathname == '/apps/modeling':
        page = modeling.layout
    else:
        page = "404 Page Error! Please choose a link"
    return page

app3.run_server(mode='external',port=8051, debug=True, **{'width': '200px', 'height': '200px'})
# app3._terminate_server_for_port('localhost', port=8051)