# Dash Visualisation Python
[PCA](https://www.datacamp.com/community/tutorials/introduction-t-sne) combines your input features in a specific way so that you can drop the least important components while still retaining the most valuable parts of all of the features. As an added benefit, the new features (components) created by PCA are all uncorrelated with one another. PCA is a mathematical technique, whereas t-SNE is a probabilistic one. PCA concentrates on placing dissimilar data points far apart in a lower-dimensional representation.
- t-SNE 
   + Clearer clustering
   - Takes longer
   + Captures complex Polynomial relationships
- PCA --> t-SNE: run PCA first, then t-SNE on its output (this suppresses some noise and speeds up the computation of pairwise distances between samples).


In [1]:
import pandas as pd 
import numpy as np 
import sqlite3 as sql
import pickle

# Plotly
import plotly.graph_objects as go

# Web app
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
import dash_table

# Path of the SQLite database file opened with `sql.connect` in the cells below.
database_name = "data/data.db"

In [2]:
# Load the whole `clean_data` table into a DataFrame.
# The try/finally guarantees the connection is released even if the query
# fails; no cursor is needed because pandas runs the query itself.
conn = sql.connect(database_name)
try:
    df = pd.read_sql_query("SELECT * FROM clean_data", conn)
finally:
    conn.close()

# Preview the first row (displayed as the cell output in a notebook).
df.head(1)

Unnamed: 0,YQuarter,Ticker,gsector,Date,X1_REVGH,X2_EPS,X3_ROA,X4_ROE,X5_PE,X6_PS,...,PCA_3,PCA_4,PCA_5,PCA_6,PCA_7,PCA_8,PCA_9,PCA_10,PCA_11,PCA_12
0,1999Q3,A,Sector_35,1999-07-31 00:00:00,0.000267,0.251872,0.791643,0.974791,0.515213,1.2e-05,...,-0.08238,0.037696,-0.001543,-0.001425,-0.000367,0.0001,0.000303,5.1e-05,-0.000362,-0.000199


## APP 7  - Original
- [ ] App 7 More User customization %  
  - [ ] [Insert data table](https://dash.plot.ly/datatable), [Dynamic table](https://towardsdatascience.com/how-to-build-a-complex-reporting-dashboard-using-dash-and-plotl-4f4257c18a7f). [Dynamic Table2](https://dash-gallery.plotly.host/dash-medical-provider-charges/). [Dynamic Table3](https://dash-gallery.plotly.host/dash-drug-discovery/).
      - [ ] Use regex on _ to get feature column names from df
      - [ ] [Virtualization improves frame rate](https://dash.plot.ly/datatable/virtualization).
  - [ ] [Soft Clustering](https://plot.ly/python/v3/3d-point-clustering/)
  - [ ] [Animate time-series](https://plot.ly/python/animations/#animated-figures-with-graph-objects)
  - [ ] [Cross Filtering](https://dash.plot.ly/interactive-graphing)
      - [ ] Tabs for different data: [Fundamental, Economic, Technical]
      - [ ] Hover over table to get PCA explained variance by feature


In [6]:
# Selector options built from the values present in `df`.
quarter_options = [{'label': i, 'value': i} for i in df["YQuarter"].unique()]
# Year of every row; feeds the range slider's min, max and tick marks.
year_options = pd.DatetimeIndex(df['Date']).year
sector_options = [{'label': i, 'value': i}
                  for i in pd.Series(df["gsector"].unique()).sort_values(axis=0, ascending=True)]

# Slice of `df` shown in the data table. One frame feeds both the column
# definitions and the rows so the two can never disagree (the column list
# previously used 4:25 while the rows used 4:6).
table_df = df.iloc[:, 4:25]

# DEFINE HTML TEMPLATE
app = dash.Dash()
# External stylesheet providing the grid classes used below.
app.css.append_css({'external_url': 'https://codepen.io/amyoshino/pen/jzXypZ.css'})

app.layout = html.Div([
    dcc.Tabs(id="tabs", children=[  # ALL TABS START

        # TAB 1. 3D Scatter Plot: PCA & Clustering
        dcc.Tab(label='3D PCA & Clustering', children=[
            # =========================== PLOT 1 ============================ #
            html.Div([
                html.H1(" Three Component Scatter Plot ", style={'textAlign': 'center'}),
                # 1. Year range selector.
                dcc.RangeSlider(
                    id="date_1",
                    updatemode="drag",
                    min=year_options.min(),
                    max=year_options.max(),
                    marks={i: '{}'.format(i) for i in year_options.unique().tolist()},
                    value=[2013, 2015],
                ),
                # 2. Text label filled in by the callback.
                html.Div(id='slider_selection'),
                # 3. Submit button (its click count is a callback input).
                html.Button(id='ticker_1',
                            n_clicks=0,
                            children="Submit"),
                # 4. Quarter selector.
                dcc.Dropdown(
                    id='quarter_1',
                    options=quarter_options,
                    multi=True,
                    value=['2013Q1'],
                    style={"display": "block", "margin-left": "auto", "margin-right": "auto", "width": "40%"},
                ),
                # 5. Sector selector; every sector is ticked by default.
                dcc.Checklist(
                    id='sector_1',
                    options=sector_options,
                    value=['Sector_35', 'Sector_15', 'Sector_45', 'Sector_20',
                           'Sector_40', 'Sector_25', 'Sector_30', 'Sector_55',
                           'Sector_50', 'Sector_60', 'Sector_10'],
                ),
                # 6. 3D scatter plot drawn by the callback.
                dcc.Graph(id="scatter_1"),
            # NOTE(review): was 'twelver columns', presumably a typo for the
            # stylesheet's full-width 'twelve columns' class - confirm.
            ], className='twelve columns'),

            # =========================== PLOT 2 ============================ #
            html.Div([
                dash_table.DataTable(
                    id="table_1",
                    columns=[{"name": i, "id": i} for i in table_df.columns],
                    editable=True,
                    row_selectable="multi",
                    # DataTable expects a list of row dicts, not the JSON
                    # string that DataFrame.to_json() returns.
                    data=table_df.to_dict('records'),
                    virtualization=True,
                ),
            ], className='twelve columns'),

        ], className='row'),  # END TAB 1

    ]),  # ALL TABS END
], className="container")


# ============================================================================
# Callbacks
# ============================================================================


@app.callback([Output('scatter_1', 'figure'),
               Output('slider_selection', 'children')],
              [Input('date_1', 'value'),
               Input('ticker_1', 'n_clicks'),
               Input('quarter_1', 'value'),
               Input('sector_1', 'value')])
def scatter_3D(rangeD, n_clicks, quarterD, sectorD):
    """Build the 3D PCA scatter figure and the date-range label.

    Parameters
    ----------
    rangeD : sequence
        ``[lower, upper]`` years from the ``date_1`` range slider.
    n_clicks : int
        Click count of the ``ticker_1`` button. It only triggers the
        callback; its value is not used.
    quarterD : list or None
        ``YQuarter`` values selected in the ``quarter_1`` dropdown.
    sectorD : list or None
        ``gsector`` values selected in the ``sector_1`` checklist.

    Returns
    -------
    tuple
        ``(figure, label)``: a dict with ``"data"`` and ``"layout"`` for
        the ``scatter_1`` graph, and the text for ``slider_selection``.
    """
    # Read the full table on every call; try/finally releases the
    # connection even if the query fails.
    conn = sql.connect(database_name)
    try:
        df2 = pd.read_sql_query("SELECT * FROM clean_data", conn)
    finally:
        conn.close()

    # Reduce the date to its year so it can be compared with the slider.
    df2["Date"] = pd.DatetimeIndex(df2["Date"]).year

    # `isin` rejects None, so treat an empty selection as "match nothing".
    quarters = quarterD or []
    sectors = sectorD or []

    dataset = df2[(df2["Date"] >= rangeD[0]) &        # lower year bound
                  (df2["Date"] <= rangeD[1]) &        # upper year bound
                  (df2["YQuarter"].isin(quarters)) &  # selected quarters
                  (df2["gsector"].isin(sectors))]     # selected sectors

    trace1 = go.Scatter3d(
        x=dataset["PCA_1"],
        y=dataset["PCA_2"],
        z=dataset["PCA_3"],
        mode="markers+text",
        hovertext=dataset[["Ticker", "gsector", "Date"]].values.tolist(),
        marker=dict(
            # Sizes must come from the filtered rows so that they line up
            # one-to-one with the plotted points (the unfiltered global
            # frame was used before). The power of 4 exaggerates the spread.
            size=abs((np.exp(dataset["y_return"]) ** 4) * 2),
            sizemin=1.5,
            color=dataset["Cluster"],
            colorscale="Portland",
            colorbar={"thickness": 10, "len": 0.5, "x": 0.8, "y": 0.6},
            # NOTE(review): an array-valued `symbol` is applied per point,
            # not per sector, so these 11 entries only reach the first 11
            # points - confirm the intended sector-to-symbol mapping.
            symbol=['cross', 'diamond', 'square', 'square-open',
                    'square', 'diamond-open', 'cross', 'x', 'square', 'square', 'circle-open'],
            showscale=False,
        ),
    )

    data = [trace1]

    layout = go.Layout(
        title="3D PCA Scatter Plot",
        height=1000,
        width=1000,
        scene=dict(
            aspectmode="cube",
            hovermode="closest",
            dragmode="orbit",  # "orbit" | "turntable" | "zoom" | "pan" | False
            xaxis={"title": "PCA 1"},
            yaxis={"title": "PCA 2"},
            zaxis={"title": "PCA 3"},
        ),
    )

    fig = {"data": data, "layout": layout}

    return fig, "Date Range {} - {}.".format(str(rangeD[0]), str(rangeD[1]))

# Start the Dash server only when this code runs as the main module.
if __name__ == '__main__':
    app.run_server()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [24/Jan/2020 14:50:25] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [24/Jan/2020 14:50:26] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [24/Jan/2020 14:50:27] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [24/Jan/2020 14:50:37] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [24/Jan/2020 14:50:43] "GET /_favicon.ico?v=1.7.0 HTTP/1.1" 200 -


In [None]:
### import re
# [{"name": re.match('\$*'), "id": i} for i in df.iloc[:, 4:25]]

## V2

In [None]:
# Selector options built from the values present in `df`.
quarter_options = [{'label': i, 'value': i} for i in df["YQuarter"].unique()]
# Year of every row; feeds the range slider's min, max and tick marks.
year_options = pd.DatetimeIndex(df['Date']).year
sector_options = [{'label': i, 'value': i}
                  for i in pd.Series(df["gsector"].unique()).sort_values(axis=0, ascending=True)]

# Slice of `df` shown in the data table; one frame feeds both the column
# definitions and the rows so the two always agree.
table_df = df.iloc[:, 4:6]

# DEFINE HTML TEMPLATE
app = dash.Dash()
# External stylesheet providing the grid classes used below.
app.css.append_css({'external_url': 'https://codepen.io/amyoshino/pen/jzXypZ.css'})

app.layout = html.Div([
    dcc.Tabs(id="tabs", children=[  # ALL TABS START

        # TAB 1. 3D Scatter Plot: PCA & Clustering
        dcc.Tab(label='3D PCA & Clustering', children=[
            # =========================== PLOT 1 ============================ #
            html.Div([
                html.H1(" Three Component Scatter Plot ", style={'textAlign': 'center'}),
                # 1. Year range selector.
                dcc.RangeSlider(
                    id="date_1",
                    updatemode="drag",
                    min=year_options.min(),
                    max=year_options.max(),
                    marks={i: '{}'.format(i) for i in year_options.unique().tolist()},
                    value=[2013, 2015],
                ),
                # 2. Text label filled in by the callback.
                html.Div(id='slider_selection'),
                # 3. Submit button (its click count is a callback input).
                html.Button(id='ticker_1',
                            n_clicks=0,
                            children="Submit"),
                # 4. Quarter selector.
                dcc.Dropdown(
                    id='quarter_1',
                    options=quarter_options,
                    multi=True,
                    value=['2013Q1'],
                    style={"display": "block", "margin-left": "auto", "margin-right": "auto", "width": "40%"},
                ),
                # 5. Sector selector; every sector is ticked by default.
                dcc.Checklist(
                    id='sector_1',
                    options=sector_options,
                    value=['Sector_35', 'Sector_15', 'Sector_45', 'Sector_20',
                           'Sector_40', 'Sector_25', 'Sector_30', 'Sector_55',
                           'Sector_50', 'Sector_60', 'Sector_10'],
                ),
                # 6. 3D scatter plot drawn by the callback.
                dcc.Graph(id="scatter_1"),
            # NOTE(review): was 'twelver columns', presumably a typo for the
            # stylesheet's full-width 'twelve columns' class - confirm.
            ], className='twelve columns'),

            # =========================== PLOT 2 ============================ #
            html.Div([
                dash_table.DataTable(
                    id="table_1",
                    columns=[{"name": i, "id": i} for i in table_df.columns],
                    editable=True,
                    row_selectable="multi",
                    # DataTable expects a list of row dicts, not the JSON
                    # string that DataFrame.to_json() returns.
                    data=table_df.to_dict('records'),
                    virtualization=True,
                ),
            ], className='twelve columns'),

        ], className='row'),  # END TAB 1

    ]),  # ALL TABS END
], className="container")


# ============================================================================
# Callbacks
# ============================================================================


@app.callback([Output('scatter_1', 'figure'),
               Output('slider_selection', 'children')],
              [Input('date_1', 'value'),
               Input('ticker_1', 'n_clicks'),
               Input('quarter_1', 'value'),
               Input('sector_1', 'value')])
def scatter_3D(rangeD, n_clicks, quarterD, sectorD):
    """Build the 3D PCA scatter figure and the date-range label.

    Parameters
    ----------
    rangeD : sequence
        ``[lower, upper]`` years from the ``date_1`` range slider.
    n_clicks : int
        Click count of the ``ticker_1`` button. It only triggers the
        callback; its value is not used.
    quarterD : list or None
        ``YQuarter`` values selected in the ``quarter_1`` dropdown.
    sectorD : list or None
        ``gsector`` values selected in the ``sector_1`` checklist.

    Returns
    -------
    tuple
        ``(figure, label)``: a dict with ``"data"`` and ``"layout"`` for
        the ``scatter_1`` graph, and the text for ``slider_selection``.
    """
    # Read the full table on every call; try/finally releases the
    # connection even if the query fails.
    conn = sql.connect(database_name)
    try:
        df2 = pd.read_sql_query("SELECT * FROM clean_data", conn)
    finally:
        conn.close()

    # Reduce the date to its year so it can be compared with the slider.
    df2["Date"] = pd.DatetimeIndex(df2["Date"]).year

    # `isin` rejects None, so treat an empty selection as "match nothing".
    quarters = quarterD or []
    sectors = sectorD or []

    dataset = df2[(df2["Date"] >= rangeD[0]) &        # lower year bound
                  (df2["Date"] <= rangeD[1]) &        # upper year bound
                  (df2["YQuarter"].isin(quarters)) &  # selected quarters
                  (df2["gsector"].isin(sectors))]     # selected sectors

    trace1 = go.Scatter3d(
        x=dataset["PCA_1"],
        y=dataset["PCA_2"],
        z=dataset["PCA_3"],
        mode="markers+text",
        hovertext=dataset[["Ticker", "gsector", "Date"]].values.tolist(),
        marker=dict(
            # Sizes must come from the filtered rows so that they line up
            # one-to-one with the plotted points (the unfiltered global
            # frame was used before). The power of 4 exaggerates the spread.
            size=abs((np.exp(dataset["y_return"]) ** 4) * 2),
            sizemin=1.5,
            color=dataset["Cluster"],
            colorscale="Portland",
            colorbar={"thickness": 10, "len": 0.5, "x": 0.8, "y": 0.6},
            # NOTE(review): an array-valued `symbol` is applied per point,
            # not per sector, so these 11 entries only reach the first 11
            # points - confirm the intended sector-to-symbol mapping.
            symbol=['cross', 'diamond', 'square', 'square-open',
                    'square', 'diamond-open', 'cross', 'x', 'square', 'square', 'circle-open'],
            showscale=False,
        ),
    )

    data = [trace1]

    layout = go.Layout(
        title="3D PCA Scatter Plot",
        height=1000,
        width=1000,
        scene=dict(
            aspectmode="cube",
            hovermode="closest",
            dragmode="orbit",  # "orbit" | "turntable" | "zoom" | "pan" | False
            xaxis={"title": "PCA 1"},
            yaxis={"title": "PCA 2"},
            zaxis={"title": "PCA 3"},
        ),
    )

    fig = {"data": data, "layout": layout}

    return fig, "Date Range {} - {}.".format(str(rangeD[0]), str(rangeD[1]))

# Start the Dash server only when this code runs as the main module.
if __name__ == '__main__':
    app.run_server()

## Animated plots - *Not supported with Dash*
- [x] [Animate scatter plots](https://plot.ly/python/v3/gapminder-example/)

In [4]:
# Selector options built from the values present in `df`.
quarter_options = [{'label': i, 'value': i} for i in df["YQuarter"].unique()]
# Year of every row; feeds the range slider's min, max and tick marks.
year_options = pd.DatetimeIndex(df['Date']).year
sector_options = [{'label': i, 'value': i}
                  for i in pd.Series(df["gsector"].unique()).sort_values(axis=0, ascending=True)]

# Slice of `df` shown in the data table; one frame feeds both the column
# definitions and the rows so the two always agree.
table_df = df.iloc[:, 4:6]

# DEFINE HTML TEMPLATE
app = dash.Dash()
# External stylesheet providing the grid classes used below.
app.css.append_css({'external_url': 'https://codepen.io/amyoshino/pen/jzXypZ.css'})

app.layout = html.Div([
    dcc.Tabs(id="tabs", children=[  # ALL TABS START

        # TAB 1. 3D Scatter Plot: PCA & Clustering
        dcc.Tab(label='3D PCA & Clustering', children=[
            # =========================== PLOT 1 ============================ #
            html.Div([
                html.H1(" Three Component Scatter Plot ", style={'textAlign': 'center'}),
                # 1. Year range selector.
                dcc.RangeSlider(
                    id="date_1",
                    updatemode="drag",
                    min=year_options.min(),
                    max=year_options.max(),
                    marks={i: '{}'.format(i) for i in year_options.unique().tolist()},
                    value=[2013, 2015],
                ),
                # 2. Text label filled in by the callback.
                html.Div(id='slider_selection'),
                # 3. Submit button (its click count is a callback input).
                html.Button(id='ticker_1',
                            n_clicks=0,
                            children="Submit"),
                # 4. Quarter selector.
                dcc.Dropdown(
                    id='quarter_1',
                    options=quarter_options,
                    multi=True,
                    value=['2013Q1'],
                    style={"display": "block", "margin-left": "auto", "margin-right": "auto", "width": "40%"},
                ),
                # 5. Sector selector; every sector is ticked by default.
                dcc.Checklist(
                    id='sector_1',
                    options=sector_options,
                    value=['Sector_35', 'Sector_15', 'Sector_45', 'Sector_20',
                           'Sector_40', 'Sector_25', 'Sector_30', 'Sector_55',
                           'Sector_50', 'Sector_60', 'Sector_10'],
                ),
                # 6. 3D scatter plot drawn by the callback.
                dcc.Graph(id="scatter_1"),
            # NOTE(review): was 'twelver columns', presumably a typo for the
            # stylesheet's full-width 'twelve columns' class - confirm.
            ], className='twelve columns'),

            # =========================== PLOT 2 ============================ #
            html.Div([
                dash_table.DataTable(
                    id="table_1",
                    columns=[{"name": i, "id": i} for i in table_df.columns],
                    editable=True,
                    row_selectable="multi",
                    # DataTable expects a list of row dicts, not the JSON
                    # string that DataFrame.to_json() returns.
                    data=table_df.to_dict('records'),
                    virtualization=True,
                ),
            ], className='twelve columns'),

        ], className='row'),  # END TAB 1

    ]),  # ALL TABS END
], className="container")


# ============================================================================
# Callbacks
# ============================================================================


@app.callback([Output('scatter_1', 'figure'),
               Output('slider_selection', 'children')],
              [Input('date_1', 'value'),
               Input('ticker_1', 'n_clicks'),
               Input('quarter_1', 'value'),
               Input('sector_1', 'value')])
def scatter_3D(rangeD, n_clicks, quarterD, sectorD):
    """Build the 3D PCA scatter figure and the date-range label.

    Parameters
    ----------
    rangeD : sequence
        ``[lower, upper]`` years from the ``date_1`` range slider.
    n_clicks : int
        Click count of the ``ticker_1`` button. It only triggers the
        callback; its value is not used.
    quarterD : list or None
        ``YQuarter`` values selected in the ``quarter_1`` dropdown.
    sectorD : list or None
        ``gsector`` values selected in the ``sector_1`` checklist.

    Returns
    -------
    tuple
        ``(figure, label)``: a dict with ``"data"`` and ``"layout"`` for
        the ``scatter_1`` graph, and the text for ``slider_selection``.
    """
    # Read the full table on every call; try/finally releases the
    # connection even if the query fails.
    conn = sql.connect(database_name)
    try:
        df2 = pd.read_sql_query("SELECT * FROM clean_data", conn)
    finally:
        conn.close()

    # Reduce the date to its year so it can be compared with the slider.
    df2["Date"] = pd.DatetimeIndex(df2["Date"]).year

    # `isin` rejects None, so treat an empty selection as "match nothing".
    quarters = quarterD or []
    sectors = sectorD or []

    dataset = df2[(df2["Date"] >= rangeD[0]) &        # lower year bound
                  (df2["Date"] <= rangeD[1]) &        # upper year bound
                  (df2["YQuarter"].isin(quarters)) &  # selected quarters
                  (df2["gsector"].isin(sectors))]     # selected sectors

    trace1 = go.Scatter3d(
        x=dataset["PCA_1"],
        y=dataset["PCA_2"],
        z=dataset["PCA_3"],
        mode="markers+text",
        hovertext=dataset[["Ticker", "gsector", "Date"]].values.tolist(),
        marker=dict(
            # Sizes must come from the filtered rows so that they line up
            # one-to-one with the plotted points (the unfiltered global
            # frame was used before). The power of 4 exaggerates the spread.
            size=abs((np.exp(dataset["y_return"]) ** 4) * 2),
            sizemin=1.5,
            color=dataset["Cluster"],
            colorscale="Portland",
            opacity=0.5,
            sizemode='diameter',
            colorbar={"thickness": 10, "len": 0.5, "x": 0.8, "y": 0.6},
            # NOTE(review): an array-valued `symbol` is applied per point,
            # not per sector, so these 11 entries only reach the first 11
            # points - confirm the intended sector-to-symbol mapping.
            symbol=['cross', 'diamond', 'square', 'square-open',
                    'square', 'diamond-open', 'cross', 'x', 'square', 'square', 'circle-open'],
            showscale=False,
        ),
    )

    data = [trace1]

    layout = go.Layout(
        title="3D PCA Scatter Plot",
        height=1000,
        width=1000,
        scene=dict(
            aspectmode="cube",
            hovermode="closest",
            dragmode="orbit",  # "orbit" | "turntable" | "zoom" | "pan" | False
            xaxis={"title": "PCA 1"},
            yaxis={"title": "PCA 2"},
            zaxis={"title": "PCA 3"},
        ),
    )

    fig = {"data": data, "layout": layout}

    return fig, "Date Range {} - {}.".format(str(rangeD[0]), str(rangeD[1]))

# Start the Dash server only when this code runs as the main module.
if __name__ == '__main__':
    app.run_server()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [24/Jan/2020 10:26:06] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [24/Jan/2020 10:26:08] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [24/Jan/2020 10:26:08] "GET /_favicon.ico?v=1.7.0 HTTP/1.1" 200 -
127.0.0.1 - - [24/Jan/2020 10:26:09] "GET /_dash-layout HTTP/1.1" 200 -
