# Running & configuring Dash dashboard

Articles for troubleshooting:

* https://stackoverflow.com/questions/70118476/running-jupyter-dash-in-docker-container
* https://dash.plotly.com/live-updates

In [1]:
from dash import Dash, html, dcc, callback, Output, Input, dash_table
import plotly.express as px
import pandas as pd

In [2]:
import clickhouse_connect

In [3]:
from datetime import datetime, timedelta

In [4]:
def ch_stats(cmd):
    """Create a reusable runner for one ClickHouse SQL statement.

    Args:
        cmd: SQL text to execute. It should not end with its own ``LIMIT``
            clause, because one may be appended at call time.

    Returns:
        A function ``ch_df(limit=0)`` that opens a new ClickHouse client,
        runs ``cmd`` and returns the result as a pandas DataFrame. A
        positive ``limit`` appends ``LIMIT <limit>`` to the statement; zero
        or a negative value runs the statement unchanged.
    """
    def ch_df(limit=0):
        # Build the final statement before connecting, so a bad ``limit``
        # cannot leave an open client behind.
        if limit > 0:
            query = cmd + " LIMIT {}".format(limit)
        else:
            query = cmd

        # NOTE(review): connection settings and credentials are hard-coded;
        # consider moving them to configuration.
        client = clickhouse_connect.get_client(
            host='clickhouse_server',
            username='altenar',
            password='altenar_ch_demo_517'
        )
        try:
            return client.query_df(query=query)
        finally:
            # Always release the connection, even when the query raises.
            client.close()
    return ch_df

In [5]:
# Query runner: authors in gharchive.repos that own more than one distinct
# repository, ordered by the number of repositories (largest first).
# Call it as repo_ownership_stats(limit=N) to cap the number of rows.
repo_ownership_stats = ch_stats(
    cmd="""
    SELECT
        repo_author,
        countDistinct(repo_name) AS repos_total
    FROM gharchive.repos
    GROUP BY repo_author
    HAVING repos_total > 1
    ORDER BY repos_total DESC
    """
)
# Manual check of the query result (kept disabled):
# df_t = repo_ownership_stats(limit=100)
# df_t

In [6]:
# Query runner: (date, author) pairs in gharchive.commits with at least two
# commits on that date. Rows are ordered by author name first, then by the
# commit count (largest first) within each author.
commits_gt1_stats = ch_stats(
    cmd = """
    SELECT                                                                                                                                                                                                                                                                   
        date,
        author_name,
        count(*) AS total_commits
    FROM gharchive.commits
    GROUP BY
        date,
        author_name
    HAVING total_commits >= 2
    ORDER BY
        author_name ASC,
        total_commits DESC
    """
)
# Manual check of the query result (kept disabled):
# df_t = commits_gt1_stats()
# df_t

In [7]:
# Query runner: earliest and latest `created_at` in gharchive.commits, both
# truncated to a date.
commits_dts_stats = ch_stats(
    cmd = """
    SELECT
        toDate(min(created_at)) AS min_created_at,
        toDate(max(created_at)) AS max_created_at
    FROM gharchive.commits
    """
)
# Executed right away: the single-row result is read when the
# commits_lt1_stats query text is formatted further down.
dates_df = commits_dts_stats()
dates_df

Unnamed: 0,min_created_at,max_created_at
0,2023-07-25,2023-07-29


In [8]:
# Query runner: author/day rows whose running commit count is zero.
# The inner query computes, per author and per one-day interval, a running
# sum of commits, and uses ORDER BY ... WITH FILL to add rows for the days
# between the first commit date and the day after the last one. The outer
# query keeps only rows with sum_commits = 0.
# NOTE(review): presumably only the rows synthesized by WITH FILL can have
# sum_commits = 0 -- confirm the result is what the dashboard section expects.
# The date bounds are baked into the SQL text when this cell runs, so the
# cell must be re-run to pick up a newer date range.
commits_lt1_stats = ch_stats(
    cmd = """
    SELECT t.author_name, t.interval_start_dt
    FROM
    (
        SELECT
            author_name,
            created_at,
            1 AS total,
            toStartOfInterval(created_at, toIntervalDay(1)) AS interval_start_dt,
            sum(total) OVER (PARTITION BY author_name, interval_start_dt ORDER BY created_at ASC) AS sum_commits
        FROM gharchive.commits
        ORDER BY
            author_name ASC,
            interval_start_dt ASC 
                WITH FILL 
                FROM toUnixTimestamp('{}') 
                TO toUnixTimestamp('{}') 
                STEP toIntervalDay(1)
    ) AS t
    WHERE sum_commits = 0
    """.format(
        # "%Y-%m-%d" is the portable spelling of "%F", which is a
        # platform-specific strftime extension.
        dates_df['min_created_at'][0].strftime("%Y-%m-%d"),
        # The upper bound is one day past the last commit date.
        (dates_df['max_created_at'][0] + timedelta(days=1)).strftime("%Y-%m-%d")
    )
)
# Manual check of the query result (kept disabled):
# df_t = commits_lt1_stats()
# df_t

In [9]:
# Query runner: repositories in gharchive.members with more than ten
# distinct member logins, ordered by member count (largest first) and then
# by repository name.
members_gt10_stats = ch_stats(
    cmd = """
    SELECT
        repo_name,
        repo_name_full,
        countDistinct(member_login) AS total_members
    FROM gharchive.members
    GROUP BY
        repo_name,
        repo_name_full
    HAVING total_members > 10
    ORDER BY 
        total_members DESC,
        repo_name ASC
    """
)
# Manual check of the query result (kept disabled):
# df_t = members_gt10_stats()
# df_t

In [10]:
# Query runner: row counts of gharchive.members per date and hour of
# `created_at`, newest date and hour first.
members_basic_stats = ch_stats(
    cmd = """
    SELECT
        date,
        toHour(created_at) h,
        count(*) AS total
    FROM gharchive.members
    GROUP BY date, h
    ORDER BY date DESC, h DESC
    """
)

In [11]:
# Query runner: row counts of gharchive.commits per date and hour of
# `created_at`, newest date and hour first.
commits_basic_stats = ch_stats(
    cmd = """
    SELECT
        date,
        toHour(created_at) h,
        count(*) AS total
    FROM gharchive.commits
    GROUP BY date, h
    ORDER BY date DESC, h DESC
    """
)

In [12]:
# Query runner: row counts of gharchive.repos per date, newest date first.
repos_basic_stats = ch_stats(
    cmd = """
    SELECT
        date,
        count(*) AS total
    FROM gharchive.repos
    GROUP BY date
    ORDER BY date DESC
    """
)

In [17]:
# Leftover from the Dash sample app (sample dataset, unused here):
# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')
# Stylesheet loaded from an external URL and passed to the Dash app below.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
# The Dash application object that the layout is attached to and that is
# started at the end of this cell.
app = Dash(__name__, external_stylesheets=external_stylesheets)

def main_layout():
    """Assemble the static skeleton of the dashboard page.

    Returns:
        An ``html.Div`` holding the page title, two informational
        paragraphs, one heading plus one empty placeholder ``Div`` per
        section (repos, commits, members) and the ``dcc.Interval`` whose
        ticks trigger the section callbacks once a minute.
    """
    centered = {'textAlign': 'center'}
    left_aligned = {'textAlign': 'left'}

    page = [
        html.H1(children='GitHub Archive Statistics', style=centered),
        html.P(children='Disclaimer: For performance reasons, only Top 100 records are shown.', style=centered),
        html.P(children='Refresh Interval: 1m.', style=centered),
    ]

    # Each section is a heading followed by a placeholder that a callback fills.
    sections = (
        ('Repos Total', 'live-update-repos'),
        ('Commits Total', 'live-update-commits'),
        ('Members Total', 'live-update-members'),
    )
    for heading, placeholder_id in sections:
        page.append(html.H2(children=heading, style=left_aligned))
        page.append(html.Div(id=placeholder_id))

    refresh_ms = 1 * 60 * 1000  # one minute, expressed in milliseconds
    page.append(
        dcc.Interval(
            id='interval-component',
            interval=refresh_ms,
            n_intervals=0
        )
    )
    return html.Div(page)

# main_layout() is called once here, so the same component tree is used for
# the lifetime of the app; the section contents come from the callbacks below.
app.layout = main_layout()

@callback(
    Output('live-update-repos', 'children'),
    Input('interval-component', 'n_intervals')
)
def update_repos_df(n):
    """Rebuild the contents of the 'live-update-repos' placeholder.

    Args:
        n: Tick counter of 'interval-component'. It only triggers the
            refresh; its value is not used.

    Returns:
        A single paragraph when the basic repos statistics are empty.
        Otherwise a list with the repo-owners table (at most 100 rows)
        followed by a bar chart and a table of the per-date repo counts.
    """
    centered = {'textAlign': 'center'}
    scrollable = {'overflowX': 'auto'}

    daily = repos_basic_stats()
    owners = repo_ownership_stats(limit=100)

    # Guard clause: nothing to render without basic statistics.
    if not len(daily):
        return html.P(children='Nothing to show. The repos table is empty!', style=centered)

    owners_table = dash_table.DataTable(
        data=owners.to_dict('records'),
        page_size=10,
        columns=[{"name": col, "id": col} for col in owners.columns],
        style_table=scrollable
    )

    totals_per_date = daily.groupby('date')['total'].sum().reset_index()
    bars = dcc.Graph(
        id='graph-bars-repos',
        figure=px.bar(totals_per_date, x='date', y='total')
    )
    daily_table = dash_table.DataTable(
        data=daily.to_dict('records'),
        page_size=10,
        columns=[{"name": col, "id": col} for col in daily.columns],
        style_table=scrollable
    )

    return [
        html.H3(children='Repo Owners with >= 2 Repos', style=centered),
        html.Div(className='row', children=[owners_table]),
        html.H3(children='Basic Repos Table Stats', style=centered),
        html.Div(className='row', children=[
            html.Div(className='six columns', children=[bars]),
            html.Div(className='six columns', children=[daily_table]),
        ])
    ]

@callback(
    Output('live-update-commits', 'children'),
    Input('interval-component', 'n_intervals')
)
def update_commits_df(n):
    """Rebuild the contents of the 'live-update-commits' placeholder.

    Args:
        n: Tick counter of 'interval-component'. It only triggers the
            refresh; its value is not used.

    Returns:
        A single paragraph when the basic commits statistics are empty.
        Otherwise a list with two tables of per-developer commit data (at
        most 100 rows each) followed by a bar chart and a table of the
        basic commits statistics.
    """
    # Use the commits statistics here: the section previously read
    # repos_basic_stats(), so it charted repo counts under a commits heading.
    df_basic = commits_basic_stats()
    if len(df_basic) == 0:
        # Return before running the two heavier queries; their results are
        # not displayed in this case.
        return html.P(children='Nothing to show. The commits table is empty!', style={'textAlign':'center'})

    df_gt = commits_gt1_stats(limit=100)
    df_lt = commits_lt1_stats(limit=100)
    return [
        html.H3(children='Developers with > 1 commit in a day', style={'textAlign':'center'}),
        html.Div(className='row', children=[
            dash_table.DataTable(
                data=df_gt.to_dict('records'),
                page_size=10,
                columns=[{"name": i, "id": i} for i in df_gt.columns],
                style_table={'overflowX': 'auto'}
            ),
        ]),
        html.H3(children='Developers with < 1 commit in a day', style={'textAlign':'center'}),
        html.Div(className='row', children=[
            dash_table.DataTable(
                data=df_lt.to_dict('records'),
                page_size=10,
                columns=[{"name": i, "id": i} for i in df_lt.columns],
                style_table={'overflowX': 'auto'}
            ),
        ]),
        html.H3(children='Basic Commits Table Stats', style={'textAlign':'center'}),
        html.Div(className='row', children=[
            html.Div(className='six columns', children=[
                # The basic statistics are per date and hour; sum them per
                # date for the chart.
                dcc.Graph(id='graph-bars-commits', figure = px.bar(df_basic.groupby('date')['total'].sum().reset_index(), x='date', y='total')),
            ]),
            html.Div(className='six columns', children=[
                dash_table.DataTable(
                    data=df_basic.to_dict('records'),
                    page_size=10,
                    columns=[{"name": i, "id": i} for i in df_basic.columns],
                    style_table={'overflowX': 'auto'}
                ),
            ]),
        ])
    ]
    

@callback(
    Output('live-update-members', 'children'),
    Input('interval-component', 'n_intervals')
)
def update_members_df(n):
    """Rebuild the contents of the 'live-update-members' placeholder.

    Args:
        n: Tick counter of 'interval-component'. It only triggers the
            refresh; its value is not used.

    Returns:
        A single paragraph when the basic members statistics are empty.
        Otherwise a list with a heading that reports how many projects were
        fetched (at most 100), the table of those projects, and a bar chart
        plus a table of the basic members statistics.
    """
    centered = {'textAlign': 'center'}
    scrollable = {'overflowX': 'auto'}

    hourly = members_basic_stats()
    projects = members_gt10_stats(limit=100)

    # Guard clause: nothing to render without basic statistics.
    if not len(hourly):
        return html.P(children='Nothing to show. The members table is empty!', style=centered)

    project_count = projects['repo_name_full'].count()
    projects_table = dash_table.DataTable(
        data=projects.to_dict('records'),
        page_size=10,
        columns=[{"name": col, "id": col} for col in projects.columns],
        style_table=scrollable
    )

    # The basic statistics are per date and hour; sum them per date for the chart.
    totals_per_date = hourly.groupby('date')['total'].sum().reset_index()
    bars = dcc.Graph(
        id='graph-bars-members',
        figure=px.bar(totals_per_date, x='date', y='total')
    )
    hourly_table = dash_table.DataTable(
        data=hourly.to_dict('records'),
        page_size=10,
        columns=[{"name": col, "id": col} for col in hourly.columns],
        style_table=scrollable
    )

    return [
        html.H3(children='Total Projects with > 10 Members: {}'.format(project_count), style=centered),
        html.Div(className='row', children=[projects_table]),
        html.H3(children='Basic Members Table Stats', style=centered),
        html.Div(className='row', children=[
            html.Div(className='six columns', children=[bars]),
            html.Div(className='six columns', children=[hourly_table]),
        ])
    ]



# Start the app on all interfaces (0.0.0.0), port 8050, with debug mode and
# the dev-tools UI switched on.
# NOTE(review): `mode='inline'` presumably requests rendering inside the
# notebook output -- confirm the installed Dash version accepts this keyword.
app.run_server(mode='inline', host="0.0.0.0", port=8050, dev_tools_ui=True, debug=True)
    
# Script-style entry point (kept disabled in the notebook):
# if __name__ == '__main__':
#     app.run(debug=True)

In [180]:
# Debugging aid: list the attribute names of the Dash app object.
dir(app)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_add_assets_resource',
 '_add_url',
 '_assets_files',
 '_background_manager',
 '_callback_list',
 '_collect_and_register_resources',
 '_config',
 '_dev_tools',
 '_extra_components',
 '_favicon',
 '_generate_config_html',
 '_generate_css_dist_html',
 '_generate_meta',
 '_generate_renderer',
 '_generate_scripts_html',
 '_got_first_request',
 '_hot_reload',
 '_index_string',
 '_inline_scripts',
 '_invalid_resources_handler',
 '_layout',
 '_layout_is_function',
 '_layout_value',
 '_long_callback_count',
 '_on_assets_change',
 '_serve_default_favicon',
 '_setup_dev_tools',
 '_setup_routes',
 '_setup_server',
