In [None]:
import sys
sys.path.append('..')

from utils.parse_cfg import parse_cfg
from google_sheet.io import get_from_sheet
import plotly.graph_objects as go
import datetime
from itertools import chain

import numpy as np

## Access Raw Data

In [None]:
# Load the notebook settings; the keys read below are 'spreadsheet_id',
# 'summary_name' and 'sheet_name'.
config = parse_cfg('../temp/config.json')

# Arguments shared by every sheet request in this cell.
sheet_id = config['spreadsheet_id']
cred_dir = '../temp'

# Column A (row 2 downward) of the sheet named by config['summary_name'].
summary_names = get_from_sheet(sheet_id, config['summary_name'] + '!A2:A', cred_folder=cred_dir)

# Columns A, B and C (row 2 downward) of the sheet named by config['sheet_name'],
# fetched with one request per column, in that order.
sheet_name = config['sheet_name']
froms, tos, dates = (
    get_from_sheet(sheet_id, sheet_name + column, cred_folder=cred_dir)
    for column in ('!A2:A', '!B2:B', '!C2:C')
)

In [None]:
# Unwrap each fetched row to its first item (the cell text).
# Elements that are already plain strings are kept as they are: this cell rebinds
# `froms`/`tos`, so without the guard a second run (without re-fetching) would
# index into the strings and truncate every name to its first character.
froms = [row if isinstance(row, str) else row[0] for row in froms]
tos = [row if isinstance(row, str) else row[0] for row in tos]

# Parse each timestamp and keep only its calendar date; the time of day and the
# timezone information are dropped by .date().
dated = [
    datetime.datetime.strptime(row[0], '%Y-%m-%d %H:%M:%S %Z%z').date()
    for row in dates
]

### Filter Dates

In [None]:
def filter_date_range(lists, dated, start, end=None):
    """Select the entries of several parallel lists whose date lies in a range.

    Args:
        lists: sequences indexed in parallel with ``dated``.
        dated: one comparable date value per entry.
        start: inclusive lower bound.
        end: inclusive upper bound; when falsy (the default ``None``) there is
            no upper bound.

    Returns:
        A list with one new list per item of ``lists``, holding only the
        entries whose date is within the range, in their original order.
    """
    # Positions whose date passes the lower bound and, if given, the upper bound.
    keep = [
        pos
        for pos, day in enumerate(dated)
        if day >= start and (not end or day <= end)
    ]

    return [[seq[pos] for pos in keep] for seq in lists]

In [None]:
# Keep only the entries dated on or after 1 January 2024 (no upper bound).
# The result is [filtered froms, filtered tos].
start_date = datetime.date(2024, 1, 1)
filtered = filter_date_range([froms, tos], dated, start_date)

## Compose Sankey Graph

In [None]:
def gen_sankey(froms, tos, filter_out=()):
    """Build a two-column Sankey figure from parallel source/target sequences.

    ``froms[i]`` and ``tos[i]`` describe one link. Identical (source, target)
    pairs are merged and the number of occurrences becomes the link value.
    Sources are placed in a left column (x=0.1) and targets in a right column
    (x=0.9); within each column nodes are ordered by their total link value,
    largest first, with ties kept in first-seen order. A name that occurs on
    both sides gets two separate nodes, labelled ``name_(total)`` on the source
    side and ``name (total)`` on the target side.

    Args:
        froms: iterable of hashable source names. One-shot iterators are
            supported because the inputs are traversed exactly once.
        tos: iterable of hashable target names, paired positionally with
            ``froms``; surplus items of the longer input are ignored.
        filter_out: container of names; a pair is skipped when its source or
            its target is in this container.

    Returns:
        A ``go.Figure`` holding a single ``go.Sankey`` trace.
    """
    # Count every (source, target) pair in first-seen order.
    link_counts = {}
    for to_, from_ in zip(tos, froms):
        if to_ in filter_out or from_ in filter_out:
            continue
        pair = (from_, to_)
        link_counts[pair] = link_counts.get(pair, 0) + 1

    # Total outgoing value per source and incoming value per target.
    outs_accum = {}
    ins_accum = {}
    for (from_, to_), value in link_counts.items():
        outs_accum[from_] = outs_accum.get(from_, 0) + value
        ins_accum[to_] = ins_accum.get(to_, 0) + value

    # Largest totals first; sorted() is stable, so ties keep first-seen order.
    outs_order = sorted(outs_accum.items(), key=lambda item: item[1], reverse=True)
    ins_order = sorted(ins_accum.items(), key=lambda item: item[1], reverse=True)

    # Node list: all source nodes first, then all target nodes.
    labels = [f'{node}_({count})' for node, count in outs_order]
    labels += [f'{node} ({count})' for node, count in ins_order]

    # Map names straight to node positions instead of searching the label
    # list for every link.
    source_index = {node: pos for pos, (node, _) in enumerate(outs_order)}
    target_index = {
        node: len(outs_order) + pos for pos, (node, _) in enumerate(ins_order)
    }

    # Fixed x per column; y spreads each column evenly between 0.01 and 0.99.
    xs = [0.1] * len(outs_order) + [0.9] * len(ins_order)
    ys = (
        list(np.linspace(0.01, 0.99, len(outs_order)))
        + list(np.linspace(0.01, 0.99, len(ins_order)))
    )

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=labels,
            x=xs,
            y=ys,
        ),
        link=dict(
            source=[source_index[from_] for from_, _ in link_counts],
            target=[target_index[to_] for _, to_ in link_counts],
            value=list(link_counts.values()),
        ),
        arrangement='snap'
    )])

    return fig

In [None]:
# filtered[0] holds the source names and filtered[1] the target names
# that passed the date filter.
filtered_froms, filtered_tos = filtered[0], filtered[1]

fig = gen_sankey(filtered_froms, filtered_tos)
fig.update_layout(
    title_text="Thanks Bot Utilization Summary",
    font_size=10,
)

# Render the figure inline, then save a standalone HTML copy in the working directory.
fig.show()
fig.write_html('sankey.html')

In [None]:
# Export the figure built above as an SVG file in the working directory.
# NOTE(review): plotly's static image export usually needs an extra package
# (kaleido) installed in the kernel environment — confirm before relying on this cell.
fig.write_image("name.svg")