In [1]:
import pandas as pd
import plotly.graph_objects as go

import sys

# Extend the module search path so the project-local `shared` module imported
# below can be found (presumably it lives in ../scripts — verify).
# NOTE(review): the entry is a relative path, so it resolves against the
# kernel's current working directory, not against this notebook's location.
sys.path.append("../scripts")

from shared import get_data, IVY_COLORS

In [2]:
# Load the records through the project helper imported from `shared`.
# The Sankey cell below reads the "Partner" and "Direction" columns of this frame.
df = get_data()
# Bare last expression: shows the first rows as the cell's rich output.
df.head()

Unnamed: 0,Partner,Location,Date,Type,Direction,System,Pick location
0,Harvard University,Matthews Fuller Health Sciences Library Stacks,2023-09-20 16:42:00.390000,,Lent,ILL,
1,Harvard University,Dartmouth Library Depository Depository,2023-07-25 10:01:24.610000,,Lent,ILL,
3,Harvard University,Matthews Fuller Health Sciences Library Stacks,2023-11-13 16:34:05.423000,,Lent,ILL,
8,Harvard University,DLD,2023-03-06 17:53:00.713000,,Lent,ILL,
9,Massachusetts Institute of Technology,Matthews Fuller,2023-01-06 12:48:01.040000,,Lent,ILL,


In [5]:
def make_sankey(df):
    """Build, display, and return a Sankey diagram of borrowing and lending.

    Rows whose ``Direction`` is ``"Borrowed"`` become links from a partner
    node into the single "Dartmouth College" hub node; rows whose
    ``Direction`` is ``"Lent"`` become links from the hub out to a second,
    separate set of partner nodes. A link's value is the number of rows for
    that partner, and a link's color is its partner's ``IVY_COLORS`` entry.

    The incoming and outgoing partner sets are handled independently, so a
    partner that only borrows or only lends is placed correctly.

    Args:
        df: DataFrame with at least ``Partner`` and ``Direction`` columns.

    Returns:
        The ``plotly.graph_objects.Figure``. It is also rendered with
        ``fig.show()`` before being returned.

    Note:
        Every node label (the hub and each partner) is looked up as
        ``IVY_COLORS[label]``; a label missing from that mapping presumably
        raises ``KeyError`` (depends on how ``IVY_COLORS`` is defined in
        ``shared`` — verify).
    """
    hub = "Dartmouth College"

    # Row counts per partner for each direction (groupby drops missing partners).
    borrowed = df[df["Direction"] == "Borrowed"].groupby("Partner").size()
    lent = df[df["Direction"] == "Lent"].groupby("Partner").size()
    incoming = sorted(borrowed.index.to_list())
    outgoing = sorted(lent.index.to_list())

    # Node layout: hub at index 0, incoming partners at 1..n, outgoing partners
    # at n+1..n+m. A partner that both borrows and lends gets two nodes, one on
    # each side of the hub.
    labels = [hub] + incoming + outgoing
    hub_index = 0
    first_outgoing = 1 + len(incoming)
    outgoing_indices = list(range(first_outgoing, first_outgoing + len(outgoing)))

    # One link per partner and direction; all four link lists are index-aligned
    # and have exactly len(incoming) + len(outgoing) entries.
    sources = list(range(1, first_outgoing)) + [hub_index] * len(outgoing)
    targets = [hub_index] * len(incoming) + outgoing_indices
    values = [int(borrowed.loc[partner]) for partner in incoming]
    values += [int(lent.loc[partner]) for partner in outgoing]
    # Incoming links take the color of their source partner, outgoing links
    # the color of their target partner.
    colors = [IVY_COLORS[partner] for partner in incoming + outgoing]

    fig = go.Figure(
        data=[
            go.Sankey(
                node=dict(
                    line=dict(color="black", width=0.5),
                    label=labels,
                    color=[IVY_COLORS[uni] for uni in labels],
                ),
                link=dict(
                    arrowlen=5,
                    source=sources,
                    target=targets,
                    value=values,
                    color=colors,
                ),
            )
        ]
    )
    fig.show()
    return fig


# Build the diagram; make_sankey also displays it via fig.show().
fig = make_sankey(df)
# Export an interactive HTML copy and a static PNG copy.
# NOTE(review): both calls assume the ../out directory already exists, and
# static image export presumably needs plotly's image engine (e.g. kaleido)
# installed — confirm for the target environment.
fig.write_html("../out/sankey.html")
fig.write_image("../out/sankey.png", scale=5.0)