In [1]:
import numpy as np
import pandas as pd
from datetime import date

from bokeh.layouts import widgetbox, row
from bokeh.models.widgets import MultiSelect, RadioGroup, Select
from bokeh.plotting import figure, curdoc
from bokeh.palettes import Set1_9


In [2]:
start_term = "2014.Spring"


In [3]:
def date_diff_weeks(start, end):
    """
    returns the difference between two dates in integer weeks
    """
    diff = (pd.to_datetime(end) - pd.to_datetime(start))
    return int( diff / np.timedelta64(1,'W'))


def adm_week(d):
    """
    returns calendar week number and Admissions Week Number for a given date, d
    """
    year = d.year
    week_number = d.isocalendar()[1]

    if d >= date(year, 9, 1):
        adm_start = date(year, 9, 1)
    else:
        adm_start = date(year - 1, 9, 1)

    adm_week_number = min(date_diff_weeks(adm_start, d), 53)

    return week_number, adm_week_number



In [39]:
def create_figure(df):
    stage = stage_list[stage_rg.active]

    prog = program.value

    title = (
        f"{prog} - Admissions Weekly Summary - Week {adm_week_number:d} ({today_str})"
    )

    term = select_term.value

    term_list = list(terms.value)
    term_list.reverse()

    print(term, stage, prog)
    y_max = df[(term, stage, prog)].max()
    for t in term_list:
        print(t, stage, prog)
        if (t, stage, prog) in df.index.values:
            ym = df[(t, stage, prog)].max()
            if ym > y_max:
                y_max = ym

    TOOLS = "pan,wheel_zoom,box_zoom,save,reset"
    # TOOLS="crosshair,pan,wheel_zoom,box_zoom,save,reset"

    p = figure(
        plot_width=800,
        plot_height=600,
        title=title,
        x_axis_label="Admissions Week Number (year starts Sept 1)",
        y_axis_label=stage,
        tools=TOOLS,
        x_range=(0, 54),
        y_range=(0, y_max * 1.05),
    )

    p.line(df.index, df[(term, stage, prog)], color="red", line_width=2, legend_label=term)

    c = 1
    for t in term_list:
        p.line(df.index, df[(t, stage)], color=Set1_9[c], legend_label=t)
        if c <= 8:
            c += 1
        else:
            c = 1

    # week_number line
    p.line(
        (adm_week_number, adm_week_number),
        (-1000, 5000),
        color="green",
        line_width=0.8,
        line_dash="dashed",
        legend_label=f"Week {adm_week_number:d}",
        alpha=0.8,
    )

    p.legend.location = "top_left"

    p.yaxis.minor_tick_line_color = None

    return p


def update(attr, old, new):
    layout.children[1] = create_figure(summ_t)


def update_prog(attr, old, new):
    terms_opt = sorted(
        list(
            pt.loc[
                (
                    (pt["curriculum"] == program.value)
                    & (pt["stage"] == stage_list[stage_rg.active])
                ),
                "year_term",
            ]
            .dropna()
            .unique()
        )
    )
    terms_opt = [l for l in terms_opt if "Fall" in l]
    if len(terms_opt) > 1:
        terms_opt.remove(select_term.value)
    terms.options = terms_opt
    terms.value = [terms_opt[-1]]
    layout.children[1] = create_figure(summ_t)


def update_term(attr, old, new):
    terms_opt = sorted(
        list(
            pt.loc[
                (
                    (pt["curriculum"] == program.value)
                    & (pt["stage"] == stage_list[stage_rg.active])
                ),
                "year_term",
            ]
            .dropna()
            .unique()
        )
    )
    terms_opt = [l for l in terms_opt if "Fall" in l]
    terms_opt.remove(select_term.value)
    terms.options = terms_opt
    terms.value = terms_opt
    layout.children[1] = create_figure(summ_t)
    program_list = sorted(
        list(
            pt.loc[
                (
                    (pt["year_term"] == select_term.value)
                    & (pt["stage"] == stage_list[stage_rg.active])
                ),
                "curriculum",
            ]
            .dropna()
            .unique()
        )
    )
    prog = program_list.index(program.value)
    program.options = program_list
    program.value = program_list[prog]


In [5]:
today = date.today()
today_str = today.strftime("%Y%m%d")


In [8]:
df = pd.read_hdf("data/stage_data", key="weekly")
df = df[(df["year_term"] > start_term )]
week_number, adm_week_number = adm_week(today)

# curr_list = sorted(list(df['curriculum'].dropna().unique()))

print(df.shape)

(25998, 58)


In [9]:
summ = df.groupby(["year_term", "stage", "curriculum"]).sum()
summ_t = summ.transpose()


In [10]:
pt = (
    df.loc[:, ["year_term", "stage", "curriculum"]]
    .dropna()
    .drop_duplicates(["year_term", "stage", "curriculum"])
)


In [11]:
# widgets
stage_list = ["Applied", "Accepted", "Deposited"]
stage_rg = RadioGroup(name="Stage:", labels=stage_list, active=2)
stage_rg.on_change("active", update)


In [12]:
all_terms = sorted(list(df["year_term"].dropna().unique()))
all_terms = [l for l in all_terms if "Fall" in l]
select_term = Select(title="Selected Term:", value=all_terms[-1], options=all_terms)
select_term.on_change("value", update_term)


In [13]:
program_list = sorted(
    list(
        pt.loc[
            (
                (pt["year_term"] == select_term.value)
                & (pt["stage"] == stage_list[stage_rg.active])
            ),
            "curriculum",
        ]
        .dropna()
        .unique()
    )
)
program = Select(
    title="Selected Academic Program:", value=program_list[0], options=program_list
)
program.on_change("value", update_prog)


In [14]:
terms_opt = all_terms.copy()
terms_opt.remove(select_term.value)
terms = MultiSelect(
    title="Other Displayed Terms: (ctrl-click to select/de-select)",
    options=terms_opt,
    size=5,
    value=[terms_opt[-1]],
)
terms.on_change("value", update)


In [40]:
# layout
controls = widgetbox([stage_rg, select_term, program, terms])
layout = row(controls, create_figure(summ_t))

curdoc().add_root(layout)
curdoc().title = "Admissions Weekly Report"


2021.Fall Deposited BAKA
2020.Fall Deposited BAKA


  if (t, stage, prog) in df.index.values:


In [16]:
print(program.options)

Select(id='1003', ...)


In [17]:
print(program.value)

BAKA


In [25]:
y_max = summ_t[('2021.Fall', 'Deposited', 'BAKA')].max()
print(y_max)

1


In [24]:
y_max = summ_t[('2020.Fall', 'Deposited', 'BAKA')].max()
print(y_max)

KeyError: 3010

In [27]:
summ_t.head()

year_term,2015.Fall,2015.Fall,2015.Fall,2015.Fall,2015.Fall,2015.Fall,2015.Fall,2015.Fall,2015.Fall,2015.Fall,...,2021.Spring,2021.Spring,2021.Spring,2021.Spring,2021.Spring,2021.Spring,2021.Spring,2021.Spring,2021.Spring,2021.Spring
stage,Accepted,Accepted,Accepted,Accepted,Accepted,Accepted,Accepted,Accepted,Accepted,Accepted,...,Deposited,Deposited,Deposited,Deposited,Deposited,Deposited,Deposited,Deposited,Deposited,Deposited
curriculum,AALM,BAKA,BASM,BIOG,CASM,COMM,CULA,EBSB,ECOR,ENST,...,INST,MNGTS,MSNRCA,MSNRCF,MSNRCS,NRCM,PACM,PSYCH,RECR,UNDM
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
2,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
3,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0,0
4,0,1,0,1,1,0,1,0,0,0,...,0,0,0,0,0,2,0,0,1,0


In [31]:
summ_t[('2018.Fall', 'Deposited', 'BAKA')]

00    0
01    1
02    1
03    1
04    1
05    1
06    1
07    1
08    1
09    1
10    1
11    1
12    1
13    1
14    1
15    1
16    1
17    1
18    1
19    1
20    1
21    1
22    2
23    2
24    2
25    2
26    2
27    2
28    2
29    2
30    2
31    2
32    3
33    3
34    3
35    3
36    3
37    3
38    3
39    3
40    3
41    3
42    3
43    3
44    3
45    3
46    3
47    3
48    3
49    3
50    3
51    3
52    3
53    3
Name: (2018.Fall, Deposited, BAKA), dtype: int64