In [None]:
import dill

In [None]:
# Restore the interpreter session previously dumped to 'user_and_course_dfs.db'.
# The data frames used by later cells (e.g. user_urls_dfs, ai_edx_dfs,
# all_resource_dfs) are not defined anywhere in this notebook section, so they
# presumably come from this session file — TODO confirm.
# SECURITY: dill sessions are pickles; loading one executes arbitrary code.
# Only load a session file you produced yourself or otherwise trust.
# NOTE(review): newer dill releases deprecate load_session in favour of
# load_module — confirm against the installed dill version before upgrading.
dill.load_session('user_and_course_dfs.db')

In [None]:
from utilities import *

In [None]:
# Execute the helper notebooks inside this kernel so the names they define are
# available to the cells below. Helpers used later without a visible definition
# (plot_heatmaps, save_or_display, set_tick_colors, ticks_and_texts, ...) are
# presumably provided here or by `utilities` — TODO confirm.
%run heatmap_utilities.ipynb
%run other_graphing_utilities.ipynb

In [None]:
# Shared colour-scale cap: passed as `vmax=` (and to ticks_and_texts) by the
# "max filtering" heatmap cells further down.
vmax = 300

# NOTE(review): `c` is defined outside this notebook section; to_file=False
# presumably makes save_or_display show figures instead of writing them — confirm.
c.to_file = False

In [None]:
def plot_resource_timeline_heatmap(u, *, figsize, font_scale, title_name="",
                                   title_addition="", threshold=0, vmax=None,
                                   ticks=None, normalized=False,
                                   tick_label_colors=None, workshop_label_loc=None):
    """Plot a resource-by-day usage heatmap for a collection of activity frames.

    Parameters
    ----------
    u : iterable of pandas.DataFrame
        Frames that are concatenated before plotting. The columns ``day``,
        ``display_name`` and ``user`` are read (the first two by
        ``get_timeline_data``, the last one for the title).
    figsize : tuple
        Size passed to ``plt.figure``.
    font_scale : float
        Passed to ``sns.set``; note this changes seaborn's global settings.
    title_name, title_addition : str
        Forwarded to ``get_title`` together with the first value of ``u.user``.
    threshold : int
        Unused. Kept only so existing callers that pass it keep working.
    vmax : number, optional
        Upper bound of the colour scale. When truthy, the lower bound is pinned
        to 0 and, if ``ticks`` is given, the colour bar ticks are replaced.
    ticks : pair, optional
        ``(tick_positions, tick_labels)`` for the colour bar; only used when
        ``vmax`` is truthy.
    normalized : bool
        Forwarded to ``get_timeline_data``; also extends the colour bar label.
    tick_label_colors : optional
        When truthy, a narrow second axes is created next to the heatmap and
        handed to ``set_tick_colors`` along with the y tick labels.
    workshop_label_loc : tuple, optional
        Position arguments for the "workshop dates" note drawn with ``plt.text``.

    The finished figure is handed to ``save_or_display``; nothing is returned.
    """
    sns.set(font_scale=font_scale)

    # Prepare the data before opening a figure so a failure here does not
    # leave an orphaned (and potentially very large) empty figure behind.
    u = pd.concat(u, sort=False)
    df = get_timeline_data(u, normalized)

    cbar_label = "Resource Usage"
    if normalized:
        cbar_label += " Normalized Based On Total Resources Used that Day"

    # Exactly one figure per call. The previous version called plt.figure()
    # twice, leaking an extra empty figure of the same size on every call.
    fig = plt.figure(figsize=figsize)

    if tick_label_colors:
        # Wide axes for the heatmap, narrow axes for the category legend.
        grid = plt.GridSpec(1, 2, width_ratios=[18, 1])
        main_ax = fig.add_subplot(grid[0])
        discrete_cbar_ax = fig.add_subplot(grid[1])
    else:
        # Let seaborn create/pick the axes on the current figure.
        main_ax = None

    # Anchor the colour scale at zero whenever an explicit cap is supplied.
    vmin = 0 if vmax else None

    ax = sns.heatmap(df, xticklabels=True, yticklabels=True,
                     cmap=CMAP, vmin=vmin, vmax=vmax,
                     cbar_kws={'label': cbar_label}, ax=main_ax)

    if main_ax is None:
        main_ax = ax

    # Custom colour bar ticks only make sense with a cap, and only when the
    # caller actually supplied them (ticks=None used to raise a TypeError).
    if vmax and ticks is not None:
        tick_positions, tick_labels = ticks[0], ticks[1]
        resource_cbar = main_ax.collections[0].colorbar
        resource_cbar.set_ticks(tick_positions)
        resource_cbar.set_ticklabels(tick_labels)

    if tick_label_colors:
        set_tick_colors(main_ax.get_yticklabels(which='both'), tick_label_colors, cax=discrete_cbar_ax)

    set_red_text_for_workshops(main_ax.get_xticklabels())
    title = get_title("{} Resource Timeline{}", title_name, title_addition, u.user.iloc[0])
    main_ax.set(title=title, xlabel="Date", ylabel="Resource")

    if workshop_label_loc:
        # Drawn through pyplot, i.e. on whichever axes is current at this point.
        plt.text(*workshop_label_loc, "*Workshop dates are marked in red", color='red')

    save_or_display(title, "Heatmap")
    
def get_timeline_data(u, normalized):
    """Build the resource-by-day usage table behind the timeline heatmap.

    Parameters
    ----------
    u : pandas.DataFrame
        Activity rows; the ``day`` and ``display_name`` columns are read.
    normalized : bool
        When true, each count is divided by the number of rows recorded on
        that day; otherwise the raw per-day count is used.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ``display_name`` (in the order produced by
        ``sort_resources``, which is expected to sort the list in place) and
        one column per ``day`` group, in groupby order. Cells hold float
        values; a resource not used on a given day gets 0.
    """
    grouped_u = u.groupby(by='day')

    # Distinct resource names over all days.
    resources = list({
        name
        for _, day_rows in grouped_u
        for name in day_rows.display_name.unique()
    })
    sort_resources(resources)

    # One pass per day: count every resource at once instead of running a
    # boolean-mask scan per (day, resource) pair, and collect the columns in a
    # dict so the frame is built in a single step rather than grown
    # column by column.
    columns = {}
    for day, day_rows in grouped_u:
        # Plain dict so the lookup below is by equality, never positional.
        counts = day_rows.display_name.value_counts().to_dict()
        denominator = len(day_rows) if normalized else 1
        columns[day] = [counts.get(name, 0) / denominator for name in resources]

    return pd.DataFrame(columns, index=resources)

In [None]:
# Per-key figure sizes handed to plot_heatmaps together with the plotting
# callback. Keys look like 32-character hex digests (presumably hashed user
# ids) and values like matplotlib (width, height) figure sizes — TODO confirm
# against plot_heatmaps, which is not defined in this notebook section.
# NOTE(review): 'd41d8cd98f00b204e9800998ecf8427e' is the MD5 digest of the
# empty string, so that entry may stand for records with a missing user id —
# confirm.
mod_figs = {
    '2cb63d38c0d7afccf3853ee3e8b9653e': (15, 15),
    '34710aa14b316648e90d3e97b8ca952b': (10, 10),
    '3c61e39e023c800499dbfc29e1637f55': (20, 4),
    '798ae73ecfb86a78ab89be6e3ee33595': (4, 8),
    '7e9e8df5eb95867d189a107ca4706ea8': (4, 20),
    '94c0db498842ce1280d49331d4ea4016': (2, 8),
    '96749bbb0b4e0976c985a92bd3f0ddb0': (3, 8),
    'c4b6c2c3a3f08957abc17ee5c4752e84': (4, 15),
    'd41d8cd98f00b204e9800998ecf8427e': (20, 6),
    'fe540222fd0dfe96bcbc20d5a250e146': (6, 15)
}

# Timeline heatmaps for the user_urls_dfs frames.
plot_heatmaps(user_urls_dfs, mod_figs, plot_resource_timeline_heatmap)

In [None]:
# Figure-size overrides for the ai_edx_dfs run; this rebinds the notebook-level
# name mod_figs. Keys look like 32-character hex digests (presumably hashed
# user ids) and values like (width, height) figure sizes — TODO confirm against
# plot_heatmaps, which is not defined in this notebook section.
mod_figs = {
    '34710aa14b316648e90d3e97b8ca952b': (10, 10),
    '96749bbb0b4e0976c985a92bd3f0ddb0': (3, 8),
    '5349577d9e4c7b0d0bfe3403e9be3d47': (4, 15),
    'bfc9b2a954447ccf0ae32e7165c85eb1': (8, 6)
}

# Same timeline heatmaps, with the extra title suffix passed as title_addition.
plot_heatmaps(ai_edx_dfs, mod_figs, plot_resource_timeline_heatmap, title_addition="with SCM and BKY")

In [None]:
# Combined heatmap over user_urls_dfs with the colour scale capped at vmax.
plot_resource_timeline_heatmap(
    user_urls_dfs,
    title_name="Combined",
    title_addition="With Max Value Filtering",
    figsize=(100, 65),
    font_scale=4,
    vmax=vmax,
    ticks=ticks_and_texts(vmax),
)

In [None]:
# Combined heatmap over user_urls_dfs, normalized per day.
# No vmax is passed here, so the earlier "With Max Value Filtering" title was
# wrong, and it was identical to the title of the capped user_urls_dfs heatmap,
# which made the two outputs indistinguishable by title. The title now follows
# the other normalized cells ("Combined and Normalized").
plot_resource_timeline_heatmap(
    user_urls_dfs,
    title_name="Combined and Normalized",
    figsize=(100, 65),
    font_scale=4,
    normalized=True,
)

In [None]:
# Combined heatmap over ai_edx_dfs with the colour scale capped at vmax.
plot_resource_timeline_heatmap(
    ai_edx_dfs,
    title_name="Combined",
    title_addition="with SCM and BKY With Max Filtering",
    figsize=(100, 65),
    font_scale=4,
    vmax=vmax,
    ticks=ticks_and_texts(vmax),
)

In [None]:
# Combined heatmap over all_resource_dfs, capped at vmax, with the y tick
# labels coloured via resource_categories.
plot_resource_timeline_heatmap(
    all_resource_dfs,
    title_name="Combined",
    title_addition="with everything With Max Filtering",
    figsize=(100, 90),
    font_scale=4,
    vmax=vmax,
    ticks=ticks_and_texts(vmax),
    tick_label_colors=resource_categories,
)

In [None]:
# Combined, per-day normalized heatmap over ai_edx_dfs with the y tick labels
# coloured via resource_categories.
plot_resource_timeline_heatmap(
    ai_edx_dfs,
    title_name="Combined and Normalized",
    title_addition="with SCM and BKY",
    figsize=(100, 90),
    font_scale=4,
    normalized=True,
    tick_label_colors=resource_categories,
)

In [None]:
# Combined, per-day normalized heatmap over all_resource_dfs with the y tick
# labels coloured via resource_categories.
plot_resource_timeline_heatmap(
    all_resource_dfs,
    title_name="Combined and Normalized",
    title_addition="with everything",
    figsize=(100, 90),
    font_scale=4,
    normalized=True,
    tick_label_colors=resource_categories,
)

In [None]:
# Report frames whose activity lies entirely inside one of the date windows
# below and that touch many resources, including at least one resource far
# down resource_order.
interval_bounds = [('2019-1-19', '2019-2-24'), ('2019-3-02', '2019-4-30')]
# Bind the name once, directly to (start_date, end_date) pairs, instead of
# first holding strings and then being overwritten with dates.
interesting_intervals = [
    (get_date(start).date(), get_date(end).date())
    for start, end in interval_bounds
]
num_resource_threshold = 60
max_resource_threshold = 100

# Position of every resource in resource_order, built once. setdefault keeps
# the first occurrence, matching what list.index() returns, without a linear
# scan for every (frame, resource) pair.
resource_position = {}
for position, name in enumerate(resource_order):
    resource_position.setdefault(name, position)

i = 0  # number of frames reported

for u in all_resource_dfs:
    if u.empty:
        # .iloc[0] below would raise IndexError on an empty frame.
        continue

    # First and last row are used as the frame's date range, as before.
    first_day, last_day = u.day.iloc[0], u.day.iloc[-1]

    for ii in interesting_intervals:
        if not (first_day >= ii[0] and last_day <= ii[1]):
            continue

        # Names missing from resource_order get -1 and therefore sort first.
        indices = {un: resource_position.get(un, -1) for un in u.display_name.unique()}
        sorted_names = sorted(indices.items(), key=lambda x: x[1])

        if len(sorted_names) <= num_resource_threshold:
            continue

        max_resource = max(indices.values())
        if max_resource <= max_resource_threshold:
            continue

        print("user: {}, num resources: {}, max_resource: {}, interval: {}".format(
            u.user.iloc[0], len(sorted_names), max_resource, ii
        ))
        display([x[0] for x in sorted_names])
        i += 1

print("Num students: " + str(i))