In [None]:
# Restore a previously saved interpreter session from disk.
# NOTE(review): names used by later cells (e.g. user_urls_dfs, ai_edx_dfs) are
# not defined anywhere in this notebook, so they presumably come from this
# session file -- confirm.
import dill
# NOTE(review): load_session unpickles the file, which can execute arbitrary
# code; only load session files that come from a trusted source.
dill.load_session('user_and_course_dfs.db')

In [None]:
%run other_graphing_utilities.ipynb

from durations import *
import networkx as nx
from matplotlib.offsetbox import AnchoredText
from datetime import timedelta
from utilities import *

In [None]:
# Switch off the `to_file` flag on the shared `c` object (defined outside this cell).
# NOTE(review): presumably this makes save_or_display() show figures inline
# instead of writing them to disk -- confirm against other_graphing_utilities.ipynb.
c.to_file = False

In [None]:
# Todo change gs_ratio to width_ratios
def draw_network(sums, title_name="", title_addition="", figsize=(8.6, 3), gs_ratio=[1, 1],
                 attribute='duration', tick_transform=lambda x: timedelta(seconds=x),
                 node_size_reps=pd.Series([]), node_size_reps_type='Durations',
                 node_size=NODE_SIZE, arrow_size=ARROW_SIZE, node_colors=None,
                 height_ratios=[18, 1], size_label_equals_loc=(0.0008, 0.0014),
                 hspace=0.0, size_rep_unit='seconds'):
    """Draw a directed resource-transition network and hand it to save_or_display.

    The figure has an index-to-name key on the left, the graph next to it, a
    colorbar for the edge attribute, optionally a colorbar for node colors and
    optionally a bottom row explaining what the node sizes stand for.

    Parameters
    ----------
    sums : DataFrame
        Edge list with 'from' and 'to' columns plus a column named `attribute`.
    title_name : str
        Last part of the title; "Combined" is used when it is empty.
    title_addition : str
        Text inserted into the title right after the attribute name.
    figsize : tuple
        Figure size passed to plt.figure.
    gs_ratio : sequence
        Width ratios of the grid columns. Must have 3 entries when
        `node_colors` is given, otherwise 2. It is never modified.
    attribute : str
        Edge attribute used for edge colors and as the colorbar label.
    tick_transform : callable or None
        Applied to each colorbar tick to build its label; when falsy the
        ticks are labelled with str().
    node_size_reps : Series
        Values indexed by node. When non-empty, nodes are sized through
        get_node_sizes and a size legend row is added.
    node_size_reps_type : str
        Heading of the size legend; also appended to the title.
    node_size, arrow_size
        Base node size and arrow size passed to the networkx drawing calls.
    node_colors
        Passed to get_colors_from_dict together with the node list; when that
        yields no colors the figure falls back to the two-column layout.
    height_ratios : sequence
        Row height ratios, used only when `node_size_reps` is non-empty.
    size_label_equals_loc : tuple
        x positions of the "= value" texts for the smallest and largest node.
    hspace : float
        Vertical spacing passed to GridSpec.
    size_rep_unit : str
        Unit printed after the values in the size legend.

    Raises
    ------
    AssertionError
        If `gs_ratio` / `height_ratios` do not have the expected length, or
        if the size values do not cover exactly the nodes of the graph.
    """
    if node_colors:
        assert(len(gs_ratio) == 3)
    else:
        assert(len(gs_ratio) == 2)

    has_size_reps = bool(node_size_reps.size)
    if has_size_reps:
        assert(len(height_ratios) == 2)

    # Work on a private copy: the last entry is dropped below when no node
    # colors can be resolved, and that must not leak into the caller's list
    # (or into the shared default argument).
    gs_ratio = list(gs_ratio)

    G = nx.from_pandas_edgelist(sums, source='from', target='to',
                                edge_attr=attribute, create_using=nx.DiGraph())

    pos = nx.nx_agraph.graphviz_layout(G)

    nodes = list(G.nodes())

    attributes = [G[u][v][attribute] for u, v in G.edges()]
    max_attribute = max(attributes)
    vmax = max_attribute

    # A single edge gets a degenerate color range and one fixed color below.
    single_edge = len(attributes) == 1
    vmin = max_attribute if single_edge else 0

    if has_size_reps:
        vert_subs = 2
        node_size_reps = node_size_reps[node_size_reps.index.isin(nodes)]
        nodes, sizes = get_node_sizes(nodes, node_size_reps, node_size)
        assert(node_size_reps.size == len(nodes) == len(sizes))
    else:
        vert_subs = 1
        height_ratios = [1]
        sizes = node_size

    horz_subs = 2
    if node_colors:
        colors, combos, cmap = get_colors_from_dict(nodes, node_colors)

        if not colors:
            # Nothing to color: fall back to the two-column layout.
            node_colors = None
            del gs_ratio[-1]
        else:
            horz_subs = 3

    fig = plt.figure(figsize=figsize)
    grid = plt.GridSpec(vert_subs, horz_subs, width_ratios=gs_ratio,
                        height_ratios=height_ratios, hspace=hspace)

    if has_size_reps:
        ann_label_ax = fig.add_subplot(grid[0, 0])
        main_ax = fig.add_subplot(grid[0, 1])
        size_label_ax = fig.add_subplot(grid[1, 0:])
    else:
        ann_label_ax = fig.add_subplot(grid[0])
        main_ax = fig.add_subplot(grid[1])

    if node_colors:
        node_colors_cbar_ax = fig.add_subplot(grid[0, 2] if has_size_reps else grid[2])

    # First axes added to the figure, i.e. the annotation axes.
    attribute_cbar_ax = fig.axes[0]

    if node_colors:
        node_drawings = nx.draw_networkx_nodes(G, pos, nodelist=nodes, node_size=sizes,
                                               ax=main_ax, node_color=colors)
    else:
        node_drawings = nx.draw_networkx_nodes(G, pos, nodelist=nodes, node_size=sizes, ax=main_ax)

    # Nodes sit above every edge; edges are stacked by their attribute value.
    node_drawings.set_zorder(max_attribute + 1)

    if single_edge:
        single_color = mpl.colors.rgb2hex(CMAP(int(CMAP.N/2)))
        edge_drawings = nx.draw_networkx_edges(G, pos, edge_color=single_color,
                                               arrowsize=arrow_size, ax=main_ax)
    else:
        edge_drawings = nx.draw_networkx_edges(G, pos, edge_color=attributes,
                                               edge_cmap=CMAP, arrowsize=arrow_size,
                                               vmin=vmin, vmax=vmax, ax=main_ax)

        for i in range(len(edge_drawings)):
            edge_drawings[i].set_zorder(attributes[i])

    # Add node labels: the position in resource_order, or a running number
    # past its end for nodes that are not listed there.
    labels = {}
    offset = 1
    resource_len = len(resource_order)

    for n in nodes:
        try:
            labels[n] = resource_order.index(n)
        except ValueError:
            labels[n] = resource_len + offset
            offset += 1

    label_drawings = nx.draw_networkx_labels(G, pos, labels=labels, ax=main_ax)
    annotations = {}
    for key, text in label_drawings.items():
        annotations[key] = text.get_text()
        text.set(zorder=max_attribute + 2)

    # Add key for node labels
    ann_text = get_ann_text(annotations)
    anchored_text = AnchoredText(ann_text, loc=2)
    ann_label_ax.plot(0, 0)
    ann_label_ax.add_artist(anchored_text)

    # Define the main colorbar
    norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)
    sm = plt.cm.ScalarMappable(cmap=CMAP, norm=norm)
    sm.set_array([])

    # Get the ticks for the colorbar
    if single_edge:
        ticks = [max_attribute]
    else:
        ticks = np.linspace(0, max_attribute, num=N_TICKS)

    if not tick_transform:
        tick_labels = [str(t) for t in ticks]
    else:
        tick_labels = [tick_transform(t) for t in ticks]

    if has_size_reps:
        cbar = plt.colorbar(sm, ticks=ticks, ax=attribute_cbar_ax)
    else:
        # `sm` is not attached to any axes, so name the parent axes
        # explicitly. The current axes is what older Matplotlib versions
        # picked implicitly; newer versions raise when `ax` is omitted.
        cbar = plt.colorbar(sm, ticks=ticks, ax=plt.gca())

    cbar.set_ticklabels(tick_labels)
    cbar.ax.set_ylabel(attribute)

    # Hide axes
    ann_label_ax.set_axis_off()
    main_ax.set_axis_off()

    if has_size_reps:
        # Size legend: smallest and largest marker with the value each stands for.
        size_label_ax.set_axis_off()
        size_label_ax.text(-0.003, 1.4, node_size_reps_type + ":")
        size_label_ax.plot(0, 1, 'ro', markersize=math.sqrt(min(sizes)), clip_on=False)
        size_label_ax.plot(0, 0, 'ro', markersize=math.sqrt(max(sizes)), clip_on=False)
        size_label_ax.text(size_label_equals_loc[0], 1,
                           " = {} {}".format(min(node_size_reps), size_rep_unit))
        size_label_ax.text(size_label_equals_loc[1], 0,
                           " = {} {}".format(max(node_size_reps), size_rep_unit))
        # Only the title mentions the size measure; the colorbar label was set above.
        attribute = attribute + " and " + node_size_reps_type

    if node_colors:
        draw_discrete_colorbar_from_cmap(combos, cmap, "Knowledge Type", cax=node_colors_cbar_ax)

    if not title_name:
        title_name = "Combined"
    title = "Resource Network Graph Measuring {}{} for {}".format(attribute, title_addition, title_name)
    save_or_display(title)
    
def get_ann_text(annotations, max_len=MAX_LINE_LENGTH):
    """Build the wrapped "index: name" key shown next to the network.

    Parameters
    ----------
    annotations : dict
        Maps each resource name to its label text; the labels must be
        convertible with int() because the entries are sorted numerically.
    max_len : int
        Maximum number of characters per line before a line is broken.

    Returns
    -------
    str
        A heading followed by one "label: name" entry per line. Entries
        longer than `max_len` are broken; an EM_DASH is appended at the break
        unless the character right after the break is a space.
    """
    ann_sorted = sorted(annotations.items(), key=lambda x: int(x[1]))

    text = "Resource Index to Name:\n"
    text += "".join("{}: {}\n".format(v, k) for k, v in ann_sorted)

    wrapped = ""

    while text:
        text = text.lstrip()

        # Only whitespace was left (for example a trailing space after a
        # break point); without this guard index() below raises ValueError.
        if not text:
            break

        first_newline = text.index('\n')

        if first_newline > max_len:
            endline = u"{}\n".format(EM_DASH) if text[max_len] != ' ' else '\n'

            wrapped += text[:max_len] + endline

            text = text[max_len:]
        else:
            wrapped += text[:first_newline+1]

            text = text[first_newline+1:]

    return wrapped

def get_node_sizes(nodes, reps, node_size, scale=3, max_size_scale=2):
    """Return the nodes in sorted order together with one drawing size per value.

    `reps` is ordered by its index and `nodes` is sorted independently, so the
    two results only line up when `reps` is indexed by exactly those nodes.
    Each size is a fixed base plus a share of a fixed span, the share being
    the value divided by the largest value in `reps`.
    """
    max_size = node_size * max_size_scale

    ordered_nodes = list(nodes)
    ordered_nodes.sort()
    ordered_reps = reps.sort_index()

    largest_rep = ordered_reps.max()

    base = max_size/scale
    # NOTE(review): this evaluates as (max_size*scale) - (1/scale), so the
    # largest node ends up bigger than max_size -- confirm this is intended.
    span = max_size*scale-1/scale

    actual_sizes = []
    for rep in ordered_reps:
        actual_sizes.append(base + (rep/largest_rep)*span)

    return ordered_nodes, actual_sizes

def analyze_knowledge_types(data, avg_duration_threshold=DURATION_AVERAGE_THRESHOLD):
    """Summarize how transitions move between knowledge-type categories.

    Parameters
    ----------
    data
        Passed unchanged to get_transition_counts; the result is grouped by
        its "from" column and must provide a "to" attribute per row.
    avg_duration_threshold
        A source resource counts as "over threshold" when
        int(duration_avgs[resource]) is strictly greater than this value.

    Returns
    -------
    tuple
        (transitions_from_to, percent_df,
         categories_avg_duration_over_threshold,
         resources_avg_duration_over_threshold), where

        * transitions_from_to[a][b] is the number of transition rows whose
          source falls in category a and whose target falls in category b,
        * percent_df holds fractions formatted with two decimals: row 'All'
          is each category's share of all counted source resources, the other
          rows are the shares of each source category's transitions going to
          each target category; a row whose total is zero is reported as
          "0.00" throughout,
        * the last two values count, per category, the source resources over
          the threshold and list those resources.

    Relies on the notebook globals KTYPES, duration_avgs and
    resource_categories.
    """
    def _share(part, whole):
        # A category without any counts has no meaningful share; report it
        # as zero instead of failing with ZeroDivisionError.
        return "{:0.2f}".format(part / whole if whole else 0.0)

    transitions_from_to = {ktype: {kt: 0 for kt in KTYPES} for ktype in KTYPES}

    categories_avg_duration_over_threshold = {kt: 0 for kt in KTYPES}
    resources_avg_duration_over_threshold = []

    total_of_each_category = {kt: 0 for kt in KTYPES}

    transition_counts = get_transition_counts(data)

    for resource, transitions in transition_counts.groupby("from"):
        types = get_knowledge_types_used_single(resource, resource_categories)
        categories = get_knowledge_type_categories(types)

        over_avg_duration_threshold = int(duration_avgs[resource]) > avg_duration_threshold

        if over_avg_duration_threshold:
            resources_avg_duration_over_threshold.append(resource)

        for category in categories:
            total_of_each_category[category] += 1

            if over_avg_duration_threshold:
                categories_avg_duration_over_threshold[category] += 1

        for _, transition in transitions.iterrows():
            to_types = get_knowledge_types_used_single(transition.to, resource_categories)
            to_categories = get_knowledge_type_categories(to_types)

            # Every (source category, target category) pair is counted once
            # per transition row.
            for from_c in categories:
                for to_c in to_categories:
                    transitions_from_to[from_c][to_c] += 1

    all_total = sum(total_of_each_category.values())
    percents = [[_share(v, all_total) for v in total_of_each_category.values()]]

    for targets in transitions_from_to.values():
        total = sum(targets.values())
        percents.append([_share(v, total) for v in targets.values()])

    columns = list(transitions_from_to.keys())
    percent_df = pd.DataFrame(data=percents,
                              index=['All'] + columns,
                              columns=columns)

    return transitions_from_to, percent_df, \
           categories_avg_duration_over_threshold, resources_avg_duration_over_threshold

In [None]:
# One duration-sum network per frame in user_urls_dfs; frames that yield no
# transition sums are skipped.
for u in user_urls_dfs:
    sums = get_transition_duration_sums(get_durations([u]))

    if not len(sums):
        continue

    user = u.user.iloc[0]
    draw_network(sums, title_name=user)

In [None]:
# Per-user layout overrides: user id -> [figsize] or [figsize, gs_ratio].
mod_figs = {
    '02870db0839f6b542129edb25e2c823d': [(8.6, 5)],
    '0b0a5af098f33fdb721ada7e9c8eaeea': [(15, 10), [1, 2]],
    '0e6889b45340b268fb24830d9deed4ea': [(11.4, 3), [1, 1.5]],
    '1885ebc638c7b120417b82be18f461c5': [(13, 5), [1, 1.5]],
    '19854eb2498fb05c3bc661a2b5369775': [(13, 5), [1, 1.5]],
    '1fa8ba0e70222969eae8a0b482b42628': [(13, 8), [1, 1.5]],
    '2227ccaf7eb7392ecf41d92e75cbcf8b': [(8.6, 5)],
    '234d885579bc10fcf67efac0ead40ea9': [(15, 15)]
}

for un, overrides in mod_figs.items():
    user = next((u for u in user_urls_dfs if u.user.iloc[0] == un), None)

    # An id that is not present in user_urls_dfs would otherwise reach
    # get_durations as None; skip it with a visible message instead.
    if user is None:
        print("{}: not found in user_urls_dfs, skipping".format(un))
        continue

    sums = get_transition_duration_sums(get_durations([user]))

    print(un)

    layout = {'figsize': overrides[0]}
    if len(overrides) > 1:
        # Pass a copy so the override table itself can never be altered.
        layout['gs_ratio'] = list(overrides[1])

    draw_network(sums, title_name=un, **layout)

In [None]:
# Duration-sum network over all of user_urls_dfs; no title_name is given, so
# draw_network titles it "Combined".
uudd = get_durations(user_urls_dfs)
draw_network(
    get_transition_duration_sums(uudd),
    figsize=(24, 30),
    gs_ratio=[1, 3],
)

In [None]:
# One duration-sum network per frame in ai_edx_dfs; frames that yield no
# transition sums are skipped. The user id is printed before each figure.
for u in ai_edx_dfs:
    sums = get_transition_duration_sums(get_durations([u]))

    if not len(sums):
        continue

    user = u.user.iloc[0]
    print(user)

    draw_network(sums, title_name=user)
    #TODO fix assert(len(gs_ratio) == 2)

In [None]:
# Duration-sum network over all of ai_edx_dfs; no title_name is given, so
# draw_network titles it "Combined".
aedd = get_durations(ai_edx_dfs)
draw_network(
    get_transition_duration_sums(aedd),
    figsize=(30, 20),
    gs_ratio=[1, 3],
)

In [None]:
# Transition-count network for user_urls_dfs. tick_transform=None makes the
# colorbar show the plain tick values instead of timedeltas.
draw_network(
    get_transition_counts(user_urls_dfs),
    attribute='count',
    tick_transform=None,
    title_addition=' Without SCM and BKY',
    figsize=(30, 20),
    gs_ratio=[1, 3],
)

In [None]:
# Transition-count network for ai_edx_dfs. tick_transform=None makes the
# colorbar show the plain tick values instead of timedeltas.
draw_network(
    get_transition_counts(ai_edx_dfs),
    attribute='count',
    tick_transform=None,
    title_addition=' With SCM and BKY',
    figsize=(30, 20),
    gs_ratio=[1, 3],
)

In [None]:
# Transition-count network for user_urls_dfs with nodes sized by the
# per-resource duration sums.
resource_durations = get_resource_duration_sums(get_durations(user_urls_dfs))

draw_network(
    get_transition_counts(user_urls_dfs),
    attribute='count',
    tick_transform=None,
    node_size_reps=resource_durations,
    title_addition=' Without SCM and BKY',
    figsize=(30, 18.5),
    gs_ratio=[1, 3],
    node_size=200,
    arrow_size=30,
)

In [None]:
# Transition-count network for ai_edx_dfs with nodes sized by the
# per-resource duration sums.
resource_durations = get_resource_duration_sums(get_durations(ai_edx_dfs))

draw_network(
    get_transition_counts(ai_edx_dfs),
    attribute='count',
    tick_transform=None,
    node_size_reps=resource_durations,
    title_addition=' With SCM and BKY',
    figsize=(30, 18.5),
    gs_ratio=[1, 3],
    node_size=200,
    arrow_size=30,
)

In [None]:
# Network of average transition durations for user_urls_dfs; the default
# tick_transform is kept, so the colorbar ticks are rendered as timedeltas.
uudd = get_durations(user_urls_dfs)
draw_network(
    get_transition_duration_avgs(uudd),
    attribute='duration average',
    title_addition=" Without SCM and BKY",
    figsize=(30, 20),
    gs_ratio=[1, 3],
    arrow_size=30,
)

In [None]:
# Network of average transition durations for ai_edx_dfs; the default
# tick_transform is kept, so the colorbar ticks are rendered as timedeltas.
aedd = get_durations(ai_edx_dfs)
draw_network(
    get_transition_duration_avgs(aedd),
    attribute='duration average',
    title_addition=" With SCM and BKY",
    figsize=(30, 20),
    gs_ratio=[1, 3],
    arrow_size=30,
)

In [None]:
# Transition-count network for user_urls_dfs with nodes sized by average
# resource duration and colored via resource_categories. The third gs_ratio
# entry is the column for the node-color colorbar.
# Both names below are reused by the cells that follow.
transition_counts = get_transition_counts(user_urls_dfs)
resource_duration_avgs = get_resource_duration_avgs(get_durations(user_urls_dfs))

draw_network(
    transition_counts,
    attribute='count',
    tick_transform=None,
    node_size_reps=resource_duration_avgs,
    node_colors=resource_categories,
    title_addition=' Without SCM and BKY',
    figsize=(40, 30),
    gs_ratio=[1, 3.5, 0.15],
    node_size=400,
    arrow_size=30,
)

In [None]:
# One sized and colored network per (resource, transitions) pair produced by
# get_transitions_to_and_from. The gs_ratio list is built anew on every
# iteration, so each call receives its own list.
for resource, transitions in get_transitions_to_and_from(transition_counts):
    draw_network(
        transitions,
        attribute='count',
        tick_transform=None,
        node_size_reps=resource_duration_avgs,
        node_size_reps_type='Duration Average',
        size_rep_unit='Seconds',
        node_colors=resource_categories,
        title_addition=' For ' + resource + ' Without SCM and BKY',
        figsize=(20, 10),
        gs_ratio=[1.8, 3.5, 0.15],
        height_ratios=[13, 1],
        hspace=0.2,
        size_label_equals_loc=(0.0008, 0.0019),
        node_size=300,
        arrow_size=30,
    )

In [None]:
# One sized and colored network per (resource, transitions) pair produced by
# get_transitions_to_and_from, titled "... Without Homepage".
# NOTE(review): the inputs here (transition_counts, resource_duration_avgs)
# are the same names the preceding cell uses and nothing visible in this
# notebook removes the homepage from them -- confirm the data is filtered
# before this cell runs.
for resource, transitions in get_transitions_to_and_from(transition_counts):
    draw_network(
        transitions,
        attribute='count',
        tick_transform=None,
        node_size_reps=resource_duration_avgs,
        node_size_reps_type='Duration Average',
        size_rep_unit='Seconds',
        node_colors=resource_categories,
        title_addition=' For ' + resource + ' Without SCM and BKY and Without Homepage',
        figsize=(20, 10),
        gs_ratio=[1.8, 3.5, 0.15],
        height_ratios=[13, 1],
        hspace=0.2,
        size_label_equals_loc=(0.0008, 0.0019),
        node_size=300,
        arrow_size=30,
    )