# graph diffusion

what follows is a walkthrough of different graph diffusion and labelling algorithms, largely based on my review of [this paper](https://arxiv.org/abs/1703.02618)

In [None]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import plotly
import plotly.graph_objs as go
import plotly.offline
import seaborn as sns
import tqdm

sns.set()

plotly.offline.init_notebook_mode(connected=True)

## generating a network for doing label propagation

let's create a basic network with $N$ nodes, $L$ possible labels, and some number $n_\ell < N$ of those $N$ nodes being labelled. our goal is to predict the other label values in a semi-supervised way.

we will actually use the citeseer knowledge graph dataset as was done in the paper listed above. fortunately that's available for download as part of the GCN package [here](https://github.com/tkipf/gcn)

clone that repository somewhere and then either add its `gcn/` directory to your python path or `cd` into it (as done below) so that its `utils` module can be imported

In [None]:
pwd

In [None]:
cd /Users/zach.lamberty/code/gcn/gcn/

In [None]:
import utils

In [None]:
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = utils.load_data('citeseer')

return to the place we saw in `pwd` up above

In [None]:
cd /Users/zach.lamberty/personal/notebooks/Zachademy/

quick summary of what we just downloaded:

+ `adj`: node adjacency matrix
+ `features`: the predictor dataset x (contains train and test, labelled and unlabelled)
+ `y_{train,val,test}`: labels for training, validation, and test (all have the same shape, but the section that is filled in changes from one to the next)
+ `{train,val,test}_mask`: mask indexer for training, validation, and test (used to subset features to the desired subset)

In [None]:
adj.shape

In [None]:
features.shape

In [None]:
y_train.shape

In [None]:
f, (ax0, ax1, ax2) = plt.subplots(nrows=1, ncols=3)
ax0.imshow(y_train, aspect='auto')
ax1.imshow(y_val, aspect='auto')
ax2.imshow(y_test, aspect='auto')

In [None]:
# flag the index range that starts right after the first (train + validation)
# nodes and stops at the first test node as "unlabelled"
unlab_start = int(train_mask.sum() + val_mask.sum())
unlab_stop = test_mask.argmax()
# the `np.bool` alias was removed in NumPy 1.24, so allocate the mask with the
# builtin `bool` dtype directly instead of converting afterwards
unlab_mask = np.zeros(y_train.shape[0], dtype=bool)
unlab_mask[unlab_start:unlab_stop] = True

In [None]:
all_mask = train_mask + val_mask + unlab_mask + test_mask

In [None]:
plt.imshow(adj.todense())

in the paper they make reference to soft label data set $y_{ij}$ and affinity matrix $W$; let's build / alias those

In [None]:
W = adj

In [None]:
y = y_train + y_test + y_val

plt.imshow(y, aspect='auto')

my own view: let's create a dataframe with `node_index`, `is_seed`, and `label` values as described in the docstring of the algorithm functions defined below

In [None]:
df0 = pd.DataFrame({
    'node_index': list(range(y.shape[0])),
    'is_seed': train_mask,
})

df0.loc[train_mask, 'label'] = y_train[train_mask, :].argmax(axis=1)

In [None]:
NUM_LABELS = df0.label.nunique()

## algo 1: label propagation

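basically, on every iteration each node's label scores are replaced by the degree-normalized average of its neighbors' scores, while the seed nodes are held at their known labels; after the last iteration every node is assigned its highest-scoring label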

In [None]:
def _init_label_prop(df_labels, adj=adj):
    """build the inverse degree matrix and the iteration-0 label scores

    args:
        df_labels (pd.DataFrame): frame with `node_index`, `is_seed` and
            `label` columns (see `label_prop`)
        adj: adjacency matrix of the graph; only `adj.sum(axis=0)` is used here

    returns:
        np.ndarray: diagonal matrix holding 1 / degree for every node, and 0
            for nodes whose degree is 0
        pd.DataFrame: one row per node and one column per label, indexed by
            (`node_index`, `iteration`) with `iteration` fixed at 0

    raises:
        None

    """
    degree = np.ravel(adj.sum(axis=0)).astype(float)

    # isolated nodes have degree 0; a naive 1 / degree puts inf on the diagonal
    # (and NaN into any dense product), so give those nodes a weight of 0
    inv_degree = np.zeros_like(degree)
    connected = degree != 0
    inv_degree[connected] = 1 / degree[connected]
    D_inv = np.diag(inv_degree)

    # y_lp for the known labeled cases should actually be pm 1, not 0 and 1,
    # per the paper's instructions. start from a one-hot (node x label) table
    df_y = df_labels.copy()
    df_y.loc[:, 'count_val'] = 1
    df_y = df_y.pivot(
        index='node_index',
        columns='label',
        values='count_val'
    )
    # unlabelled nodes pivot into a NaN column; that column does not exist when
    # every node already carries a label, which must not be an error
    df_y.drop(columns=np.nan, inplace=True, errors='ignore')
    df_y.fillna(0, inplace=True)

    # for the seed nodes, we want this df to have -1 for known negatives and +1
    # for known positives. NOTE(review): the boolean mask is aligned on the
    # index, so this assumes df_labels' index equals `node_index` -- confirm
    df_y.loc[df_labels.is_seed, :] = df_y.loc[df_labels.is_seed, :] * 2 - 1
    df_y.loc[:, 'iteration'] = 0

    df_y = df_y.set_index('iteration', append=True)

    return D_inv, df_y

In [None]:
def label_prop(df_labels, n=50, adj=adj, return_details=False):
    """iterative label propagation by neighbor averaging

    every iteration replaces each node's label scores with the inverse-degree
    weighted sum of its neighbors' scores (`D_inv @ adj @ y`) and then resets
    the seed rows to their previous values, so known labels never drift. after
    `n` iterations every node whose best score is non-zero is assigned its
    highest-scoring label.

    note: rows are matched to `adj` by position, so `df_labels` is assumed to
    list the nodes in the same order as the rows of `adj`.

    args:
        df_labels (pd.DataFrame): a dataframe which contains three features:
            `node_index`, a unique identifier for each node; `is_seed`, a
            boolean indicating whether or not that particular node should be
            considered a seed for this run of the algorithm, and `label`, an
            integer value indicating which label that node has (should only be
            set for seed nodes, but this is not enforced)
        n (int): number of iterations to take (default: 50)
        adj: adjacency matrix of the graph (a scipy sparse matrix or a dense
            array)
        return_details (bool): whether or not to include the history of label
            prediction values (default: False)

    returns:
        pd.DataFrame: one row per node with a non-zero score, holding the
            predicted `label`, its score `val`, the node's `is_seed` flag and
            `in_class_rank`, the rank of `val` within its
            (`is_seed`, `label`) group (1 = most confident)
        pd.DataFrame (optional): label scores of every iteration, indexed by
            (`node_index`, `iteration`) (only returned if
            `return_details = True`)

    raises:
        None

    """
    D_inv, df_y = _init_label_prop(df_labels, adj)

    label_cols = df_y.columns
    node_ids = df_y.index.get_level_values('node_index')
    is_seed = np.asarray(df_labels.is_seed, dtype=bool)

    # D^-1 W is the same on every iteration, so build it once
    transition = np.asarray(D_inv @ adj)

    # iterate on plain arrays: this keeps the label columns attached to the
    # scores by construction and avoids re-concatenating the whole history on
    # every pass
    y_prev = df_y.values.astype(float)
    snapshots = [y_prev]
    for _ in tqdm.tnrange(1, n + 1, leave=False):
        # diffuse the previous values with W and average
        y_new = transition @ y_prev

        # override the propagated labels with the true labels
        y_new[is_seed] = y_prev[is_seed]

        snapshots.append(y_new)
        y_prev = y_new

    # calculate predictions with margins from the last iteration
    df_y_final = pd.DataFrame(y_prev, index=node_ids, columns=label_cols)

    # subset down to *predictions* (ignore all-0s)
    df_y_final = df_y_final[df_y_final.max(axis=1) != 0]

    # pick out the final label value for each
    df_pred = pd.DataFrame(
        data={
            'label': df_y_final.idxmax(axis=1),
            'val': df_y_final.max(axis=1),
        },
        index=df_y_final.index
    )

    # join in the original is_seed values to calculate prediction ranking
    # in-class
    df_pred = df_pred.join(
        df_labels.set_index('node_index').is_seed,
        how='left'
    )

    # in-class ranking calculation
    df_pred = df_pred.join(
        df_pred.groupby(['is_seed', 'label'])
               .rank(ascending=False, method='min')
               .rename(columns={'val': 'in_class_rank'}),
        how='left'
    )

    if not return_details:
        return df_pred

    # stack the per-iteration snapshots into one (node_index, iteration) frame
    df_hist = pd.concat(
        [
            pd.DataFrame(snap, index=node_ids, columns=label_cols)
            for snap in snapshots
        ],
        keys=list(range(len(snapshots))),
        names=['iteration', 'node_index'],
    )
    df_hist = df_hist.swaplevel('iteration', 'node_index').sort_index()

    return df_pred, df_hist

In [None]:
df_pred, df_y_hist = label_prop(df_labels=df0, return_details=True, n=200)

In [None]:
idx = pd.IndexSlice
data = [
    go.Scatter(
        x=df_y_hist.loc[3324].index,
        y=df_y_hist.loc[idx[3324, :], l],
        name=l
    )
    for l in df_y_hist.columns
]

plotly.offline.iplot(data)

In [None]:
idx = pd.IndexSlice
data = [
    go.Scatter(
        x=df_y_hist.loc[3311].index,
        y=df_y_hist.loc[idx[3311, :], l],
        name=l
    )
    for l in df_y_hist.columns
]

plotly.offline.iplot(data)

In [None]:
idx = pd.IndexSlice
data = [
    go.Scatter(
        x=df_y_hist.loc[703].index,
        y=df_y_hist.loc[idx[703, :], l],
        name=l
    )
    for l in df_y_hist.columns
]

plotly.offline.iplot(data)

how did this do against the validation and test labels?

In [None]:
# true label (argmax of the one-hot rows) of every validation node, keyed by
# node index
dfval = (
    pd.DataFrame(
        {'true_label': y_val.argmax(axis=1)},
        index=range(y_val.shape[0]),
    )
    .rename_axis('node_index')
    .loc[val_mask]
)

# same thing for the test nodes
dftest = (
    pd.DataFrame(
        {'true_label': y_test.argmax(axis=1)},
        index=range(y_test.shape[0]),
    )
    .rename_axis('node_index')
    .loc[test_mask]
)

In [None]:
def prediction_evaluation(df_pred):
    """score predictions against the held-out validation and test labels

    args:
        df_pred (pd.DataFrame): predictions indexed by node, with the predicted
            class in a `label` column

    returns:
        float: fraction of scored nodes whose predicted label equals the true
            label
        pd.DataFrame: the rows of `df_pred` that have a known true label, with
            that label attached as `true_label`
        pd.DataFrame: cross tabulation of predicted label (rows) against true
            label (columns)

    raises:
        None

    """
    # the module-level `dfval` / `dftest` frames hold the known answers
    truth = pd.concat([dfval, dftest])

    # keep only the predictions we can actually check
    scored = df_pred.join(truth, how='left')
    scored = scored[scored.true_label.notna()]

    hits = scored.label == scored.true_label
    confusion = pd.crosstab(index=scored.label, columns=scored.true_label)

    return hits.mean(), scored, confusion

In [None]:
accuracy, df_y_comparable, df_y_crosstab = prediction_evaluation(df_pred)
accuracy

In [None]:
df_y_crosstab

In [None]:
# plotly maps z[row][col] to (y[row], x[col]); the crosstab's rows are the
# predicted labels and its columns are the true labels, so the rows belong on
# y and the columns on x
data = [
    go.Heatmap(
        x=df_y_crosstab.columns,
        y=df_y_crosstab.index,
        z=df_y_crosstab.values,
        colorscale='Reds',
    )
]
plotly.offline.iplot(data)

## algo 2: normalized laplacian lp

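similar to PageRank, and similar to the above, but with a hyperparameter $\alpha$: the update $Y^{(t+1)} = (1 - \alpha) A Y^{(t)} + \alpha Y^{(0)}$ (with $A = D^{-1/2} W D^{-1/2}$) converges to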

$$
Y^{(\infty)} = \alpha \left(\mathbb{1} - (1 - \alpha)A\right)^{-1} Y^{(0)}
$$

this is a random diffusion walk with an additional probability $\alpha$, at every step, of jumping back to the initial label values $Y^{(0)}$ instead of moving on to a neighbor

In [None]:
def _init_norm_laplacian_lp(df_labels, adj=adj):
    """build the normalized adjacency matrix and the iteration-0 label scores

    args:
        df_labels (pd.DataFrame): frame with `node_index`, `is_seed` and
            `label` columns (see `norm_laplacian_lp`)
        adj: adjacency matrix of the graph

    returns:
        np.ndarray: `A = D^-1/2 @ adj @ D^-1/2`, where `D` is the diagonal
            matrix of column sums of `adj`; rows and columns of nodes with
            degree 0 are all zero
        pd.DataFrame: one row per node and one column per label, indexed by
            (`node_index`, `iteration`) with `iteration` fixed at 0

    raises:
        None

    """
    degree = np.ravel(adj.sum(axis=0)).astype(float)

    # isolated nodes have degree 0; 0 ** -.5 is inf, and the dense product
    # below would turn that into NaN columns in A, so use a weight of 0 instead
    inv_sqrt_degree = np.zeros_like(degree)
    connected = degree != 0
    inv_sqrt_degree[connected] = degree[connected] ** -.5
    D_negsqrt = np.diag(inv_sqrt_degree)
    A = D_negsqrt @ adj @ D_negsqrt

    # y_lp for the known labeled cases should actually be pm 1, not 0 and 1,
    # per the paper's instructions. start from a one-hot (node x label) table
    df_y = df_labels.copy()
    df_y.loc[:, 'count_val'] = 1
    df_y = df_y.pivot(
        index='node_index',
        columns='label',
        values='count_val'
    )
    # unlabelled nodes pivot into a NaN column; that column does not exist when
    # every node already carries a label, which must not be an error
    df_y.drop(columns=np.nan, inplace=True, errors='ignore')
    df_y.fillna(0, inplace=True)

    # for the seed nodes, we want this df to have -1 for known negatives and +1
    # for known positives. NOTE(review): the boolean mask is aligned on the
    # index, so this assumes df_labels' index equals `node_index` -- confirm
    df_y.loc[df_labels.is_seed, :] = df_y.loc[df_labels.is_seed, :] * 2 - 1
    df_y.loc[:, 'iteration'] = 0

    df_y = df_y.set_index('iteration', append=True)

    return A, df_y

In [None]:
def norm_laplacian_lp(df_labels, n=150, alpha=0.1, adj=adj, return_details=False):
    """label propagation over the normalized adjacency matrix, with restart

    every iteration computes `(1 - alpha) * A @ y_prev + alpha * y_0`, where
    `A` is the normalized adjacency matrix and `y_0` the initial label scores,
    and then resets the seed rows to their previous values. after `n`
    iterations every node whose best score is non-zero is assigned its
    highest-scoring label.

    note: rows are matched to `adj` by position, so `df_labels` is assumed to
    list the nodes in the same order as the rows of `adj`.

    args:
        df_labels (pd.DataFrame): a dataframe which contains three features:
            `node_index`, a unique identifier for each node; `is_seed`, a
            boolean indicating whether or not that particular node should be
            considered a seed for this run of the algorithm, and `label`, an
            integer value indicating which label that node has (should only be
            set for seed nodes, but this is not enforced)
        n (int): number of iterations to take (default: 150)
        alpha (float): hyperparameter scaling the probability of our diffusion
            walker returning to the first generation's label value
            (default: 0.1)
        adj: adjacency matrix of the graph (a scipy sparse matrix or a dense
            array)
        return_details (bool): whether or not to include the history of label
            prediction values (default: False)

    returns:
        pd.DataFrame: one row per node with a non-zero score, holding the
            predicted `label`, its score `val`, the node's `is_seed` flag and
            `in_class_rank`, the rank of `val` within its
            (`is_seed`, `label`) group (1 = most confident)
        pd.DataFrame (optional): label scores of every iteration, indexed by
            (`node_index`, `iteration`) (only returned if
            `return_details = True`)

    raises:
        None

    """
    A, df_y = _init_norm_laplacian_lp(df_labels, adj)

    label_cols = df_y.columns
    node_ids = df_y.index.get_level_values('node_index')
    is_seed = np.asarray(df_labels.is_seed, dtype=bool)

    # first generation's y vals, plus the two loop-invariant pieces of the
    # update. working on plain arrays avoids the index alignment problems of
    # mixing arrays with multi-indexed dataframes
    y_0 = df_y.values.astype(float)
    walk = (1 - alpha) * np.asarray(A)
    restart = alpha * y_0

    y_prev = y_0
    snapshots = [y_0]
    for _ in tqdm.tnrange(1, n + 1, leave=True):
        # diffuse the previous values with A and mix the initial values back in
        y_new = walk @ y_prev + restart

        # override the propagated labels with the true labels
        y_new[is_seed] = y_prev[is_seed]

        snapshots.append(y_new)
        y_prev = y_new

    # calculate predictions with margins from the last iteration
    df_y_final = pd.DataFrame(y_prev, index=node_ids, columns=label_cols)

    # subset down to *predictions* (ignore all-0s)
    df_y_final = df_y_final[df_y_final.max(axis=1) != 0]

    # pick out the final label value for each
    df_pred = pd.DataFrame(
        data={
            'label': df_y_final.idxmax(axis=1),
            'val': df_y_final.max(axis=1),
        },
        index=df_y_final.index
    )

    # join in the original is_seed values to calculate prediction ranking
    # in-class
    df_pred = df_pred.join(
        df_labels.set_index('node_index').is_seed,
        how='left'
    )

    # in-class ranking calculation
    df_pred = df_pred.join(
        df_pred.groupby(['is_seed', 'label'])
               .rank(ascending=False, method='min')
               .rename(columns={'val': 'in_class_rank'}),
        how='left'
    )

    if not return_details:
        return df_pred

    # stack the per-iteration snapshots into one (node_index, iteration) frame
    df_hist = pd.concat(
        [
            pd.DataFrame(snap, index=node_ids, columns=label_cols)
            for snap in snapshots
        ],
        keys=list(range(len(snapshots))),
        names=['iteration', 'node_index'],
    )
    df_hist = df_hist.swaplevel('iteration', 'node_index').sort_index()

    return df_pred, df_hist

In [None]:
df_pred, df_y_hist = norm_laplacian_lp(df_labels=df0, return_details=True)

In [None]:
idx = pd.IndexSlice
data = [
    go.Scatter(
        x=df_y_hist.loc[3324].index,
        y=df_y_hist.loc[idx[3324, :], l],
        name=l
    )
    for l in df_y_hist.columns
]

plotly.offline.iplot(data)

In [None]:
idx = pd.IndexSlice
data = [
    go.Scatter(
        x=df_y_hist.loc[3311].index,
        y=df_y_hist.loc[idx[3311, :], l],
        name=l
    )
    for l in df_y_hist.columns
]

plotly.offline.iplot(data)

In [None]:
idx = pd.IndexSlice
data = [
    go.Scatter(
        x=df_y_hist.loc[703].index,
        y=df_y_hist.loc[idx[703, :], l],
        name=l
    )
    for l in df_y_hist.columns
]

plotly.offline.iplot(data)

In [None]:
df_pred.head()

how did this do against the validation and test labels?

In [None]:
accuracy, df_y_comparable, df_y_crosstab = prediction_evaluation(df_pred)
accuracy

In [None]:
df_y_crosstab

In [None]:
# plotly maps z[row][col] to (y[row], x[col]); the crosstab's rows are the
# predicted labels and its columns are the true labels, so the rows belong on
# y and the columns on x
data = [
    go.Heatmap(
        x=df_y_crosstab.columns,
        y=df_y_crosstab.index,
        z=df_y_crosstab.values,
        colorscale='Reds',
    )
]
plotly.offline.iplot(data)

## algo 3: nearest-seed

this is a "social diffusion" (i.e. edge/traversal-focused) algo

In [None]:
def _init_nearest_seed(df_labels, adj=adj):
    """build the graph and the constant seed predictions for `nearest_seed`

    args:
        df_labels (pd.DataFrame): frame with `is_seed` and `label` columns;
            its index values are used as the graph's node identifiers
        adj: adjacency matrix of the graph, passed to `nx.DiGraph`

    returns:
        nx.DiGraph: the graph, with a `label` attribute set on every seed node
        pd.DataFrame: an empty frame, used by the caller as the accumulator
            for nearest-label observations
        np.ndarray: index values of the seed rows of `df_labels`
        pd.DataFrame: one row per seed (index named `trg`) with its known
            `label`, `val` = 1 and `is_seed` = True

    raises:
        None

    """
    # initialize
    G = nx.DiGraph(adj)

    seed_rows = df_labels[df_labels.is_seed]

    # add the known labels. `G.node` was removed in networkx 2.4; `G.nodes` is
    # the supported accessor
    for (node_index, rec) in seed_rows.iterrows():
        G.nodes[node_index]['label'] = rec.label

    # count is easier to build in the algo itself for now
    df_c = pd.DataFrame()

    # seed predictions are constant, let's not belabor this
    # known seeds get re-introduced with a value of 1
    seeds = seed_rows.index.values
    df_seed_pred = pd.DataFrame(
        data={
            'label': df_labels.loc[seeds, 'label'],
            'val': 1,
            'is_seed': True
        },
        index=seeds
    )
    df_seed_pred.index.name = 'trg'

    return G, df_c, seeds, df_seed_pred

In [None]:
def _sample_edge_weights(G, delta=.5):
    """assign a fresh random `weight` to every edge of `G`, in place

    the weight of an edge leaving node `u` is `delta` plus one draw from
    `np.random.exponential` whose scale is half of `G.degree(u)`.

    args:
        G: graph whose edge attribute dicts are reachable as `G[u][v]` and
            which provides `G.degree(u)`
        delta (float): constant offset added to every sampled weight
            (default: 0.5)

    returns:
        None

    """
    for node in G:
        neighbours = G[node]

        # degree is both *out* and *in*, hence the halving
        scale = G.degree(node) / 2

        # one draw per outgoing edge; larger degrees yield larger weights
        for other in neighbours:
            neighbours[other]['weight'] = np.random.exponential(scale) + delta

In [None]:
def nearest_seed(df_labels, n=100, delta=.5, adj=adj, rng_seed=1337, cutoff=20,
                 return_details=False):
    """simple social diffusion model

    on each of `n` rounds the edge weights are re-sampled at random and every
    non-seed node within `cutoff` of a seed is assigned the label of its
    closest seed (shortest weighted path). a node's prediction is the label
    it received most often, and `val` is the fraction of rounds in which it
    received that label.

    args:
        df_labels (pd.DataFrame): a dataframe which contains three features:
            `node_index`, a unique identifier for each node; `is_seed`, a
            boolean indicating whether or not that particular node should be
            considered a seed for this run of the algorithm, and `label`, an
            integer value indicating which label that node has (should only be
            set for seed nodes, but this is not enforced)
        n (int): number of edge-weight sampling rounds (default: 100)
        delta (float): hyperparameter setting a constant offset in the
            exponential distribution sampling for edge weights (think of this
            as a default minimum weight). (default: 0.5)
        adj: adjacency matrix of the graph
        rng_seed (int): seed for the numpy random number generator
        cutoff (float): cutoff for the dijkstra shortest path algorithm (don't
            calculate any shortest paths longer than this number)
            (default: 20)
        return_details (bool): whether or not to include the per-node label
            frequencies (default: False)

    returns:
        pd.DataFrame: predicted `label`, score `val`, `is_seed` flag and
            `in_class_rank` for every reached non-seed node, plus one constant
            row (`val` = 1) per seed
        pd.DataFrame (optional): for every reached non-seed node, the fraction
            of rounds in which each label was the nearest one (only returned
            if `return_details = True`)

    raises:
        None

    """
    np.random.seed(rng_seed)

    G, df_c, seeds, df_seed_pred = _init_nearest_seed(df_labels, adj)

    # seeds already have a label; they are never prediction targets. testing
    # membership (instead of a fixed index threshold) keeps this correct for
    # any set of seeds, e.g. the ones added by the bootstrap wrapper
    seed_set = set(seeds.tolist())

    # collect one small frame per round and concatenate once at the end
    closest_per_round = [] if df_c.empty else [df_c]

    for _ in tqdm.tnrange(n, leave=False):
        _sample_edge_weights(G, delta=delta)

        # collect the distance from every seed to every reachable non-seed node
        dist_records = []
        for u in tqdm.tqdm_notebook(seeds, leave=False):
            # find all distances from this seed to all nodes
            dists = nx.single_source_dijkstra_path_length(
                G, u, cutoff=cutoff, weight='weight'
            )
            src_lab = G.nodes[u]['label']

            dist_records.extend(
                {'src': u, 'src_lab': src_lab, 'trg': trg, 'dist': dist}
                for (trg, dist) in dists.items()
                if trg not in seed_set
            )

        df_dists = pd.DataFrame(
            dist_records, columns=['src', 'src_lab', 'trg', 'dist']
        )

        # after sorting by distance the first row per target is its closest
        # seed
        closest_labels = df_dists.sort_values(by=['trg', 'dist']) \
                                 .groupby('trg') \
                                 .first() \
                                 .reset_index()[['trg', 'src_lab']]

        closest_per_round.append(closest_labels)

    df_c = pd.concat(closest_per_round, ignore_index=True)

    # fraction of rounds in which each label was the nearest one
    df_y_final = pd.crosstab(index=df_c.trg, columns=df_c.src_lab)
    df_y_final = df_y_final.div(df_y_final.sum(axis=1), axis=0)

    # pick out the final label value for each
    df_pred = pd.DataFrame(
        data={
            'label': df_y_final.idxmax(axis=1),
            'val': df_y_final.max(axis=1),
        },
        index=df_y_final.index
    )

    # join in the original is_seed values to calculate prediction ranking
    # in-class
    df_pred = df_pred.join(
        df_labels.set_index('node_index').is_seed,
        how='left'
    )

    # join in the constant seed values
    df_pred = pd.concat([df_pred, df_seed_pred]).sort_index()

    # in-class ranking calculation
    df_pred = df_pred.join(
        df_pred.groupby(['is_seed', 'label'])
               .rank(ascending=False, method='min')
               .rename(columns={'val': 'in_class_rank'}),
        how='left'
    )

    if return_details:
        return df_pred, df_y_final.sort_index()
    else:
        return df_pred

In [None]:
df_pred, df_y_final = nearest_seed(df_labels=df0, return_details=True)

In [None]:
df_pred.tail()

how did this do against the validation and test labels?

In [None]:
accuracy, df_y_comparable, df_y_crosstab = prediction_evaluation(df_pred)
accuracy

In [None]:
df_y_crosstab

In [None]:
# plotly maps z[row][col] to (y[row], x[col]); the crosstab's rows are the
# predicted labels and its columns are the true labels, so the rows belong on
# y and the columns on x
data = [
    go.Heatmap(
        x=df_y_crosstab.columns,
        y=df_y_crosstab.index,
        z=df_y_crosstab.values,
        colorscale='Reds',
    )
]
plotly.offline.iplot(data)

# bootstrapping wrapper

now we implement a meta-algorithm / wrapper for bootstrapping the above three algorithms. we will effectively pull out some proportion of the best predictions for each of the above algorithms and treat them as new synthetic "true" labels. this process will be iterated

In [None]:
def bootstrap(df0, algo, n=100, r=.01, return_bootstrap_details=False,
              return_algo_details=False, **kwargs):
    """the bootstrap wrapper for synthetic target propagation

    repeatedly runs `algo`, promotes the most confident non-seed predictions
    of every label to synthetic seeds, and feeds the enlarged seed set into
    the next run. stops early once a round promotes nothing.

    args:
        df0 (pd.DataFrame): the initial dataframe containing seed information
        algo (func): an algorithm function which will return label predictions
            with a ranking feature (`label`, `is_seed` and `in_class_rank`
            columns, indexed by node)
        n (int): maximum number of bootstrap rounds (default: 100)
        r (float): hyperparameter determining what fraction of all the
            unlabelled cases are converted into synthetic labels in the
            following iteration. note: must be between 0 and 1 (default: 0.01)
        return_bootstrap_details (bool): whether or not to include the history
            of predicted values obtained during successive iterations of the
            bootstrap wrapper (default: False)
        return_algo_details (bool): whether or not to include the history of
            the algorithm's internal prediction values (default: False)
        kwargs: will be passed directly to `algo`

    returns:
        pd.DataFrame: a copy of `df0` in which the promoted nodes have
            `is_seed` set to True, `label` set to their predicted label, and
            the bookkeeping columns `is_synthetic` and
            `added_in_bootstrap_iteration` filled in
        pd.DataFrame (optional): the predictions of every bootstrap round,
            indexed by (`node_index`, `bootstrap_iteration`) (only returned if
            `return_bootstrap_details = True`)
        list (optional): the details object returned by `algo` in every round
            (only returned if `return_algo_details = True`)

    raises:
        ValueError: if `r` is not strictly between 0 and 1
    """
    if not (0 < r < 1):
        raise ValueError("r must be between 0 and 1")

    df_labels = df0.copy()
    pred_history = []
    algo_details = []

    for i in tqdm.tnrange(n):
        # make our predictions and record them as a history object. with
        # return_details=True the algorithms return a (predictions, details)
        # pair, which has to be unpacked before the predictions can be used
        result = algo(df_labels, return_details=return_algo_details, **kwargs)
        if return_algo_details:
            df_pred_now, details = result
            algo_details.append(details)
        else:
            df_pred_now = result

        # not every algorithm names its index `node_index` (nearest_seed uses
        # `trg`); normalise it because the promotion step below needs it
        df_pred_now = df_pred_now.rename_axis('node_index')
        df_pred_now.loc[:, 'bootstrap_iteration'] = i
        df_pred_now.set_index('bootstrap_iteration', append=True, inplace=True)
        pred_history.append(df_pred_now)

        # only nodes that are not seeds yet can be promoted
        not_seed = ~df_pred_now.is_seed.astype(bool)
        candidates = df_pred_now[not_seed].reset_index()

        # the per-label quota is a fraction r of the number of *currently
        # predicted* nodes of that label. it is unclear in the paper whether
        # the class frequencies are meant to be fixed from the initial seeds,
        # but doing so gave much worse results here. since r < 1 the quota can
        # never exceed what is available; truncation means small classes may
        # contribute nothing in a round
        per_label_counts = candidates.label.value_counts()
        num_to_keep = (r * per_label_counts).astype(int)

        # if we don't grow the size of our set at all, break
        if num_to_keep.sum() == 0:
            break

        num_to_keep = num_to_keep.rename_axis('label') \
                                 .reset_index(name='num_to_keep')

        # go and get the top that-many from each group
        candidates = candidates.merge(
            num_to_keep,
            how='left',
            on='label',
        )
        promoted = candidates[
            candidates.in_class_rank <= candidates.num_to_keep
        ]

        # update df_labels with this new information
        df_labels.loc[promoted.node_index, 'is_seed'] = True
        df_labels.loc[promoted.node_index, 'label'] = promoted.label.values
        df_labels.loc[promoted.node_index, 'is_synthetic'] = True
        df_labels.loc[promoted.node_index, 'added_in_bootstrap_iteration'] = i

    outputs = [df_labels]
    if return_bootstrap_details:
        df_pred = pd.concat(pred_history) if pred_history else pd.DataFrame()
        outputs.append(df_pred)
    if return_algo_details:
        outputs.append(algo_details)

    if len(outputs) == 1:
        return df_labels
    return tuple(outputs)

In [None]:
df_labels, df_pred = bootstrap(
    df0, label_prop, n=20, r=0.1, return_bootstrap_details=True
)

In [None]:
accuracies, crosstabs = {}, {}
for i in df_pred.index.get_level_values('bootstrap_iteration').unique():
    accuracy, df_y_comparable, df_y_crosstab = prediction_evaluation(
        df_pred.loc[idx[:, i], :]
    )
    accuracies[i] = accuracy
    crosstabs[i] = df_y_crosstab

In [None]:
accuracies

In [None]:
from ipywidgets import interact, widgets

In [None]:
def f(i):
    """draw the confusion heatmap recorded for bootstrap iteration `i`

    args:
        i (int): key into the module-level `crosstabs` dict

    returns:
        None

    """
    df_y_crosstab = crosstabs[i]
    data = [
        go.Heatmap(
            # plotly maps z[row][col] to (y[row], x[col]), so the crosstab's
            # rows (its index) belong on y and its columns on x
            x=df_y_crosstab.columns,
            y=df_y_crosstab.index,
            z=df_y_crosstab.values,
            colorscale='Reds',
        )
    ]
    plotly.offline.iplot(data)

interact(f, i=widgets.IntSlider(min=0, max=max(crosstabs.keys()), step=1, value=0))