# Ensemble Generation

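In this notebook we create shuffled ensembles of annotated hypergraphs and calculate features of the resulting shuffled data.

**Note:** cells that start with `%%px` run on the `ipyparallel` engines rather than in the local kernel, so the cell that creates the `ipp.Client()` must be executed before them.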

In [5]:
%%px
import numpy as np
from time import time

from ahyper import AnnotatedHypergraph
from ahyper.ensemble import data_features, shuffled_ensemble_features, save_feature_study

In [6]:
%%px
from ahyper.utils import (average_entropy, average_value, variance_value, entropy_value)
from ahyper.observables import (_degree_centrality,
                                _pagerank_centrality,
                                _eigenvector_centrality,
                                _connected_components,
                                _assortativity,
                                node_role_participation,
                                local_role_density)

# Role-interaction matrices, one square array per dataset name.
# process_study passes the matching entry to assign_role_interaction_matrix.
INTERACTION_MAP = {
    dataset: np.array(rows)
    for dataset, rows in (
        ('enron', [[0, 1, 0.25],
                   [0, 0, 0],
                   [0, 0, 0]]),
        ('twitter', [[0, 0.75, 0, 0],
                     [0, 0, 0, 0],
                     [0, 0, 0, 0],
                     [0, 0, 0, 1]]),
        ('movielens', [[0, 1],
                       [0.25, 0.25]]),
        ('stack_overflow', [[0, 0.1, 0.1],
                            [0.3, 0.3, 0.3],
                            [1, 0.5, 0]]),
        ('math_overflow', [[0, 0.1, 0.1],
                           [0.3, 0.3, 0.3],
                           [1, 0.5, 0]]),
        ('scopus_multilayer', [[0, 1, 0.5],
                               [0.2, 0.2, 0.2],
                               [1, 0.25, 0]]),
    )
}

# Features computed for each study, keyed by feature name. Every entry is a dict with:
#   'func'    - the callable that computes the feature,
#   'acts_on' - which representation it is applied to
#               ('weighted_projection' or 'annotated_hypergraph'),
#   'kwargs'  - extra keyword arguments for 'func'.
# Only 'weighted_degree_entropy' and 'assortativity' are active; the commented-out
# entries are further features that are currently disabled.
FEATURES = {'weighted_degree_entropy': {'func':entropy_value(_degree_centrality),
                                'acts_on':'weighted_projection',
                                'kwargs':{}
                               },
#             'weighted_pagerank_entropy': {'func':entropy_value(_pagerank_centrality),
#                                 'acts_on':'weighted_projection',
#                                 'kwargs':dict(weight='weight')
#                                },
#             'weighted_pagerank_variance': {'func':variance_value(_pagerank_centrality),
#                                 'acts_on':'weighted_projection',
#                                 'kwargs':dict(weight='weight')
#                                },
            'assortativity':{'func': _assortativity,
                             'acts_on':'annotated_hypergraph',
                             'kwargs':dict(n_samples=100000, by_role=True, spearman=True)},
#             'weighted_eigenvector_entropy': {'func':entropy_value(_eigenvector_centrality),
#                                 'acts_on':'weighted_projection',
#                                 'kwargs':{}
#                                },
#             'connected_components': {'func':_connected_components,
#                                 'acts_on':'weighted_projection',
#                                 'kwargs':{}
#                                },
#             'node_role_entropy': {'func':average_entropy(node_role_participation),
#                                 'acts_on':'annotated_hypergraph',
#                                 'kwargs':dict(absolute_values=False)
#                                },
#             'neighbourhood_role_entropy': {'func':average_entropy(local_role_density),
#                                 'acts_on':'annotated_hypergraph',
#                                 'kwargs':dict(absolute_values=False, include_focus=False)
#                                },
           }

In [3]:
import ipyparallel as ipp

# Connect to the running ipyparallel cluster. Creating the first client is also what
# registers the %px / %%px magics used by other cells in this notebook.
rc = ipp.Client()
# View over every engine of the cluster; the parallel study below is mapped across it.
dview = rc[:]

In [4]:
%px !pwd

[stdout:0] /annotated
[stdout:1] /annotated
[stdout:2] /annotated
[stdout:3] /annotated
[stdout:4] /annotated
[stdout:5] /annotated


In [10]:
# %%px
# Dataset names handed to process_study; each one must be a key of INTERACTION_MAP.
DATASETS = ['enron','math_overflow','movielens','scopus_multilayer','stack_overflow','twitter']

def process_study(data_name,
                  data_root='./data/',
                  results_root='./results/',
                  name_suffix='_as',
                  shuffle_fraction=0.1,
                  num_shuffles=500,
                  burn_fraction=200):
    """
    Run and save the shuffled-ensemble feature study for a single dataset.

    The dataset is loaded with AnnotatedHypergraph.from_incidence, given its
    role-interaction matrix from INTERACTION_MAP, and passed to
    save_feature_study together with the FEATURES dictionary.

    The function relies on the names time, AnnotatedHypergraph,
    INTERACTION_MAP, FEATURES and save_feature_study being defined in the
    namespace where it executes (the engines, when dispatched through
    ipyparallel). Relative paths are resolved against that process's working
    directory.

    Parameters
    ----------
    data_name : str
        Name of the dataset; must be a key of INTERACTION_MAP.
    data_root : str, optional
        Directory passed as `root` to AnnotatedHypergraph.from_incidence.
    results_root : str, optional
        Directory passed as `root` to save_feature_study.
    name_suffix : str, optional
        Appended to `data_name` to form the study name given to
        save_feature_study.
    shuffle_fraction, num_shuffles, burn_fraction : optional
        Forwarded unchanged to save_feature_study; the defaults are the
        values this study was originally run with.

    Returns
    -------
    float
        Wall-clock duration of the load and study, in minutes.

    Raises
    ------
    KeyError
        If `data_name` has no entry in INTERACTION_MAP.
    """
    # Resolve the matrix first so an unknown dataset fails immediately,
    # before any time is spent loading the hypergraph.
    interaction_matrix = INTERACTION_MAP[data_name]

    print(data_name)
    start = time()

    A = AnnotatedHypergraph.from_incidence(data_name,
                                           root=data_root,
                                           relabel_roles=False,
                                           add_metadata=False)

    A.assign_role_interaction_matrix(interaction_matrix)

    save_feature_study(A,
                       data_name=f'{data_name}{name_suffix}',
                       shuffle_fraction=shuffle_fraction,
                       num_shuffles=num_shuffles,
                       features=FEATURES,
                       burn_fraction=burn_fraction,
                       root=results_root,
                       verbose=True,
                       fail_hard=False
                       )

    end = time()

    # Seconds -> minutes.
    return (end - start) / 60

In [11]:
# process_study('enron')  # (disabled) direct call for a single dataset
# Map process_study over DATASETS on the engines behind dview without blocking;
# the returned result object is inspected in the cells that follow.
parallel_result = dview.map_async(process_study, DATASETS)

In [12]:
parallel_result.elapsed, parallel_result.completed

(2.130711, [None, None, None, None, None, None])

In [13]:
parallel_result.elapsed//(60*60)

0.0

In [30]:
parallel_result.stdout

['enron\nRunning Role Preserving MCMC...\n0.0%\r5.0%\r10.0%\r15.0%\r20.0%\r25.0%\r30.0%\r35.0%\r40.0%\r45.0%\r50.0%\r55.0%\r60.0%\r65.0%\r70.0%\r75.0%\r80.0%\r85.0%\r90.0%\r95.0%\rRunning Role Destroying MCMC...\n0.0%\r5.0%\r10.0%\r15.0%\r20.0%\r25.0%\r30.0%\r35.0%\r40.0%\r45.0%\r50.0%\r55.0%\r60.0%\r65.0%\r70.0%\r75.0%\r80.0%\r85.0%\r90.0%\r95.0%\r',
 'math_overflow\nRunning Role Preserving MCMC...\n0.0%\r5.0%\r10.0%\r15.0%\r20.0%\r25.0%\r30.0%\r35.0%\r40.0%\r45.0%\r50.0%\r55.0%\r60.0%\r65.0%\r70.0%\r75.0%\r80.0%\r85.0%\r90.0%\r95.0%\r',
 'movielens\n',
 'scopus_multilayer\n',
 'stack_overflow\nRunning Role Preserving MCMC...\n0.0%\r5.0%\r10.0%\r15.0%\r20.0%\r25.0%\r30.0%\r35.0%\r40.0%\r45.0%\r50.0%\r55.0%\r60.0%\r65.0%\r70.0%\r75.0%\r80.0%\r85.0%\r90.0%\r95.0%\r',
 'twitter\n']

In [22]:
parallel_result.metadata

[{'msg_id': '517dbaba-1785dac91609606a38a1e6be',
  'submitted': datetime.datetime(2019, 10, 1, 10, 30, 46, 79924, tzinfo=tzlocal()),
  'started': datetime.datetime(2019, 10, 1, 10, 30, 46, 91905, tzinfo=tzlocal()),
  'completed': datetime.datetime(2019, 10, 1, 11, 57, 21, 848330, tzinfo=tzlocal()),
  'received': datetime.datetime(2019, 10, 1, 11, 57, 21, 858398, tzinfo=tzutc()),
  'engine_uuid': '0024ef03-a41eb7f01a757a20f4661eaa',
  'engine_id': 0,
  'follow': [],
  'after': [],
  'status': 'ok',
  'execute_input': None,
  'execute_result': None,
  'error': None,
  'stdout': 'enron\nRunning Role Preserving MCMC...\n0.0%\r5.0%\r10.0%\r15.0%\r20.0%\r25.0%\r30.0%\r35.0%\r40.0%\r45.0%\r50.0%\r55.0%\r60.0%\r65.0%\r70.0%\r75.0%\r80.0%\r85.0%\r90.0%\r95.0%\rRunning Role Destroying MCMC...\n0.0%\r5.0%\r10.0%\r15.0%\r20.0%\r25.0%\r30.0%\r35.0%\r40.0%\r45.0%\r50.0%\r55.0%\r60.0%\r65.0%\r70.0%\r75.0%\r80.0%\r85.0%\r90.0%\r95.0%\r',
  'stderr': '',
  'outputs': [],
  'data': {}},
 {'msg_id': '674

In [29]:
%%px --targets 4

DATASETS

[0;31mOut[4:7]: [0m
['enron',
 'math_overflow',
 'movielens',
 'scopus_multilayer',
 'stack_overflow',
 'twitter']

## Serial Calculation

In [3]:
# Datasets processed one after another in the local kernel.
DATASETS = [
    'enron',
    'math_overflow',
    'movielens',
    'scopus_multilayer',
    'stack_overflow',
    'twitter',
]
# Single-dataset alternative:
# DATASETS = ['twitter']

for data_name in DATASETS:
    print(f'DATASET: {data_name}')

    # Load the hypergraph exactly as stored: roles are not relabelled, no metadata is attached.
    A = AnnotatedHypergraph.from_incidence(
        data_name,
        root='../data/',
        relabel_roles=False,
        add_metadata=False,
    )

    # Alternative settings kept from the original cell:
    #   num_shuffles=20, burn_fraction=None
    save_feature_study(
        A,
        data_name=data_name,
        shuffle_fraction=0.1,
        num_shuffles=1000,
        features=FEATURES,
        burn_fraction=10,
        root='../results/',
        verbose=True,
        fail_hard=False,
    )

DATASET: enron
Running Role Preserving MCMC...
Running Role Destroying MCMC...
DATASET: math_overflow
Running Role Preserving MCMC...
Running Role Destroying MCMC...
DATASET: movielens
Running Role Preserving MCMC...
Running Role Destroying MCMC...
0.0%

KeyboardInterrupt: 

## Specialised Role-interaction Matrices

### Enron

In [3]:
# Enron study with its own role-interaction matrix
# (these values differ from the 'enron' entry of INTERACTION_MAP).
A = AnnotatedHypergraph.from_incidence(
    'enron',
    root='../data/',
    relabel_roles=False,
    add_metadata=False,
)

A.assign_role_interaction_matrix(
    np.array([
        [0.2, 1, 0.8],
        [0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2],
    ])
)

# Stored under the study name 'enron_full'.
save_feature_study(
    A,
    data_name='enron_full',
    shuffle_fraction=0.1,
    num_shuffles=2000,
    features=FEATURES,
    burn_fraction=200,
    root='../results/',
    verbose=True,
)

Running Role Preserving MCMC...
Running Role Destroying MCMC...
95.0%

### Stack Overflow

In [None]:
# Stack Overflow study with an explicit role-interaction matrix.
A = AnnotatedHypergraph.from_incidence(
    'stack_overflow',
    root='../data/',
    relabel_roles=False,
    add_metadata=False,
)

A.assign_role_interaction_matrix(
    np.array([
        [0, 0.1, 0.1],
        [0.3, 0.3, 0.3],
        [1, 0.5, 0],
    ])
)

# Stored under the study name 'stack_overflow_r'.
save_feature_study(
    A,
    data_name='stack_overflow_r',
    shuffle_fraction=0.1,
    num_shuffles=1000,
    features=FEATURES,
    burn_fraction=10,
    root='../results/',
    verbose=True,
)

### MathOverflow

In [None]:
# MathOverflow study with an explicit role-interaction matrix.
A = AnnotatedHypergraph.from_incidence(
    'math_overflow',
    root='../data/',
    relabel_roles=False,
    add_metadata=False,
)

A.assign_role_interaction_matrix(
    np.array([
        [0, 0.1, 0.1],
        [0.3, 0.3, 0.3],
        [1, 0.5, 0],
    ])
)

# Stored under the study name 'math_overflow_r'.
save_feature_study(
    A,
    data_name='math_overflow_r',
    shuffle_fraction=0.1,
    num_shuffles=1000,
    features=FEATURES,
    burn_fraction=10,
    root='../results/',
    verbose=True,
)

### Scopus Multilayer

In [None]:
# Scopus multilayer study with an explicit role-interaction matrix.
A = AnnotatedHypergraph.from_incidence(
    'scopus_multilayer',
    root='../data/',
    relabel_roles=False,
    add_metadata=False,
)

A.assign_role_interaction_matrix(
    np.array([
        [0, 1, 0.5],
        [0.2, 0.2, 0.2],
        [1, 0.25, 0],
    ])
)

# Stored under the study name 'scopus_multilayer_r'.
save_feature_study(
    A,
    data_name='scopus_multilayer_r',
    shuffle_fraction=0.1,
    num_shuffles=1000,
    features=FEATURES,
    burn_fraction=10,
    root='../results/',
    verbose=True,
)

### Twitter

In [None]:
# Twitter study with an explicit role-interaction matrix.
A = AnnotatedHypergraph.from_incidence(
    'twitter',
    root='../data/',
    relabel_roles=False,
    add_metadata=False,
)

# Roles (presumably the row/column order of the matrix - confirm):
# ['source', 'target', 'retweeter', 'retweeted']
A.assign_role_interaction_matrix(
    np.array([
        [0, 0.75, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 1],
    ])
)

# Stored under the study name 'twitter_r'.
save_feature_study(
    A,
    data_name='twitter_r',
    shuffle_fraction=0.1,
    num_shuffles=1000,
    features=FEATURES,
    burn_fraction=10,
    root='../results/',
    verbose=True,
)

### MovieLens

In [None]:
# Weighted projection of the hypergraph A (presumably returned as a NetworkX graph,
# given use_networkx=True and the nx calls applied to G below).
# NOTE(review): no MovieLens hypergraph is loaded in this section, so A is whichever
# hypergraph was loaded last (the Twitter one if the cells above ran in order) - confirm.
G = A.to_weighted_projection(use_networkx=True)

In [None]:
import networkx as nx
import scipy.sparse as sp

# Adjacency matrix of G as a SciPy sparse matrix.
# NOTE(review): nx.to_scipy_sparse_matrix was removed in NetworkX 3.0; on newer
# versions the replacement is nx.to_scipy_sparse_array - confirm the pinned version.
M = nx.to_scipy_sparse_matrix(G)

In [None]:
# eigs lives in the scipy.sparse.linalg submodule; `import scipy.sparse as sp` alone
# is not guaranteed to load it on every SciPy version, so import it explicitly.
import scipy.sparse.linalg

# Leading eigenpair (k=1) of the sparse matrix M.
evals, evecs = sp.linalg.eigs(M, k=1, return_eigenvectors=True)

In [None]:
evals

In [None]:
import matplotlib

In [None]:
# eigs lives in the scipy.sparse.linalg submodule; import it explicitly so that
# sp.linalg is available regardless of what else has been imported.
import scipy.sparse.linalg

# Stochastic version of G: edge weights are normalised by each node's total out-weight.
# NOTE(review): nx.stochastic_graph only accepts directed graphs - confirm G is one.
T = nx.stochastic_graph(G)
# Build the matrix from T; the original cell built it from G, which left T unused
# and simply repeated the earlier eigen-computation on the unnormalised graph.
M = nx.to_scipy_sparse_matrix(T)
# Leading eigenpair (k=1) of the stochastic matrix.
evals, evecs = sp.linalg.eigs(M, k=1, return_eigenvectors=True)

In [None]:
# pyplot is the supported plotting interface (pylab is discouraged by matplotlib).
import matplotlib.pyplot as plt

# eigs returns a complex-dtype array of shape (n, k); a histogram needs real values,
# so plot the real part of the single (k=1) eigenvector computed above.
plt.hist(evecs[:, 0].real)