In [1]:
import sys
import os
sys.path.append(os.path.abspath(".."))

import re
import multiprocessing
from collections import Counter
import itertools
import json
from IPython.display import display as disp
from colorsys import hsv_to_rgb, rgb_to_hsv

import numpy as np
# import bottleneck as bn
import pandas as pd
import networkx as nx
import igraph as ig
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(context='paper', style='whitegrid', color_codes=True, font_scale=1.8)
colorcycle = [(0.498, 0.788, 0.498),
              (0.745, 0.682, 0.831),
              (0.992, 0.753, 0.525),
              (0.220, 0.424, 0.690),
              (0.749, 0.357, 0.090),
              (1.000, 1.000, 0.600),
              (0.941, 0.008, 0.498),
              (0.400, 0.400, 0.400)]
%matplotlib inline
sns.set_palette(colorcycle)
mpl.rcParams['figure.max_open_warning'] = 65
mpl.rcParams['figure.figsize'] = [12, 7]

# import warnings
# warnings.simplefilter("ignore", category=DeprecationWarning)
# warnings.simplefilter("ignore", category=mpl.cbook.mplDeprecation)
# warnings.simplefilter("ignore", category=UserWarning)


from speclib import misc, loaders, graph, plotting

pd.set_option('display.max_rows', 55)
pd.set_option('display.max_columns', 10)
pd.set_option('display.width', 1000)
np.set_printoptions(linewidth=145)

# import pixiedust

In [2]:
datafiles = ['../../allan_data/weighted_graph_bluetooth.edgelist',
             '../../allan_data/weighted_graph_call.edgelist',
             '../../allan_data/weighted_graph_sms.edgelist'] 

For calls and SMS:
$$ w_{ij} = \frac{n_{ij}^{\alpha}}{ \sum_{ik} n_{ik}^{\alpha} } $$

For Bluetooth:
$$   w_{ij} = \frac{T_{ij}^{\alpha}}{ \sum_{ik} T_{ik}^{\alpha} } $$

$$ r = t^2/s^2 $$

$$ \bar{x} = \frac{\sum_{i > j} (x_i + x_j)}{2 w_{ij}} $$

$$ s^2 = \frac{\sum_{i > j} w_{ij}\left( (x_i - \bar{x})^2 + (x_j - \bar{x})^2 \right) }{\sum_{i < j} 2w_{ij}}  $$

$$ t^2 = \frac{\sum_{i > j} w_{ij}\left( (x_i - \bar{x}) (x_j - \bar{x}) \right) }{\sum_{i < j} 2w_{ij}} $$

Construct the dataset $ x_i, x_j, w_{ij} $, where $x_i$ and $x_j$ are questionaire variable for persons $i$ and $j$, and $w_{ij}$ are the weight of their connection.

In [3]:
ua = loaders.Useralias() 
qdf = pd.read_json('../../allan_data/RGender_.json')
qdf.index = qdf.index.map(lambda x: ua[x]) 
qq = misc.QuestionCompleter(qdf)
qdf.head() 

Unnamed: 0,alcohol_binge10__answer,alcohol_binge10__answer_type,alcohol_binge10__condition,alcohol_binge10__question,alcohol_binge10__response,...,worries_zieblings__answer,worries_zieblings__answer_type,worries_zieblings__condition,worries_zieblings__question,worries_zieblings__response
u0001,1.0,radio,alcohol_debut != 'Jeg har aldrig drukket alkohol',Drukket mere end 10 genstande på en dag/aften,1 gange,...,3.0,radio,False,Søskende,Sjældent
u0002,1.0,radio,alcohol_debut != 'Jeg har aldrig drukket alkohol',Drukket mere end 10 genstande på en dag/aften,1 gange,...,,radio,False,Søskende,Har ingen
u0003,2.0,radio,alcohol_debut != 'Jeg har aldrig drukket alkohol',Drukket mere end 10 genstande på en dag/aften,2 gange,...,2.0,radio,False,Søskende,Af og til
u0004,1.0,radio,alcohol_debut != 'Jeg har aldrig drukket alkohol',Drukket mere end 10 genstande på en dag/aften,1 gange,...,,radio,False,Søskende,Har ingen
u0006,0.0,radio,alcohol_debut != 'Jeg har aldrig drukket alkohol',Drukket mere end 10 genstande på en dag/aften,0 gange,...,3.0,radio,False,Søskende,Sjældent


In [4]:
def alpha_weighted(g, u, v, alpha):
    u_edges = g[u]
    u_edges_sum = sum(u_edges[v]['weight']**alpha for v in u_edges)
    return u_edges[v]['weight']**alpha / u_edges_sum 

In [13]:
q = qdf.alcohol_binge10__answer

In [24]:
gca = nx.read_edgelist(datafiles[1], create_using=nx.DiGraph()) 

print(q.notna().mean())

gca_q = gca.subgraph(q.index[q.notna()].tolist())

gca_qu = graph.nxDiGraph2Graph(gca_q)

gam_qu = np.array(nx.adjacency_matrix(gca_qu).todense())

0.949685534591195


In [131]:
n_alpha = 8
w = np.zeros((*gamu.shape, n_alpha))
alpha = np.linspace(0, 2, n_alpha)
N = gamu.shape[0] 
for i in range(N):
    for j in range(i):
        if gamu[i, j] != 0.0:
            numerator = gamu[i, j] ** alpha
        else:
            numerator = np.zeros(n_alpha)
        denominator = sum((el ** alpha for el in gamu[i, (gamu[i, :] != 0)]))
        res = numerator / denominator
        w[i, j, :] = res
        w[j, i, :] = res

alpha = np.linspace(0, 2, n_alpha)
r = np.zeros((gamu.shape[0], n_alpha))
x_mean = 0
for i in range(gamu.shape[0]):
    for j in range(i):
        xi, xj = q.loc[ui], q.loc[uj]
        x_mean += (w[i, j, :] * (xi + xj))/(2*(w[i, j, :]))

for i in range(gamu.shape[0]):
    for j in range(i):
        xi, xj = q.loc[ui], q.loc[uj]
        t_sq = (w[i, j, :] * (xi - x_mean) * (xj - x_mean)) / (2*w[i, j, :])
        s_sq = ()/()
        r[i, :] = t_sq / s_sq

TypeError: data type not understood