In [1]:
import psycopg2

import pandas as pd
import numpy as np

from copy import deepcopy

import ast
import random
import networkx as nx
import time, unicodedata
import itertools

from fuzzywuzzy import fuzz
from fuzzywuzzy import process

from joblib import Parallel, delayed

In [2]:
def clean(name, min_len=5, junk_replacement=''):
    """Return a lower-cased, ASCII-only version of ``name``.

    The name is NFKD-decomposed and every character that cannot be encoded
    as ASCII is dropped (so accents disappear and characters without an ASCII
    base letter vanish entirely).

    Parameters
    ----------
    name : str
        Text to normalise. Anything ``unicodedata.normalize`` rejects with a
        ``TypeError`` (e.g. ``None`` or NaN) is treated as junk.
    min_len : int
        Cleaned strings shorter than this are treated as junk.
    junk_replacement : str
        Value returned for junk input.
    """
    try:
        decomposed = unicodedata.normalize('NFKD', name)
    except TypeError:
        # Non-text input (missing values and the like).
        return junk_replacement

    ascii_only = decomposed.encode('ascii', 'ignore').lower().decode("ascii")
    return ascii_only if len(ascii_only) >= min_len else junk_replacement

def get_matches_edit_distance(item, choices, limit, scorer=fuzz.WRatio):
    """Return the ``limit`` best fuzzy matches of ``item`` among ``choices``.

    Thin wrapper around ``fuzzywuzzy.process.extract``; the result is whatever
    that call returns for the given ``scorer`` (``fuzz.WRatio`` by default).
    """
    ranked = process.extract(item, choices, limit=limit, scorer=scorer)
    return ranked
counter = 0
def get_sehir_twitter_matches(twitter_users, sehir_directory, limit=1):
    """Yield ``(screen_name, matches)`` for every row of ``twitter_users``.

    Parameters
    ----------
    twitter_users : DataFrame
        Must provide ``screen_name`` and ``name`` columns.
    sehir_directory : DataFrame
        Accepted for interface compatibility but not read here: candidates
        come from the module-level ``fullnames`` list, which therefore has to
        be defined before the generator is consumed.
    limit : int
        Number of best matches requested per user.

    Yields
    ------
    tuple
        ``(screen_name, matches)`` where ``matches`` is the result of
        ``get_matches_edit_distance`` for that user's display name.

    Side effect: the module-level ``counter`` is incremented once per user.
    """
    global fullnames, counter
    # Walk both columns in lockstep rather than looking each name up through a
    # screen_name index: that lookup returned a Series (not a single name)
    # whenever a screen_name occurred more than once.
    for screen_name, twitter_name in zip(twitter_users['screen_name'], twitter_users['name']):
        match_name = get_matches_edit_distance(twitter_name, fullnames, limit)
        counter += 1
        yield (screen_name, match_name)
        
def filter_matches_by_threshold(matches_dict, threshold=0):
    """Keep only the matches scoring strictly above ``threshold``.

    ``matches_dict`` maps a screen name to a list of ``(match, score)`` pairs.
    Screen names left without any surviving pair are omitted from the result;
    the remaining ones keep their original order.
    """
    survivors = (
        (screen_name, [(match, score) for match, score in matches if score > threshold])
        for screen_name, matches in matches_dict.items()
    )
    return {screen_name: kept for screen_name, kept in survivors if kept}

def get_matches_dataframe(twitter_users, sehir_directory, threshold=0, limit=1):
    """Match every Twitter user and return the surviving matches as a frame.

    Runs ``get_sehir_twitter_matches`` over ``twitter_users``, drops matches
    scoring at or below ``threshold`` and returns a DataFrame with one row per
    remaining screen name: column ``screen_name`` and column ``match_name``
    (the list of ``(match, score)`` pairs kept for that user).
    """
    # dict() keeps the last result when a screen name is yielded twice.
    matches = dict(get_sehir_twitter_matches(twitter_users, sehir_directory, limit=limit))
    filtered_matches = filter_matches_by_threshold(matches, threshold=threshold)

    return pd.DataFrame({'screen_name': list(filtered_matches.keys()),
                         'match_name': list(filtered_matches.values())})

In [3]:
# The password is deliberately NOT part of the connection string: libpq reads
# it from the PGPASSWORD environment variable or from ~/.pgpass, so the
# notebook no longer stores a secret in plain text.
connection = psycopg2.connect('dbname=link_formation host=localhost user=postgres')

# Full user table as stored in the database.
twitter_users = pd.read_sql("SELECT * FROM twitter_user", connection)

# Follow relations; the surrogate primary key column is dropped.
user_connections = pd.read_sql("SELECT * FROM twitter_connection", connection).drop('id', axis=1)

In [4]:
# Spot-check five random rows of the freshly loaded user table.
twitter_users.sample(5)

Unnamed: 0,id,name,screen_name,lang,match_name,match_ratio,followers_count,friends_count
16789,855559014155001856,Balance le Pop Corn,Balancepopcorn,fr,Sepp,68,131,1970
15875,3576163462,Ahmet Tarık Eren,ahmettarikeren,tr,Ahmet Tarik Eren,93,131,162
16529,985804261006200832,Yasin,Yasin98404032,tr,Msn -,60,17,54
19995,2715352408,selim kavis,selim_kavis,tr,! Kaib,68,1,54
9298,1233906308,Kurumsal Grup,KurumsalGrup,tr,Kumsal Gokalp,64,128,780


In [5]:
def no_sehir(x):
    """Return True when the cleaned form of ``x`` contains "sehir".

    Despite the name, a True result means the text DOES mention "sehir";
    the notebook uses it as the ``is_org`` flag.
    """
    return "sehir" in clean(x)

In [6]:
# Reload the user table and keep only rows whose stored match_name is longer
# than six characters. `where` replaces every non-matching row with NaN and
# `dropna` then removes those rows, together with any row that has a missing
# value in any column. The frame ends up indexed by the Twitter id.
# NOTE(review): the NaN fill seems to upcast the integer columns to float (the
# sample output shows ids such as 9.121955e+17), so large ids are probably no
# longer exact in this frame. Confirm whether that matters before relying on them.
twitter_users = pd.read_sql("SELECT * FROM twitter_user", connection)
twitter_users = twitter_users.where(twitter_users.match_name.str.len()>6)\
                             .dropna().set_index("id")
# Alternative filters tried earlier, kept for reference:
#         .where(twitter_users.match_ratio>85)
# .where(~twitter_users.name.str.contains("(?i)sehir"))\
twitter_users.sample(5)

Unnamed: 0_level_0,name,screen_name,lang,match_name,match_ratio,followers_count,friends_count
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
9.121955e+17,Hanefî Usûlü,hanefiusulu,tr,Hafsa Ulusal,61.0,1180.0,88.0
4322558000.0,ESOT.,sureyyalism,tr,Sureyya Yilmazer,67.0,60.0,420.0
755943000.0,eren semih,lzmsz_adam,tr,! LMS Admin,63.0,136.0,1074.0
350907200.0,Fatih Ünal,unlfatih,tr,Fatih Durgun,75.0,288.0,256.0
1893746000.0,Academy Lingua Dil,AcademyLingua,tr,Academia,76.0,191.0,939.0


In [7]:
# Flag every account whose display name mentions "sehir" (see no_sehir).
is_org_flags = twitter_users["name"].apply(no_sehir)
twitter_users["is_org"] = is_org_flags
twitter_users.sample(5)

Unnamed: 0_level_0,name,screen_name,lang,match_name,match_ratio,followers_count,friends_count,is_org
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2918210000.0,Serkan KOCA,serkankocaa,tr,Serkan Akcay,78.0,1042.0,707.0,False
3019587000.0,buldumaldımsattım,bulalsa_bualsat,tr,Busra Bulut,62.0,91.0,729.0,False
1303798000.0,Seslendirme Dublaj,seslendirme,tr,Bilgilendirme,67.0,554.0,1813.0,False
1592013000.0,Arslan ZİYLAN,ziylan1,tr,Arslan Ziylan,83.0,172.0,850.0,False
1262309000.0,itiraf şehir,itirafsehir,tr,Printer Sehir,67.0,1.0,10.0,True


In [8]:
# Split the table on the is_org flag; the flag column itself is dropped from
# both halves.
sehir_orgs, sehir_users = (
    twitter_users[twitter_users.is_org == flag].drop(labels=["is_org"], axis=1)
    for flag in (True, False)
)

In [9]:
# Spot-check accounts flagged as organisations ("sehir" in the display name).
sehir_orgs.sample(5)

Unnamed: 0_level_0,name,screen_name,lang,match_name,match_ratio,followers_count,friends_count
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
7.602387e+17,Muhammed Sehirli,SehirliMuhammed,en,Muhammed Celik,72.0,1.0,32.0
4217414000.0,Şehir Gündemi,sehirgundemi,tr,Ergun Demiro,75.0,133.0,575.0
2976629000.0,sivil şehirli,SivilSehirli,en-gb,Sehir Sehir,61.0,37.0,148.0
8.557354e+17,sehiryeryuzugencleri,yeryuzugencleri,en,Yeryuzu Gencleri,97.0,49.0,117.0
7.963099e+17,ŞehirMYO,SehirMYO,tr,Sehir FM,75.0,127.0,35.0


In [10]:
# Spot-check accounts that were not flagged as organisations.
sehir_users.sample(5)

Unnamed: 0_level_0,name,screen_name,lang,match_name,match_ratio,followers_count,friends_count
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1460581000.0,Hünkar Genç,hunkargenc,tr,! Arge,90.0,134.0,858.0
405677200.0,Berfin Serindağ,beffyou,tr,Tala Mustafa Yousef Yousef,64.0,164.0,197.0
3385957000.0,ismet,ismetozdgn,tr,Ismet Can Ozdogan,74.0,100.0,93.0
187214100.0,Dede Reis,DedelerCikmasin,en,Serkan Dede,66.0,4.0,83.0
7.046602e+17,Mahmut TANRIKULU,mtanrikulu61,tr,! Tarih,72.0,189.0,769.0


In [11]:
def truncate(x):
    """Return the first nine characters of ``int(x)`` as an integer.

    Used to derive a short key from a Twitter id. Ids that share their
    leading nine digits collapse onto the same key, and ids with nine digits
    or fewer are returned unchanged.
    """
    digits = str(int(x))
    return int(digits[:9])

In [12]:
# Re-key the table by the truncated id: the original id is kept as column
# `tw_id`, and the nine-digit key becomes the new index (still named "id").
resetted = twitter_users.reset_index()
index = resetted["id"].apply(truncate)
resetted = resetted.rename(columns={"id": "tw_id"})
resetted.index = index

twitter_users = resetted
twitter_users.sample(5)

Unnamed: 0_level_0,tw_id,name,screen_name,lang,match_name,match_ratio,followers_count,friends_count,is_org
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
90976741,90976740.0,Abdullah Dursun,sismanadam,tr,Saman Madadi,64.0,154.0,478.0,False
306876328,306876300.0,Hüseyin S,h_sey,tr,Hüseyin Alptekin,72.0,5.0,163.0,False
181272713,181272700.0,Busra Sariaslan,busrasariaslan6,tr,Busra Arslan,81.0,88.0,34.0,False
332173478,3321735000.0,izzet bakan,BakanIzzet,tr,! Kaib,68.0,1014.0,3677.0,False
487814423,4878144000.0,خـــالــد ابــن عـــمــر,kxd_12,en,AB1_1302 (140),45.0,184.0,2209.0,False


In [13]:
# Load the university directory (name and e-mail columns only) and turn
# missing cells into empty strings so names can be joined later.
# NOTE(review): some match names printed further down (e.g.
# "åehir ãniversitesi") look like UTF-8 text decoded as Latin-1. Confirm the
# real encoding of contacts.csv.
sehir_directory = pd.read_csv(
    '../datasets/contacts.csv',
    encoding="ISO-8859-1",
    usecols=['First Name', 'Last Name', 'Primary Email'],
).replace(np.nan, '', regex=True)

In [14]:
# Lower-cased "first last" strings: the candidate list used for matching.
# A missing first or last name leaves a leading or trailing space in the entry.
fullnames = [
    ' '.join((first_name, last_name)).lower()
    for first_name, last_name in zip(sehir_directory['First Name'],
                                     sehir_directory['Last Name'])
]

In [15]:
# Fuzzy-match every user against the directory. The table is cut into eight
# consecutive row chunks that are processed by parallel workers.
start = time.time()
chunk_len = len(twitter_users) / 8
chunks = [twitter_users[int(i * chunk_len):int((i + 1) * chunk_len)] for i in range(8)]
sehir_matches = Parallel(n_jobs=-1)(
    delayed(get_matches_dataframe)(chunk, sehir_directory) for chunk in chunks)
print("took: ", time.time()-start)





took:  1959.82204246521


In [16]:
# Stitch the per-chunk results together under one fresh 0..n-1 index.
sehir_matches_df = pd.concat(sehir_matches, ignore_index=True)
print("There are {} matches".format(len(sehir_matches_df)))
sehir_matches_df.sample(5)

There are 20674 matches


Unnamed: 0,match_name,screen_name
11836,"[( ismail kara, 90)]",ismail63206146
12481,"[( global, 67)]",Gzllba
18973,"[(ihsan f. i. albittar albittar, 86)]",ismaillugur
12728,"[( global, 90)]",urbanhist
20494,"[(ieee kulubu, 86)]",gtuieee


In [17]:
# Each match_name cell holds a list of (name, score) pairs; keep only the
# first pair and split it into two plain columns.
# Not re-runnable: once match_name holds strings, the indexing below no longer applies.
top_matches = sehir_matches_df.match_name.apply(lambda candidates: candidates[0])
sehir_matches_df['match_ratio'] = top_matches.apply(lambda pair: pair[1])
sehir_matches_df.match_name = top_matches.apply(lambda pair: pair[0])
sehir_matches_df.sample(5)

Unnamed: 0,match_name,screen_name,match_ratio
6500,fatma neslihan tutuncu,FDurgungoz,86
7351,deniz haj abrahim,muhammathamza,86
18454,muge akarsu,bngisuab,90
16646,edanur saluk,eaudeluna,82
9375,eda kurt,Emineedahasta,90


In [18]:
# User table without the match columns stored in the database; the freshly
# computed ones are merged in next.
tu = twitter_users.drop(["match_name", "match_ratio"], axis=1)

In [19]:
# Attach the new matches to the user attributes (joined on screen_name) and
# index the result by the truncated Twitter id again.
merged = sehir_matches_df.merge(tu, on="screen_name")
index = merged.tw_id.apply(truncate)
twitter_users = merged.assign(id=index).set_index("id")
twitter_users.sample(5)

Unnamed: 0_level_0,match_name,screen_name,match_ratio,tw_id,name,lang,followers_count,friends_count,is_org
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
345917380,mehmet baran85,hyrlisibeglm,67,345917400.0,EMBaran,tr,1410.0,211.0,False
95451420,rukiye ozturk,yesimgizer,68,95451420.0,yeşo,tr,473.0,1637.0,False
748087265,zulal icoz,ilci_zulal,74,7.480873e+17,Zülal Ilci,en,9.0,61.0,False
388385729,fatma eslem akbiyik,eslemuzunkaya,86,388385700.0,eslem yıldız,tr,8.0,245.0,False
294990622,yasemin atagul,YSMNSLK,90,294990600.0,yasemin,tr,162.0,605.0,False


In [20]:
# Persist the matched users. A later cell writes the filtered table to this
# same path, overwriting this file.
twitter_users.to_csv("../REST/static/twitter_users.csv", index_label="id")

In [15]:
# Reload the saved users from disk, indexed by the truncated id. Because a
# later cell overwrites this file with the filtered table, what is loaded here
# depends on which cell wrote the file last.
twitter_users = pd.read_csv("../REST/static/twitter_users.csv", index_col="id")
twitter_users.sample(5)

Unnamed: 0_level_0,match_name,screen_name,match_ratio,tw_id,name,lang,followers_count,friends_count,is_org
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
484031646,elif yilmaz,Elifylmz12345,91,4840316000.0,Elif yılmaz,tr,1.0,15.0,False
410242678,mehmet korkmaz,M_Ali_Korkmaz,95,410242700.0,Mehmet Ali Korkmaz,tr,73.0,101.0,False
206661587,ersin onur erdogan,ersinoe,94,206661600.0,ersin onur erdoğan,en,184.0,156.0,False
582097020,cemalettin orkcu,Cemalettinorkcu,93,582097000.0,Cemalettin Örkcü,tr,486.0,89.0,False
436226006,zeynep yilmaz,zeynep_ylmz1,92,436226000.0,zeynep_yilmaz,en,34.0,167.0,False


In [16]:
# Look up one specific account by its truncated id. The next cell adds this
# row to the filtered set by hand.
twitter_users.loc[291122559]

match_name            ammar rasid
screen_name        AmmarRashed_MB
match_ratio                    87
tw_id                 2.91123e+08
name                 Ammar Rashed
lang                           en
followers_count               385
friends_count                 335
is_org                      False
Name: 291122559, dtype: object

In [17]:
# Keep only confident matches (ratio above 90), then add account 291122559 by
# hand. pd.concat is used because DataFrame.append was removed in pandas 2.0;
# selecting with a list keeps the extra row as a one-row frame with its
# original column dtypes.
filtered_twu = twitter_users[twitter_users.match_ratio>90]
filtered_twu = pd.concat([filtered_twu, twitter_users.loc[[291122559]]])

## Assigning random communities

In [155]:
# One community per ~20 graph nodes, but never fewer than three. The graph G
# is only built further down the notebook, so on a fresh top-to-bottom run it
# does not exist yet; fall back to the size of the user table in that case
# instead of failing with a NameError.
try:
    n_graph_nodes = len(G.nodes())
except NameError:
    n_graph_nodes = len(filtered_twu)
num_communities = max(n_graph_nodes//20,3)
# Communities are assigned uniformly at random (no seed is set).
filtered_twu["community"] = [np.random.randint(0, num_communities) for i in range(len(filtered_twu))]

In [160]:
# Save the filtered users (with their community column). This overwrites the
# file that the full table was written to earlier in the notebook.
# Alternative destination, kept for reference:
# filtered_twu.to_csv("../datasets/twitter_users.csv", index_label="id")
filtered_twu.to_csv("../REST/static/twitter_users.csv", index_label="id")

In [158]:
# Compare the sizes of the loaded and the filtered table.
len(twitter_users), len(filtered_twu)

(1597, 1597)

In [159]:
# Accounts in the filtered set that are flagged as organisations.
filtered_twu[filtered_twu.is_org==True]

Unnamed: 0_level_0,match_name,screen_name,match_ratio,tw_id,name,lang,followers_count,friends_count,is_org,community
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
111082356,sehir sehir,sehirlibrary,95,1110824000.0,Sehir Library,tr,911.0,43.0,True,4
106086098,åehir ãniversitesi,SehirUniversite,94,106086100.0,ŞEHİR Üniversitesi,tr,12195.0,166.0,True,1
819269230,sehir sehir,sehir_alumni,95,8.192692e+17,Sehir Alumni,en,275.0,5.0,True,28
841682908,! sehir kariyer fest,sehircareerfest,94,8.416829e+17,Şehir Kariyer Fest,tr,241.0,220.0,True,32
847741204,sehir cycling club,SehirCycling,94,8.477412e+17,Şehir Cycling Club,tr,123.0,92.0,True,26
174415744,sehir sehir,sehiredebiyat,95,174415700.0,sehir edebiyat,en,2070.0,287.0,True,8
856911339,sehir sehir,sehirwibclub,95,8.569113e+17,Sehir WIB Club,tr,61.0,140.0,True,9
803581588,sehir dance club,SehirDance,100,8.035816e+17,Sehir Dance Club,tr,38.0,219.0,True,17
283216865,åehir meydanä±,sehirmeydani,92,2832169000.0,Şehir Meydanı,tr,66.0,41.0,True,13
271718939,åehir ãniversitesi,SehirDedikodu,97,2717189000.0,Şehir Üniversitesi,tr,150.0,789.0,True,9


In [20]:
# Spot-check five random rows of the filtered set.
filtered_twu.sample(5)

Unnamed: 0_level_0,match_name,screen_name,match_ratio,tw_id,name,lang,followers_count,friends_count,is_org
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
826480844,mert erbudak,merterbudak,100,826480800.0,mert erbudak,tr,14.0,56.0,False
880301675,ali erken,demalieren,94,8.803017e+17,ali eren,tr,31.0,544.0,False
350533754,omer aslan,omeraslan_tweet,95,3505338000.0,Ömer ASLAN,tr,6.0,23.0,False
261245071,tuba yuce,tuba_yuce,94,2612451000.0,Tuba Yüce,tr,102.0,31.0,False
373568051,yasmin celik,Yaseminelik10,92,373568100.0,Yasemin Çelik,tr,3.0,463.0,False


In [21]:
# Find accounts whose screen name contains "kral".
filtered_twu[filtered_twu.screen_name.str.contains("kral")]

Unnamed: 0_level_0,match_name,screen_name,match_ratio,tw_id,name,lang,followers_count,friends_count,is_org
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
14668733,ahmet bulut,kral,100,14668733.0,Ahmet Bulut,en,243.0,62.0,False


## Random Connections

In [161]:
from datetime import datetime

In [162]:
def present_in_date(changes_dates, queried_date):
    """Tell whether a connection exists on ``queried_date``.

    ``changes_dates`` maps date strings (``'%Y.%m.%d'``, e.g. ``2018.05.08``)
    to ``True`` (connection added) or ``False`` (connection removed), e.g.
    ``{d1: True, d2: False, d3: True}``. The result is the value of the latest
    change dated on or before ``queried_date``; it is ``False`` when there is
    no such change or when ``changes_dates`` is empty/None.
    """
    if not changes_dates:
        return False

    date_format = '%Y.%m.%d'  # 2018.05.08

    def parse(text):
        return datetime.strptime(text, date_format)

    ordered_changes = sorted(changes_dates, key=parse)
    target = parse(queried_date)

    state = False
    for change_date in ordered_changes:
        if parse(change_date) > target:
            # Every later change lies after the queried day.
            break
        state = changes_dates[change_date]
    return state

In [163]:
def connections_dict2df(cons):
    """Turn a connections dict into a three-column DataFrame.

    Parameters
    ----------
    cons : dict
        Maps ``(from_user_id, to_user_id)`` keys to their formation history.

    Returns
    -------
    DataFrame
        One row per key, in dict order, with columns ``from_user_id``,
        ``to_user_id`` and ``formation``. An empty dict yields an empty frame
        that still has these three columns (previously this raised a
        ValueError when the column names were assigned).
    """
    rows = [[k[0], k[1], cons[k]] for k in cons]
    return pd.DataFrame(rows, columns=["from_user_id","to_user_id","formation"])

In [165]:
def generate_cons_dict(nodes, edges_per_date=None, add_prob=0.5, remove_prob=0.5, dates=["2018.05.%02d"%x for x in range(1,31)]):
    """Simulate a random history of directed connections between ``nodes``.

    Parameters
    ----------
    nodes : DataFrame
        Its index values are used as node ids.
    edges_per_date : int, optional
        Number of (from, to) pairs drawn per date. Defaults to ``len(nodes)``
        (the previous code read the size of the global ``filtered_twu`` here,
        ignoring the argument).
    add_prob : float
        Probability that a pair seen for the first time becomes a connection.
    remove_prob : float
        Probability that a currently present connection is removed when its
        pair is drawn again.
    dates : list of str
        Dates in ``'%Y.%m.%d'`` format; processed in sorted order.

    Returns
    -------
    dict
        ``{(from, to): {date: True/False, ...}}`` where True marks a formation
        and False a removal on that date.

    Prints the elapsed wall-clock time.
    """
    dates = sorted(dates)
    start = time.time()
    if edges_per_date is None:
        edges_per_date = len(nodes)

    # Hoisted out of the loops: drawing a position in the index is equivalent
    # to sampling one row and reading its label, without building a frame for
    # every draw.
    node_ids = nodes.index
    random_connections = dict()  # {(from, to):{date1:True, date2:False, ...}}

    for d in dates:
        for _ in range(edges_per_date):
            from_ = node_ids[np.random.randint(len(node_ids))]
            to = node_ids[np.random.randint(len(node_ids))]
            if from_ == to:
                # Self-loops are skipped (the draw is still counted).
                continue

            das = random_connections.get((from_, to))
            if das is None:
                # First time this pair is drawn: maybe create the connection.
                if np.random.random() < add_prob:
                    random_connections[(from_, to)] = {d: True}
            elif present_in_date(das, d):
                if np.random.random() < remove_prob:
                    das[d] = False
            else:
                # A previously removed connection is always re-formed.
                das[d] = True

    print("took: ",time.time()-start)
    return random_connections

In [166]:
# Simulate connections among the filtered users. With remove_prob=1 a
# connection that is currently present is always removed when its pair is
# drawn again.
cons_dict = generate_cons_dict(filtered_twu, add_prob=0.35, remove_prob=1)

took:  28.426517248153687


In [167]:
# Number of distinct (from, to) pairs that received at least one event.
len(cons_dict)

16930

In [168]:
# Convert the simulated connections into a table, time the conversion and
# save the result for the REST app.
start = time.time()
random_connections = connections_dict2df(cons_dict)
elapsed = time.time() - start
print("took:", elapsed)
# Alternative destination: "../datasets/random_connections.csv"
random_connections.to_csv("../REST/static/random_connections.csv")
random_connections.head(5)

took: 0.029363155364990234


Unnamed: 0,from_user_id,to_user_id,formation
0,323414232,580832386,{'2018.05.10': True}
1,376693951,140011527,{'2018.05.02': True}
2,236443603,762975238,{'2018.05.02': True}
3,831765693,495928885,{'2018.05.11': True}
4,453900488,139265919,{'2018.05.19': True}


In [169]:
# Show the first connection whose history contains more than one event.
for row_label, row in random_connections.iterrows():
    if len(row.formation) > 1:
        print(row_label, row)
        break

115 from_user_id                                    227292161
to_user_id                                      164903702
formation       {'2018.05.28': False, '2018.05.17': True}
Name: 115, dtype: object


## Construct the network

In [170]:
# Spot-check the connections loaded from the database. They are only
# displayed here; the graph below is built from the simulated connections.
user_connections.sample(5)

Unnamed: 0,from_user_id,to_user_id,formation
10092,372162808,106086098,{'2018.05.08': True}
2379,930879283224104963,106086098,{'2018.05.08': True}
28973,105879832,174415744,{'2018.05.08': True}
435,610820924,106086098,{'2018.05.08': True}
25303,811484144450633728,1536995378,{'2018.05.08': True}


In [171]:
def _present_on_first_day(dates):
    """Whether a formation history has the connection present on 2018.05.01."""
    return present_in_date(dates, "2018.05.01")

# Mark the connections that already exist on the first simulated day.
random_connections["first_date"] = random_connections.formation.apply(_present_on_first_day)
random_connections.sample(5)

Unnamed: 0,from_user_id,to_user_id,formation,first_date
5124,37036731,238165726,{'2018.05.03': True},False
127,320736080,109881388,{'2018.05.10': True},False
7829,863569502,155307568,{'2018.05.05': True},False
15115,323385174,846642751,{'2018.05.23': True},False
6994,142036636,508905512,{'2018.05.28': True},False


In [172]:
# Build the directed graph of connections present on the first day. Both
# endpoints are reduced to their nine-digit key and an edge is added only
# when both keys belong to the filtered user table.
G = nx.DiGraph()

def truncate(x):
    """First nine characters of ``int(x)``, as an integer."""
    return int(str(int(x))[:9])

first_day_edges = random_connections[random_connections.first_date==True]
known_ids = filtered_twu.index
for raw_from, raw_to in zip(first_day_edges["from_user_id"], first_day_edges["to_user_id"]):
    from_, to = truncate(raw_from), truncate(raw_to)
    if from_ in known_ids and to in known_ids:
        G.add_edge(from_, to)

In [173]:
# Copy selected user attributes onto the graph nodes. Text attributes other
# than `lang` are ASCII-folded with clean(); everything else is stored as is.
augs = ["name", "screen_name","match_name", "followers_count","friends_count", "lang"]
for node in G.nodes():
    user = twitter_users.loc[node]
    for aug in augs:
        value = user[aug]
        if aug != "lang" and isinstance(value, str):
            # min_len=0: clean()'s default minimum length of 5 would replace
            # short values (a four-letter screen name, say) with an empty string.
            value = clean(value, min_len=0)
        G.nodes[node][aug] = value

In [174]:
# Number of users that take part in at least one first-day connection.
len(G.nodes())

841

In [175]:
# Number of directed connections in the graph.
len(G.edges())

592

In [176]:
# Store degree statistics as node attributes. `G.nodes[...]` is used, as in
# the rest of the notebook; the old `G.node` alias was removed in NetworkX 2.4.
for ix,deg in G.degree():
    G.nodes[ix]['degree'] = deg
    # parity is 1 for an even total degree and 0 for an odd one.
    G.nodes[ix]['parity'] = (1-deg%2)

for ix,in_deg in G.in_degree():
    G.nodes[ix]['in_degree'] = in_deg

for ix,out_deg in G.out_degree():
    G.nodes[ix]['out_degree'] = out_deg

In [177]:
# Per-node centrality scores; each call returns a dict keyed by node id.
# The eigenvector computation is allowed up to 500 iterations.
evc = nx.eigenvector_centrality(G, max_iter=500)
closeness = nx.closeness_centrality(G)
betweenness = nx.betweenness_centrality(G)
pagerank = nx.pagerank(G)

In [178]:
# Attribute name -> {node id: score}, in the order the attributes are attached.
metrics = dict(
    eigenvector_centrality=evc,
    closeness_centrality=closeness,
    betweenness=betweenness,
    pagerank=pagerank,
)

In [179]:
# Write every centrality score onto its node under the metric's name.
for metric_name, scores in metrics.items():
    for node_id in scores:
        G.nodes[node_id][metric_name] = scores[node_id]

In [180]:
# Inspect one node together with all attributes attached so far.
list(G.nodes(data=True))[0]

(595297280,
 {'betweenness': 0.0,
  'closeness_centrality': 0.0011904761904761906,
  'degree': 1,
  'eigenvector_centrality': 4.769814054833304e-09,
  'followers_count': 71.0,
  'friends_count': 310.0,
  'in_degree': 1,
  'lang': 'en',
  'match_name': 'talha hasan turkoglu',
  'name': 'talha turkoglu',
  'out_degree': 0,
  'pagerank': 0.0013285987595611335,
  'parity': 0,
  'screen_name': 'talha_turkoglu'})

In [181]:
import json
from networkx.readwrite import json_graph

# Serialise the graph in node-link form and write it where the REST app
# serves its static networks.
data = json_graph.node_link_data(G)
with open('../REST/static/networks/twitter_users_graph2.json', 'w') as f:
    f.write(json.dumps(data, indent=4))

## Calculating Homophily

In [182]:
def homophily(nw, metric="lang"):
    """Test whether edges cross ``metric`` groups less often than chance.

    Compares the fraction of edges whose endpoints differ in the node
    attribute ``metric`` with ``1 - sum(p_g ** 2)``, where ``p_g`` is the
    share of nodes in group ``g`` (the chance that two nodes picked at random
    belong to different groups). Both numbers are printed.

    Parameters
    ----------
    nw : graph
        Must support ``nw.nodes()``, ``nw.nodes[n][metric]`` and ``nw.edges()``.
    metric : str
        Name of the node attribute that defines the groups.

    Returns
    -------
    bool
        True when the cross-group edge ratio is strictly below the chance
        level. A graph without nodes or without edges gives no evidence
        either way and returns False (it used to raise ZeroDivisionError).
    """
    n_nodes = len(nw.nodes())
    edges = list(nw.edges())
    if n_nodes == 0 or not edges:
        print("homophily is undefined for a graph without nodes or edges")
        return False

    # Number of nodes per attribute value.
    group_sizes = dict()
    for n in nw.nodes():
        value = nw.nodes[n][metric]
        group_sizes[value] = group_sizes.get(value, 0) + 1

    heterogeneity_fraction_norm = 1 - sum(
        [(float(size)/n_nodes)**2 for size in group_sizes.values()])
    cross_edges = sum(
        [int(nw.nodes[f][metric] != nw.nodes[t][metric]) for f,t in edges])
    cross_metric_ratio = cross_edges/float(len(edges))
    print("cross-metric edges ratio: ", cross_metric_ratio)
    print("Heterogeneity Fraction Norm", heterogeneity_fraction_norm)
    return cross_metric_ratio < heterogeneity_fraction_norm

In [183]:
# Test for language homophily (default metric "lang") on the graph.
homophily(G)

cross-metric edges ratio:  0.3597972972972973
Heterogeneity Fraction Norm 0.35192801729439926


False