In [1]:
import psycopg2

import pandas as pd
import numpy as np

from copy import deepcopy

import ast
import random
import networkx as nx
import time, unicodedata
import itertools

from fuzzywuzzy import fuzz
from fuzzywuzzy import process

from joblib import Parallel, delayed

In [2]:
def clean(name, min_len=5, junk_replacement=''):
    try:
        cleaned = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').lower().decode("ascii")
    except TypeError:
        return junk_replacement
    if len(cleaned) < min_len:
        return junk_replacement
    return cleaned

def get_matches_edit_distance(item, choices, limit, scorer=fuzz.WRatio):
    return process.extract(item, choices, limit=limit, scorer=scorer)
counter = 0
def get_sehir_twitter_matches(twitter_users, sehir_directory, limit=1):
    global fullnames, counter
    twitter_user_by_screen_name = twitter_users.set_index('screen_name')
    start = time.time()
    for screen_name in twitter_users['screen_name']:
        twitter_name = twitter_user_by_screen_name.loc[screen_name]['name']
        match_name = get_matches_edit_distance(twitter_name, fullnames, limit)
        counter += 1
#         if counter %100 == 0:
#             print(counter, "out of ", len(twitter_users))
#             start_ = time.time()
#             print(start_-start, "seconds")
#             start = start_
        yield (screen_name, match_name)
        
def filter_matches_by_threshold(matches_dict, threshold=0):
    filtered_dict = dict()
    for screen_name, matches in matches_dict.items():
        filtered = [(match, score) for match, score in matches if score > threshold]
        
        if filtered:
            filtered_dict[screen_name] = filtered
        
    return filtered_dict

def get_matches_dataframe(twitter_users, sehir_directory, threshold=0, limit=1):
    matches = {screen_name : match_name for screen_name, match_name in 
               get_sehir_twitter_matches(twitter_users, sehir_directory, limit=limit)}
    
    filtered_matches = filter_matches_by_threshold(matches, threshold=threshold)
    screen_names = filtered_matches.keys()
    return pd.DataFrame({'screen_name': list(screen_names),
                         'match_name': [filtered_matches[screen_name] for screen_name in screen_names]})

In [3]:
connection = psycopg2.connect('dbname=link_formation host=localhost user=postgres password=1_sehir_1')

twitter_users = pd.read_sql("SELECT * FROM twitter_user", connection)

user_connections = pd.read_sql("SELECT * FROM twitter_connection", connection).drop('id', axis=1)

In [4]:
twitter_users.sample(5)

Unnamed: 0,id,name,screen_name,lang,match_name,match_ratio,followers_count,friends_count
5986,4663992215,Victoria Young,vivyg123456,en,! IYBF,45,6,50
16210,2601146536,ElifTzgn,ElifTzgn,tr,Elif Bozgan,74,186,400
12505,82815013,Koray Sevindi,koraysevindi,en,Sule Yusra Sevindik,68,689,462
20739,4715436604,Genç ISTAC,YoungISTAC,en,Orcun Kitapci,61,927,32
13031,4026433936,ali yılmaz,yilmazali174,tr,Yilmaz Filiz,67,159,582


In [5]:
no_sehir = lambda x:"sehir" in clean(x)

In [6]:
twitter_users = pd.read_sql("SELECT * FROM twitter_user", connection)
twitter_users = twitter_users.where(twitter_users.match_name.str.len()>6)\
                             .dropna().set_index("id")
#         .where(twitter_users.match_ratio>85)
# .where(~twitter_users.name.str.contains("(?i)sehir"))\
twitter_users.sample(5)

Unnamed: 0_level_0,name,screen_name,lang,match_name,match_ratio,followers_count,friends_count
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2972874000.0,elifdvlc,elfdvlc,tr,Elif Develi,67.0,163.0,648.0
2765427000.0,yağsem,YagmurYasar8,tr,Guluzar Yagmur Aksar,75.0,60.0,49.0
9.508403e+17,acikmurat@hotmail.com,acikmurathotma1,en,Emrah Avci,60.0,1.0,95.0
217442200.0,mahmut damla kudas,mhmtkds,tr,Mehmet Köse,71.0,14.0,200.0
14159150.0,United Nations,UN,en,Tugba Akgun,90.0,10388611.0,1165.0


In [7]:
twitter_users["is_org"] = twitter_users.name.apply(no_sehir)
twitter_users.sample(5)

Unnamed: 0_level_0,name,screen_name,lang,match_name,match_ratio,followers_count,friends_count,is_org
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
8.332642e+17,Kubi(istanbul)Rehber,bozcalim_kubi,tr,Social,60.0,41.0,812.0,False
2357028000.0,meryem betül koçak,Eygecee,tr,Esma Husniye Celebioglu,64.0,1009.0,700.0,False
1405173000.0,tuğba serin,tubaserin2,tr,Tugba Sezgin,73.0,8.0,16.0,False
496343600.0,Yusuf Ziya Altıntaş,yusufziya_a,tr,Yusuf Yasar,73.0,397.0,490.0,False
4256563000.0,ibrahim yılmaz,iyilmazmeb,tr,Muhammed Ali Yilmaz,70.0,486.0,105.0,False


In [8]:
sehir_orgs = twitter_users[twitter_users.is_org==True].drop(labels=["is_org"], axis=1)
sehir_users = twitter_users[twitter_users.is_org==False].drop(labels=["is_org"], axis=1)

In [9]:
sehir_orgs.sample(5)

Unnamed: 0_level_0,name,screen_name,lang,match_name,match_ratio,followers_count,friends_count
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
741687400.0,Şehirde Hayat Var,yasasinhayat64,tr,Yassin Awad,64.0,50.0,358.0
3351438000.0,Sehir15 Girisliler,Sehir15,tr,Sehir Sehir,86.0,12.0,39.0
2388078000.0,Şiir Şehirde,siirsehirde,tr,Sehir Sehir,73.0,4.0,17.0
8.419928e+17,Nevşehir Hacı Bektaş Veli Üniversitesi,nevsehiruniv,tr,! HPE Sehirunv,75.0,1781.0,244.0
8.569113e+17,Sehir WIB Club,sehirwibclub,tr,! Clubs,80.0,61.0,140.0


In [10]:
sehir_users.sample(5)

Unnamed: 0_level_0,name,screen_name,lang,match_name,match_ratio,followers_count,friends_count
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2871499000.0,AKÜ SOSYOLOJİ,AkuSosyoloji,tr,! Sosyoloji,86.0,183.0,164.0
3699806000.0,KAÇAKÇI KURBAN,Yorgun_Kacak,tr,Orhun Kara,64.0,416.0,1941.0
9434942.0,Sena Kuzu,senakuzu,en,Sena Kuzu,94.0,42.0,69.0
102039400.0,İBB Gençlik Meclisi,GenclikMeclisi,tr,! Guvenlik,68.0,21499.0,8.0
76892600.0,irfan yıldız,grenart,tr,Yaren Aktas,64.0,276.0,1182.0


In [11]:
truncate = lambda x: int(str(int(x))[:9])

In [12]:
resetted = twitter_users.reset_index()
index = resetted.id.apply(truncate)
resetted.rename(columns={"id":"tw_id"}, inplace=True)
resetted.index = index

twitter_users = resetted
twitter_users.sample(5)

Unnamed: 0_level_0,tw_id,name,screen_name,lang,match_name,match_ratio,followers_count,friends_count,is_org
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
275501671,2755017000.0,Elvan Biçicioğlu,EBicicioglu,tr,Ilayda Demircioglu,68.0,13.0,152.0,False
256729853,256729900.0,İç Ses,thatSbisMe,tr,! Some,68.0,142.0,365.0,False
740670272,7.406703e+17,Eylül Okur,soceylul,tr,! Some,68.0,40.0,227.0,False
267200305,2672003000.0,Hilal Uysal,huysal95,tr,Muhammed Talha Uysal,68.0,105.0,165.0,False
934539958,9.3454e+17,Nebevi Metod,nebevi_metod,tr,Bim Test,56.0,278.0,869.0,False


In [13]:
sehir_directory = pd.read_csv('../datasets/contacts.csv', 
                               encoding = "ISO-8859-1", 
                               usecols=['First Name', 'Last Name', 'Primary Email'])
sehir_directory.replace(np.nan, '', regex=True, inplace=True)

In [14]:
fullnames = [' '.join(first_last_name).lower() 
                 for first_last_name in sehir_directory[['First Name', 'Last Name']].values]

In [15]:
start = time.time()
sehir_matches = Parallel(n_jobs=-1)(delayed(get_matches_dataframe)(
    twitter_users[int(i*(len(twitter_users)/8)):int((i+1)*(len(twitter_users)/8))],
    sehir_directory) for i in range(8))
print("took: ", time.time()-start)





took:  1959.82204246521


In [16]:
sehir_matches_df = pd.concat(sehir_matches)
sehir_matches_df.index = range(len(sehir_matches_df))
print("There are {} matches".format(len(sehir_matches_df)))
sehir_matches_df.sample(5)

There are 20674 matches


Unnamed: 0,match_name,screen_name
11836,"[( ismail kara, 90)]",ismail63206146
12481,"[( global, 67)]",Gzllba
18973,"[(ihsan f. i. albittar albittar, 86)]",ismaillugur
12728,"[( global, 90)]",urbanhist
20494,"[(ieee kulubu, 86)]",gtuieee


In [17]:
sehir_matches_df['match_ratio'] = sehir_matches_df.match_name.apply(lambda x: x[0][1])
sehir_matches_df.match_name = sehir_matches_df.match_name.apply(lambda x: x[0][0])
sehir_matches_df.sample(5)

Unnamed: 0,match_name,screen_name,match_ratio
6500,fatma neslihan tutuncu,FDurgungoz,86
7351,deniz haj abrahim,muhammathamza,86
18454,muge akarsu,bngisuab,90
16646,edanur saluk,eaudeluna,82
9375,eda kurt,Emineedahasta,90


In [18]:
tu=twitter_users.drop(labels=["match_name","match_ratio"], axis=1)

In [19]:
twitter_users = sehir_matches_df.merge(tu, on="screen_name")
index = twitter_users.tw_id.apply(truncate)
twitter_users["id"] = index
twitter_users.set_index("id", inplace=True)
twitter_users.sample(5)

Unnamed: 0_level_0,match_name,screen_name,match_ratio,tw_id,name,lang,followers_count,friends_count,is_org
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
345917380,mehmet baran85,hyrlisibeglm,67,345917400.0,EMBaran,tr,1410.0,211.0,False
95451420,rukiye ozturk,yesimgizer,68,95451420.0,yeşo,tr,473.0,1637.0,False
748087265,zulal icoz,ilci_zulal,74,7.480873e+17,Zülal Ilci,en,9.0,61.0,False
388385729,fatma eslem akbiyik,eslemuzunkaya,86,388385700.0,eslem yıldız,tr,8.0,245.0,False
294990622,yasemin atagul,YSMNSLK,90,294990600.0,yasemin,tr,162.0,605.0,False


In [20]:
twitter_users.to_csv("../datasets/twitter_users.csv", index_label="id")

In [15]:
twitter_users = pd.read_csv("../datasets/twitter_users.csv", index_col="id")
twitter_users.sample(5)

Unnamed: 0_level_0,match_name,screen_name,match_ratio,tw_id,name,lang,followers_count,friends_count,is_org
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
786513674,salih sahin,nihashilas,100,786513700.0,Salih Sahin,tr,713.0,624.0,False
459012541,beyzanur kocak,beyzanurkocak_,100,4590125000.0,beyzanur kocak,tr,22.0,56.0,False
161010365,damla baydemir,BaydemirDamla,100,1610104000.0,damla baydemir,tr,20.0,51.0,False
115344199,mehmet yilmaz,MehmetY38209386,92,1153442000.0,Mehmet Yılmaz,tr,199.0,586.0,False
14668733,ahmet bulut,kral,100,14668730.0,Ahmet Bulut,en,243.0,62.0,False


In [16]:
twitter_users.loc[291122559]

match_name            ammar rasid
screen_name        AmmarRashed_MB
match_ratio                    87
tw_id                 2.91123e+08
name                 Ammar Rashed
lang                           en
followers_count               385
friends_count                 335
is_org                      False
Name: 291122559, dtype: object

In [17]:
filtered_twu = twitter_users[twitter_users.match_ratio>90]
filtered_twu = filtered_twu.append(twitter_users.loc[291122559])
filtered_twu.to_csv("../datasets/twitter_users.csv", index_label="id")

In [18]:
len(twitter_users), len(filtered_twu)

(1597, 1597)

In [19]:
filtered_twu[filtered_twu.is_org==True]

Unnamed: 0_level_0,match_name,screen_name,match_ratio,tw_id,name,lang,followers_count,friends_count,is_org
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
111082356,sehir sehir,sehirlibrary,95,1110824000.0,Sehir Library,tr,911.0,43.0,True
106086098,åehir ãniversitesi,SehirUniversite,94,106086100.0,ŞEHİR Üniversitesi,tr,12195.0,166.0,True
819269230,sehir sehir,sehir_alumni,95,8.192692e+17,Sehir Alumni,en,275.0,5.0,True
841682908,! sehir kariyer fest,sehircareerfest,94,8.416829e+17,Şehir Kariyer Fest,tr,241.0,220.0,True
847741204,sehir cycling club,SehirCycling,94,8.477412e+17,Şehir Cycling Club,tr,123.0,92.0,True
174415744,sehir sehir,sehiredebiyat,95,174415700.0,sehir edebiyat,en,2070.0,287.0,True
856911339,sehir sehir,sehirwibclub,95,8.569113e+17,Sehir WIB Club,tr,61.0,140.0,True
803581588,sehir dance club,SehirDance,100,8.035816e+17,Sehir Dance Club,tr,38.0,219.0,True
283216865,åehir meydanä±,sehirmeydani,92,2832169000.0,Şehir Meydanı,tr,66.0,41.0,True
271718939,åehir ãniversitesi,SehirDedikodu,97,2717189000.0,Şehir Üniversitesi,tr,150.0,789.0,True


In [20]:
filtered_twu.sample(5)

Unnamed: 0_level_0,match_name,screen_name,match_ratio,tw_id,name,lang,followers_count,friends_count,is_org
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
141108970,aynur yilmaz,aynuryl66097645,92,1411090000.0,aynur yılmaz,tr,12.0,18.0,False
338613789,elif sezer,el_sezer,100,3386138000.0,Elif Sezer,en,56.0,60.0,False
286270314,zeynep demir,zeyy_demir,100,2862703000.0,Zeynep Demir,tr,170.0,119.0,False
350791386,ceylin yagmur can,ceylinyagmurcan,94,350791400.0,Ceylin Yağmur Can,tr,344.0,89.0,False
867510112,ahmet mursel dogan,AhmetMrselDGN42,91,8.675101e+17,Ahmet Mürsel DOĞAN,tr,5.0,46.0,False


In [21]:
filtered_twu[filtered_twu.match_name.str.contains("ammar")]

Unnamed: 0_level_0,match_name,screen_name,match_ratio,tw_id,name,lang,followers_count,friends_count,is_org
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
986265062,osamah al-ghammari,Sam_Al_Ghammari,91,986265062.0,Sam Al-Ghammari,en,1071.0,167.0,False
291122559,ammar rasid,AmmarRashed_MB,87,291122559.0,Ammar Rashed,en,385.0,335.0,False


# Random Connections

In [22]:
from datetime import datetime

In [23]:
def present_in_date(changes_dates, queried_date):
    """
    changes_dates = {d1:True, d2:False, d3:True} connection added or removed
    """
    if changes_dates:
        str2date = lambda strdate: datetime.strptime(strdate, '%Y.%m.%d')  # 2018.05.08
        changes = sorted(changes_dates,key=lambda d: str2date(d))
        queried_date = datetime.strptime(queried_date, '%Y.%m.%d')
        present = False
        for d in changes:
            if queried_date < str2date(d):
                break
            present = changes_dates[d]
        return present
    else:
        return False

In [24]:
def connections_dict2df(cons):
    d = {i:[k[0],k[1], cons[k]] for i,k in enumerate(cons)}
    df = pd.DataFrame.from_dict(d, orient='index')
    df.columns=["from_user_id","to_user_id","formation"]
    return df

In [56]:
def generate_cons_dict(nodes, edges_per_date=None, portion=2.5, dates=["2018.05.%02d"%x for x in range(1,31)], remove_edge_prob=0.5):
    dates = sorted(dates)
    start = time.time()
    if edges_per_date is None:
        edges_per_date = int(len(filtered_twu)//portion)
        
    random_connections = dict()  # {(from, to):{date1:True, date2:False, ...}}
    connection_indices = {}
    
    for d in dates:
        for i in range(edges_per_date):
            from_ = nodes.sample(1).index[0]
            to = nodes.sample(1).index[0]
            if from_!=to:
                random_connections.setdefault((from_, to), {})
                das = random_connections[(from_, to)]
                pres = present_in_date(das, d)
                if pres:
                    if np.random.random()>remove_edge_prob:
                        das[d] = False
                else:
                    das[d] = True
                random_connections[(from_, to)] = das
    
    print("took: ",time.time()-start)
    return random_connections

In [57]:
cons_dict = generate_cons_dict(filtered_twu)

took:  11.161903619766235


In [58]:
len(cons_dict)

19055

In [59]:
start = time.time()
random_connections = connections_dict2df(cons_dict)
print("took:", time.time()-start)
random_connections.to_csv("../datasets/random_connections.csv")
random_connections.head(5)

took: 0.025255203247070312


Unnamed: 0,from_user_id,to_user_id,formation
0,165638829,324673815,{'2018.05.04': True}
1,868134272,125907735,{'2018.05.26': True}
2,475761925,478731173,{'2018.05.26': True}
3,929796263,360884464,{'2018.05.23': True}
4,147682728,386419724,{'2018.05.21': True}


In [62]:
random_connections.loc[123]

from_user_id                                    165480681
to_user_id                                      133662529
formation       {'2018.05.11': True, '2018.05.16': False}
Name: 123, dtype: object

## Construct the network

In [63]:
user_connections.sample(5)

Unnamed: 0,from_user_id,to_user_id,formation
3043,882233276106231809,106086098,{'2018.05.08': True}
3978,834824229195485186,106086098,{'2018.05.08': True}
20814,802432097650610176,1110823566,{'2018.05.08': True}
28145,2320659881,174415744,{'2018.05.08': True}
36348,1668387936,450639507,{'2018.05.08': True}


In [73]:
random_connections["first_date"] = random_connections.formation.apply(
    lambda dates: present_in_date(dates, "2018.05.01"))
random_connections.sample(5)

Unnamed: 0,from_user_id,to_user_id,formation,first_date
17161,754361184,296186755,{'2018.05.07': True},False
14807,737912659,66645671,{'2018.05.26': True},False
14246,145723363,378536512,{'2018.05.30': True},False
7180,236894044,636429203,{'2018.05.06': True},False
11088,124708008,147941917,{'2018.05.02': True},False


In [74]:
G = nx.DiGraph()
# for _, row in user_connections.iterrows():
for _, row in random_connections[random_connections.first_date==True].iterrows():    
    from_ = truncate(row["from_user_id"])
    to = truncate(row["to_user_id"])
    if from_ in filtered_twu.index and to in filtered_twu.index:
        G.add_edge(from_, to)

In [75]:
augs = ["name", "screen_name","match_name", "followers_count","friends_count", "lang"]
for node in G.nodes():
    user = twitter_users.loc[node]
    for aug in augs:
        if type(user[aug])==str:
            m = clean(user[aug])
        else:
            m = user[aug]
        G.nodes[node][aug] = m

In [76]:
len(G.nodes())

872

In [77]:
len(G.edges())

637

In [78]:
for ix,deg in G.degree(G.nodes()):
    G.node[ix]['degree'] = deg
    G.node[ix]['parity'] = (1-deg%2)
    
for ix,in_deg in G.in_degree(G.nodes()):
    G.node[ix]['in_degree'] = in_deg
    
for ix,out_deg in G.out_degree(G.nodes()):
    G.node[ix]['out_degree'] = out_deg

In [80]:
evc = nx.eigenvector_centrality(G, max_iter=500)
closeness = nx.closeness_centrality(G)
betweenness = nx.betweenness_centrality(G)
pagerank = nx.pagerank(G)

In [81]:
metrics = {"eigenvector_centrality":evc,
           "closeness_centrality":closeness,
          "betweenness":betweenness,
          "pagerank":pagerank}

In [82]:
for metric_name, metric in metrics.items():
    for ix,v in metric.items():
        G.nodes[ix][metric_name] = v

In [84]:
G.nodes[754361184]

{'betweenness': 0.0,
 'closeness_centrality': 0.0015308075009567547,
 'degree': 1,
 'eigenvector_centrality': 3.6475555323569e-08,
 'followers_count': 0.0,
 'friends_count': 53.0,
 'in_degree': 1,
 'lang': '',
 'match_name': 'merve yilmaz',
 'name': 'merve ylmaz',
 'out_degree': 0,
 'pagerank': 0.001231228522441705,
 'parity': 0,
 'screen_name': 'mrv_ylmaz_'}

In [85]:
list(G.nodes(data=True))[0]

(450840581,
 {'betweenness': 5.278646555023292e-06,
  'closeness_centrality': 0.0015308075009567547,
  'degree': 3,
  'eigenvector_centrality': 3.6475555323569e-08,
  'followers_count': 258.0,
  'friends_count': 403.0,
  'in_degree': 1,
  'lang': '',
  'match_name': 'esra ozdil',
  'name': 'esraozdil',
  'out_degree': 2,
  'pagerank': 0.0010505886489707858,
  'parity': 0,
  'screen_name': 'essraozdil'})

In [86]:
import json
from networkx.readwrite import json_graph
data = nx.node_link_data(G)
with open('../REST/static/networks/twitter_users_graph2.json', 'w') as f:
    json.dump(data, f, indent=4)

## Calculating Homophily

In [87]:
def homophily(nw, metric="lang"):
    langs_probs = dict()
    for n in nw.nodes():
        user = nw.nodes[n]
        langs_probs.setdefault(user[metric], 0)
        langs_probs[user[metric]] += 1
    heterogeneity_fraction_norm = 1 - sum(
        [(float(i)/len(nw.nodes()))**2 for i in langs_probs.values()])
    cross_edges = sum(
        [int(nw.nodes[f][metric] != nw.nodes[t][metric] ) for f,t in nw.edges()])
    cross_metric_ratio = cross_edges/float(len(nw.edges()))
    print("cross-metric edges ratio: ", cross_metric_ratio)
    print("Heterogeneity Fraction Norm", heterogeneity_fraction_norm)
    return cross_metric_ratio < heterogeneity_fraction_norm

In [88]:
homophily(G)

cross-metric edges ratio:  0.01098901098901099
Heterogeneity Fraction Norm 0.0068570616951435515


False