In [1]:
import psycopg2

import pandas as pd
import numpy as np

from copy import deepcopy

import ast
import random
import networkx as nx
import time, unicodedata
import itertools

from fuzzywuzzy import fuzz
from fuzzywuzzy import process

from joblib import Parallel, delayed

In [2]:
def clean(name, min_len=5, junk_replacement=''):
    try:
        cleaned = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').lower().decode("ascii")
    except TypeError:
        return junk_replacement
    if len(cleaned) < min_len:
        return junk_replacement
    return cleaned

def get_matches_edit_distance(item, choices, limit, scorer=fuzz.WRatio):
    return process.extract(item, choices, limit=limit, scorer=scorer)
counter = 0
def get_sehir_twitter_matches(twitter_users, sehir_directory, limit=1):
    global fullnames, counter
    twitter_user_by_screen_name = twitter_users.set_index('screen_name')
    start = time.time()
    for screen_name in twitter_users['screen_name']:
        twitter_name = twitter_user_by_screen_name.loc[screen_name]['name']
        match_name = get_matches_edit_distance(twitter_name, fullnames, limit)
        counter += 1
#         if counter %100 == 0:
#             print(counter, "out of ", len(twitter_users))
#             start_ = time.time()
#             print(start_-start, "seconds")
#             start = start_
        yield (screen_name, match_name)
        
def filter_matches_by_threshold(matches_dict, threshold=70):
    filtered_dict = dict()
    for screen_name, matches in matches_dict.items():
        filtered = [(match, score) for match, score in matches if score > threshold]
        
        if filtered:
            filtered_dict[screen_name] = filtered
        
    return filtered_dict

def get_matches_dataframe(twitter_users, sehir_directory, threshold=70, limit=1):
    matches = {screen_name : match_name for screen_name, match_name in 
               get_sehir_twitter_matches(twitter_users, sehir_directory, limit=limit)}
    
    filtered_matches = filter_matches_by_threshold(matches, threshold=threshold)
    screen_names = filtered_matches.keys()
    return pd.DataFrame({'screen_name': list(screen_names),
                         'match_name': [filtered_matches[screen_name] for screen_name in screen_names]})

In [3]:
connection = psycopg2.connect('dbname=link_formation host=localhost user=postgres password=1_sehir_1')

user_connections = pd.read_sql("SELECT * FROM twitter_connection", connection).drop('id', axis=1)

In [4]:
truncate = lambda x: int(str(int(x))[:9])

twitter_users = pd.read_sql("SELECT * FROM twitter_user", connection)
twitter_users = twitter_users.where(twitter_users.match_name.str.len()>6)\
                             .dropna()
twitter_users["truncated_id"] = twitter_users.id.apply(truncate)

#         .where(twitter_users.match_ratio>85)
# .where(~twitter_users.name.str.contains("(?i)sehir"))\
twitter_users.sample(5)

Unnamed: 0,id,name,screen_name,lang,match_name,match_ratio,followers_count,friends_count,truncated_id
23840,410884000.0,orhanvahitguler,orhan_vahit,tr,Orhan Guler,77.0,97.0,501.0,410884010
33392,9.661978e+17,Gökhan Bozbaş دـ گوكخان بوزباش,gbozbash,tr,Gokhan Bora Esmer,74.0,113.0,281.0,966197821
23967,615410400.0,Trend Topics,TrendTopics3D,tr,trend micro,70.0,31.0,369.0,615410354
7595,1006486000.0,Aysel İşler,psikologvari,tr,Aysel Isler,95.0,112.0,174.0,100648551
37939,9.423449e+17,Hasan Nayir,hasannyr,tr,Hasan Huseyin Ates,86.0,12.0,45.0,942344857


In [5]:
is_org = lambda x:"sehir" in clean(x)
twitter_users["is_org"] = twitter_users.screen_name.apply(is_org)
twitter_users.sample(5)

Unnamed: 0,id,name,screen_name,lang,match_name,match_ratio,followers_count,friends_count,truncated_id,is_org
47427,8.903012e+17,Pınar,pinaarshn,tr,P�nar Akan,90.0,150.0,203.0,890301167,False
15790,7.34859e+17,Şüheda Düzcan,suhedaduzcan,tr,Suheda Duzcan,87.0,22.0,75.0,734858958,False
46492,351526700.0,Ege Üniversitesi,UniversiteEge,tr,Şehir Üniversitesi (İnsan ve Toplum Bilimleri...,86.0,112638.0,72.0,351526708,False
45576,2389975000.0,Havva Deniz,hdeniz_psy,tr,Havva Deniz Mollaibrahimoglu,90.0,157.0,920.0,238997521,False
53647,390706900.0,ali tekin,35_arap,tr,Ali Ihsan Aydin,86.0,40.0,63.0,390706947,False


In [6]:
sehir_orgs = twitter_users[twitter_users.is_org==True].set_index("id")
sehir_users = twitter_users[twitter_users.is_org==False]

In [7]:
len(sehir_orgs)

231

In [8]:
sehir_orgs.sample(5)

Unnamed: 0_level_0,name,screen_name,lang,match_name,match_ratio,followers_count,friends_count,truncated_id,is_org
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
8.192692e+17,Sehir Alumni,sehir_alumni,en,Sehir Sehir,95.0,281.0,5.0,819269230,True
3334986000.0,Lord Varys,sehirveitiraf,tr,Selda Ünvar Yılmaz,63.0,7.0,78.0,333498586,True
9.952556e+17,Şehir Activity,SehirActivity,tr,Şehir Üniversitesi (İnsan ve Toplum Bilimleri...,86.0,0.0,14.0,995255575,True
758889000.0,Cihan Cobanoglu,cihansehiruni,tr,Cihan Cobanoglu,100.0,420.0,1001.0,758889025,True
9.629691e+17,Şehir Üni Genç Önder,sehirgenconder,tr,Şehir Üniversitesi (İnsan ve Toplum Bilimleri...,86.0,79.0,223.0,962969120,True


In [9]:
sehir_users.head(5)

Unnamed: 0,id,name,screen_name,lang,match_name,match_ratio,followers_count,friends_count,truncated_id,is_org
1,567090000.0,Övünç Meriç,ovuncmeric,tr,Kardelen Meric,77.0,569.0,1170.0,567090020,False
4,726207600.0,klasik,klasikyayinlari,tr,Yavuz Kasikci,75.0,6596.0,142.0,726207614,False
5,497942800.0,Küre Yayınları,kureyayinlari,tr,Merve Yakinlar,72.0,9158.0,166.0,497942798,False
6,2674867000.0,Fatıma Tuba Yaylacı,fatimatubapetek,en,Fatima Tuba Yaylaci,94.0,844.0,246.0,267486658,False
7,1439589000.0,Mahmut Koca,mkoca66,en,Mahmut Koca,100.0,1379.0,44.0,143958858,False


In [10]:
sehir_directory = pd.read_csv('../datasets/contacts.csv', 
                               encoding = "ISO-8859-1", 
                               usecols=['First Name', 'Last Name', 'Primary Email'])
sehir_directory.replace(np.nan, '', regex=True, inplace=True)

In [11]:
fullnames = [' '.join(first_last_name).lower() 
                 for first_last_name in sehir_directory[['First Name', 'Last Name']].values]

In [None]:
start = time.time()
sehir_matches = Parallel(n_jobs=-1)(delayed(get_matches_dataframe)(
    sehir_users[int(i*(len(sehir_users)/8)):int((i+1)*(len(sehir_users)/8))],
    sehir_directory) for i in range(8))
print("took: ", time.time()-start)

In [None]:
sehir_matches_df = pd.concat(sehir_matches)
sehir_matches_df.index = range(len(sehir_matches_df))
print("There are {} matches".format(len(sehir_matches_df)))
sehir_matches_df.sample(5)

In [480]:
sehir_matches_df['match_ratio'] = sehir_matches_df.match_name.apply(lambda x: x[0][1])
sehir_matches_df.match_name = sehir_matches_df.match_name.apply(lambda x: x[0][0])
sehir_matches_df.sample(5)

Unnamed: 0,match_name,screen_name,match_ratio
28057,fatma derya mentes,DeryaaSarii,90
10093,nur betã¼l yerli,bacimbilegin,90
20027,emine bayraktar,emine_blt_1,86
19182,etem hakan ergec,hakan_tunaa,86
3016,sami anis abuhamdeh,samiyigit_,86


In [481]:
tu=twitter_users.drop(labels=["match_name","match_ratio"], axis=1)

In [482]:
twitter_users = sehir_matches_df.merge(tu, on="screen_name")
twitter_users = twitter_users.set_index("id")
twitter_users.head(5)

Unnamed: 0_level_0,match_name,screen_name,match_ratio,name,lang,followers_count,friends_count,truncated_id,is_org
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
9.970584e+17,azize fatma cakir,fgurbuz35,86,fatma gürbüz,tr,68.0,122.0,997058428,False
9.369498e+17,okan mergen,miraokan42,86,miraç okan ekmekci,tr,4.0,19.0,936949783,False
2306379000.0,ä°stanbul åehir ãniversitesi ä°åletme enst...,CHPIstGenclik,86,CHP İstanbul Gençlik,tr,5735.0,3223.0,230637902,False
333417900.0,hanife kubra demirci,RabiaDeemirci,86,Rabia Demirci,tr,96.0,314.0,333417875,False
4742273000.0,gizem serpil boylu,kronik__rehber,86,serpil.sedef,tr,49.0,113.0,474227276,False


In [483]:
sehir_orgs.to_csv("../datasets/orgs.csv",index_label="id")
sehir_orgs.to_csv("../REST/static/orgs.csv",index_label="id")

In [13]:
twitter_users = pd.read_csv("../datasets/twitter_users.csv", index_col="id")
twitter_users.sample(5)

Unnamed: 0_level_0,match_name,screen_name,match_ratio,name,lang,followers_count,friends_count,truncated_id,is_org
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
239873600.0,emre ceran,aeozyurt,86,Ahmet Emre Özyurt,tr,408.0,208.0,239873566,False
4871276000.0,ali ãakmak,_SoWesata,86,Ali Yılmaz ϜϓſϞ 🇹🇷🇺🇦,tr,299.0,391.0,487127612,False
2499717000.0,yonis mohamud esse,nryyol55,72,hamuş,tr,12.0,181.0,249971689,False
8.913728e+17,mustafa enes akturkoglu,eneskahyaoglu1_,86,Enes Kahyaoğlu,tr,193.0,724.0,891372848,False
3405437000.0,adobe user,saypirvumin,86,saypır evli user,tr,14188.0,93.0,340543717,False


In [14]:
filtered_twu = twitter_users[twitter_users.match_ratio>90]
filtered_twu = filtered_twu.append(twitter_users.loc[291122559])  # me: Ammar Rashed :)

In [15]:
len(twitter_users), len(filtered_twu)

(41768, 2618)

In [18]:
filtered_twu = pd.read_csv("../datasets/filtered_twitter_users.csv", index_col="id")
filtered_twu.sample(5)

Unnamed: 0_level_0,match_name,screen_name,match_ratio,name,lang,followers_count,friends_count,truncated_id,is_org
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
9.688809e+17,hafsa nur aslanoglu,narkabuguhna,95,Hafsa Nur Aslanoğlu,tr,164.0,226.0,968880888,False
1602070000.0,mustafa can,mustafacan29665,100,mustafa can,tr,7.0,45.0,160206994,False
9.388624e+17,ismail havuz,ismaily01363798,92,ismail yavuz,tr,0.0,64.0,938862447,False
7.285541e+17,demet erdogan,Dmt_Erdgn,92,Demet Erdoğan,en,26.0,15.0,728554070,False
367078100.0,mustafa bal,mustafaunal83,91,mustafa ünal,tr,496.0,4976.0,367078062,False


In [19]:
filtered_twu.loc[291122559]

match_name            ammar rasid
screen_name        AmmarRashed_MB
match_ratio                    87
name                 Ammar Rashed
lang                           en
followers_count               392
friends_count                 337
truncated_id            291122559
is_org                      False
Name: 291122559.0, dtype: object

In [20]:
filtered_twu[filtered_twu.screen_name.str.contains("kral")]

Unnamed: 0_level_0,match_name,screen_name,match_ratio,name,lang,followers_count,friends_count,truncated_id,is_org
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
14668730.0,ahmet bulut,kral,100,Ahmet Bulut,en,242.0,62.0,14668733,False
2387189000.0,mehmet yilmaz,kralyoshi,92,Mehmet Yılmaz,tr,5.0,203.0,238718926,False


# Filter connections

In [21]:
twu_with_orgs = pd.concat([filtered_twu, sehir_orgs])
twu_with_orgs.sample(5)

Unnamed: 0_level_0,followers_count,friends_count,is_org,lang,match_name,match_ratio,name,screen_name,truncated_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
8.634731e+17,54.0,123.0,False,tr,oguzhan tezel,92.0,Oğuzhan Tezel,oguzhaantezel,863473065
103012300.0,18.0,199.0,False,en,zeynep yilmaz,92.0,zeynep yılmaz,zeynep2,103012327
9.097353e+17,48.0,202.0,False,tr,omer faruk kacmen,94.0,Ömer Faruk Kaçmen,kacmenomer,909735272
799977000.0,10.0,185.0,False,tr,reyhan reyhan,95.0,Osman & Reyhan,osmanfidan7,799977030
2544539000.0,86.0,397.0,False,tr,mustafa demir,96.0,Mustafa özdemir,Mustafa78778240,254453852


In [22]:
assert len(twu_with_orgs) == len(filtered_twu) + len(sehir_orgs)
len(twu_with_orgs)

2849

In [24]:
# user_connections.formation = user_connections.formation.apply(lambda x:{"2018.05.24":True})
user_connections.sample(5)

Unnamed: 0,from_user_id,to_user_id,formation
49206,59108737,1537318436,{'2018.05.24': True}
51761,2749898057,222806278,{'2018.05.24': True}
56601,1519685490,2612870692,{'2018.05.24': True}
49407,1317272377,1537318436,{'2018.05.24': True}
20297,3299537073,1666891914,{'2018.05.24': True}


In [25]:
ids = set(twu_with_orgs.index)
def in_sehir(row, from_col="from_user_id", to_col="to_user_id"):
    return row[from_col] in ids and row[to_col] in ids

In [26]:
user_connections["in_sehir"] = user_connections.apply(lambda row: in_sehir(row), axis=1)
sehir_connections = user_connections[user_connections.in_sehir].drop("in_sehir", axis=1)
sehir_connections.sample(5)

Unnamed: 0,from_user_id,to_user_id,formation
47891,3421922319,843946061626576896,{'2018.05.24': True}
18308,1036789573,2545544532,{'2018.05.24': True}
29763,1040271559,1979040049,{'2018.05.24': True}
34538,1941077114,609301446,{'2018.05.24': True}
22252,386018373,443551180,{'2018.05.24': True}


In [27]:
len(user_connections),len(sehir_connections)

(93296, 9020)

## Adding older connections

In [28]:
from datetime import datetime
def get_dates(cons):
    all_dates = set()
    str2date = lambda strdate: datetime.strptime(strdate, '%Y.%m.%d')  # 2018.05.08

    for dates in cons.formation.apply(lambda x: list(x)):
        for date in dates:
            all_dates.add(str2date(date))
    return [d.strftime('%Y.%m.%d') for d in sorted(all_dates)]

def present_in_date(changes_dates, queried_date):
    """
    changes_dates = {d1:True, d2:False, d3:True} connection added or removed
    """
    if changes_dates:
        str2date = lambda strdate: datetime.strptime(strdate, '%Y.%m.%d')  # 2018.05.08
        changes = sorted(changes_dates,key=lambda d: str2date(d))
        queried_date = datetime.strptime(queried_date, '%Y.%m.%d')
        present = False
        for d in changes:
            if queried_date < str2date(d):
                break
            present = changes_dates[d]
        return present
    else:
        return False

In [29]:
con2 = psycopg2.connect('dbname=old host=localhost user=postgres password=1_sehir_1')

old_cons = pd.read_sql("SELECT * FROM twitter_connection", con2).drop('id', axis=1)

In [30]:
old_cons.sample(5)

Unnamed: 0,from_user_id,to_user_id,formation
11107,613357064,106086098,{'2018.05.08': True}
19715,795546530845442048,1666891914,{'2018.05.08': True}
31943,3292423005,458521438,{'2018.05.08': True}
13495,989570894585528320,3892757176,{'2018.05.08': True}
35707,450639507,172846089,{'2018.05.08': True}


In [31]:
old_cons["in_sehir"] = old_cons.apply(lambda row: in_sehir(row), axis=1)
old_sehir = old_cons[old_cons.in_sehir].drop("in_sehir", axis=1)
old_sehir.sample(5)

Unnamed: 0,from_user_id,to_user_id,formation
11195,300804736,106086098,{'2018.05.08': True}
30617,783385197186256896,4091594428,{'2018.05.08': True}
7501,260874526,106086098,{'2018.05.08': True}
31678,789462997,1222491402,{'2018.05.08': True}
6300,2787503142,106086098,{'2018.05.08': True}


In [32]:
len(old_cons),len(old_sehir)

(39848, 6200)

In [33]:
concat_cons = pd.concat([sehir_connections, old_sehir])
assert len(concat_cons) == len(old_sehir)+len(sehir_connections)
len(concat_cons)

15220

In [34]:
def optimize_dates(dates_):
    dates = {list(d.keys())[0]:d[list(d.keys())[0]] for d in dates_}
    str2date = lambda strdate: datetime.strptime(strdate, '%Y.%m.%d')  # 2018.05.08
    sorted_dates = sorted(dates, key=lambda d:str2date(d))
    optimized_dates = {sorted_dates[0]: True}
    for d in range(1, len(sorted_dates)):
        if dates[sorted_dates[d-1]] != dates[sorted_dates[d]]:
            optimized_dates[sorted_dates[d]] = dates[sorted_dates[d]]
    return str(optimized_dates)

In [35]:
grouped_cons = concat_cons.groupby(["from_user_id","to_user_id"])["formation"]\
                    .apply(lambda x:optimize_dates(x)).reset_index()
len(grouped_cons)

9031

In [36]:
len(old_sehir), len(sehir_connections)

(6200, 9020)

In [37]:
str2dict = lambda d : ast.literal_eval(d)
grouped_cons.formation = grouped_cons.formation.apply(lambda d: str2dict(d))
grouped_cons.sample(5)

Unnamed: 0,from_user_id,to_user_id,formation
8991,970643320107126784,870255908642856960,{'2018.05.08': True}
384,97633683,2612870692,{'2018.05.24': True}
4543,1392659191,1666891914,{'2018.05.08': True}
2890,526351576,1400588754,{'2018.05.24': True}
3215,614648680,162310009,{'2018.05.24': True}


In [38]:
twu_with_orgs.to_csv("../datasets/filtered_twitter_users.csv", index_label="id")
twu_with_orgs.to_csv("../REST/static/filtered_twitter_users.csv", index_label="id")

grouped_cons.to_csv("../datasets/filtered_twitter_connections.csv", index_label="id")
grouped_cons.to_csv("../REST/static/filtered_twitter_connections.csv", index_label="id")

## Construct the network

In [39]:
dates = get_dates(grouped_cons)
dates

['2018.05.08', '2018.05.24']

In [40]:
grouped_cons["first_date"] = grouped_cons.formation.apply(
    lambda ds: present_in_date(ds, dates[0]))
grouped_cons.sample(5, random_state=42)

Unnamed: 0,from_user_id,to_user_id,formation,first_date
2373,450639507,151606317,{'2018.05.08': True},True
7802,4576344862,564388225,{'2018.05.08': True},True
5123,1727958896,174415744,{'2018.05.08': True},True
6108,2590180702,1222491402,{'2018.05.08': True},True
6011,2529427087,609301446,{'2018.05.08': True},True


In [41]:
G = nx.DiGraph()
# for _, row in user_connections.iterrows():
for _, row in grouped_cons[grouped_cons.first_date==True].iterrows():    
    from_ = truncate(row["from_user_id"])
    to = truncate(row["to_user_id"])
    if from_ in twu_with_orgs.truncated_id and to in twu_with_orgs.truncated_id:
        G.add_edge(from_, to)

In [42]:
augs = ["name", "screen_name","match_name", "followers_count","friends_count", "lang"]
for node in G.nodes():
    user = twu_with_orgs.loc[node]
    for aug in augs:
        if aug=="lang":
            m = user[aug]
        elif type(user[aug])==str:
            m = clean(user[aug])
        else:
            m = user[aug]
        G.nodes[node][aug] = m

In [43]:
len(G.nodes())

722

In [44]:
len(G.edges())

1280

In [45]:
for ix,deg in G.degree(G.nodes()):
    G.node[ix]['degree'] = deg
    G.node[ix]['parity'] = (1-deg%2)
    
for ix,in_deg in G.in_degree(G.nodes()):
    G.node[ix]['in_degree'] = in_deg
    
for ix,out_deg in G.out_degree(G.nodes()):
    G.node[ix]['out_degree'] = out_deg

In [46]:
evc = nx.eigenvector_centrality(G)
closeness = nx.closeness_centrality(G)
betweenness = nx.betweenness_centrality(G)
pagerank = nx.pagerank(G)
nxg = G.to_undirected()
clustering = nx.clustering(nxg)

In [47]:
metrics = {"eigenvector_centrality":evc,
           "closeness_centrality":closeness,
          "betweenness":betweenness,
          "pagerank":pagerank,
          "clustering_coefficient":clustering}

In [48]:
for metric_name, metric in metrics.items():
    for ix,v in metric.items():
        G.nodes[ix][metric_name] = v

In [49]:
list(G.nodes(data=True))[0]

(396662786,
 {'betweenness': 0.0,
  'closeness_centrality': 0.0,
  'clustering_coefficient': 0,
  'degree': 1,
  'eigenvector_centrality': 6.741618620868637e-27,
  'followers_count': 33.0,
  'friends_count': 284.0,
  'in_degree': 0,
  'lang': 'tr',
  'match_name': ' sehir mba',
  'name': 'nemasehir',
  'out_degree': 1,
  'pagerank': 0.0002640177962431144,
  'parity': 0,
  'screen_name': 'nemasehir'})

In [50]:
import json
from networkx.readwrite import json_graph
data = nx.node_link_data(G)
with open('../REST/static/networks/twitter_users_graph2.json', 'w') as f:
    json.dump(data, f, indent=4)

## Calculating Homophily

In [51]:
def homophily(nw, metric="lang"):
    langs_probs = dict()
    for n in nw.nodes():
        user = nw.nodes[n]
        langs_probs.setdefault(user[metric], 0)
        langs_probs[user[metric]] += 1
    heterogeneity_fraction_norm = 1 - sum(
        [(float(i)/len(nw.nodes()))**2 for i in langs_probs.values()])
    cross_edges = sum(
        [int(nw.nodes[f][metric] != nw.nodes[t][metric] ) for f,t in nw.edges()])
    cross_metric_ratio = cross_edges/float(len(nw.edges()))
    print("cross-metric edges ratio: ", cross_metric_ratio)
    print("Heterogeneity Fraction Norm", heterogeneity_fraction_norm)
    return cross_metric_ratio < heterogeneity_fraction_norm

In [52]:
homophily(G)

cross-metric edges ratio:  0.4578125
Heterogeneity Fraction Norm 0.39950199891038296


False

# Transitivity

In [53]:
nx.transitivity(G)

0.024470693113438823