In [5]:
# imports
import networkx as nx
import pandas as pd
from networkx.algorithms import approximation
import matplotlib.pyplot as plt

In [6]:
# Load the data
df = pd.read_csv('./network.csv/edges.csv', usecols=['source', 'target'])

G = nx.Graph()

sourceList = df['source'].tolist()
targetList = df['target'].tolist()
# sexList = df['sex'].tolist()
added = []
for i in range(len(sourceList)):
    G.add_edges_from([(sourceList[i], targetList[i])])
    added.append((sourceList[i], targetList[i]))
print(f'Whole network: {G}')

Whole network: Graph with 35229 nodes and 48892 edges


In [8]:
# Largest  component
print(f'Is our graph connected: {nx.is_connected(G)}')
largest_cc = max(nx.connected_components(G), key=len)
Gcc = G.subgraph(largest_cc)

Is our graph connected: False


In [10]:
# Calculating the various centrality measures
degree = dict(Gcc.degree)
closeness = nx.closeness_centrality(Gcc)
betweenness = nx.betweenness_centrality(Gcc)

KeyboardInterrupt: 

In [12]:
# We estimate the structural equivalence using the jaccard coefficient of the neighbor sets of two nodes.
nodes = list(Gcc.nodes)
str_equivalence = {}
for i in range(len(nodes) - 1):
   neighbors_i = set(Gcc.neighbors(nodes[i]))
   for j in range(i + 1, len(nodes)):
      neighbors_j = set(G.neighbors(nodes[j]))
      str_equivalence[(nodes[i], nodes[j])] = len(neighbors_i & neighbors_j) / len(neighbors_i | neighbors_j)

KeyboardInterrupt: 

Finding the most similar nodes

In [14]:
# since there are ties, we need to find all pairs with maximum value
max_value = max(str_equivalence.values())
most_similar_nodes = [key for key, value in str_equivalence.items() if value == max_value]
print("The pairs of most similar nodes are %s" % ('; '.join("%s,%s" % (pair[0], pair[1]) for pair in most_similar_nodes)))

The pairs of most similar nodes are 11,12; 11,13; 12,13; 14,15; 17,14186; 17,14187; 17,14188; 17,14189; 17,14190; 17,14191; 17,14192; 17,14193; 17,14194; 17,14195; 18,19; 18,20; 18,21; 18,32; 18,34; 18,35; 19,20; 19,21; 19,32; 19,34; 19,35; 20,21; 20,32; 20,34; 20,35; 21,32; 21,34; 21,35; 22,23; 22,24; 22,25; 22,26; 22,27; 22,28; 22,29; 22,30; 22,31; 23,24; 23,25; 23,26; 23,27; 23,28; 23,29; 23,30; 23,31; 24,25; 24,26; 24,27; 24,28; 24,29; 24,30; 24,31; 25,26; 25,27; 25,28; 25,29; 25,30; 25,31; 26,27; 26,28; 26,29; 26,30; 26,31; 27,28; 27,29; 27,30; 27,31; 28,29; 28,30; 28,31; 29,30; 29,31; 30,31; 32,34; 32,35; 34,35; 36,37; 38,39; 38,40; 38,41; 38,42; 38,43; 38,44; 38,45; 38,46; 38,47; 39,40; 39,41; 39,42; 39,43; 39,44; 39,45; 39,46; 39,47; 40,41; 40,42; 40,43; 40,44; 40,45; 40,46; 40,47; 41,42; 41,43; 41,44; 41,45; 41,46; 41,47; 42,43; 42,44; 42,45; 42,46; 42,47; 43,44; 43,45; 43,46; 43,47; 44,45; 44,46; 44,47; 45,46; 45,47; 46,47


Does our network follow sexual preferences or ethnicity?

In [None]:
# Loop over the edges. Every time we find an edge we already saw, that's a reciprocal one.
# Then we keep track of how many have the same sign vs a different sign.
edges_saw = {}
balance = 0
status = 0
for e in G.edges(data = True):
   canonical_edge_form = (min(e[0], e[1]), max(e[0], e[1]))
   if not canonical_edge_form in edges_saw:
      edges_saw[canonical_edge_form] = e[2]["sign"]
   else:
      if e[13]["sign"] == edges_saw[canonical_edge_form]:
         balance += 1
      else:
         status += 1

In [16]:
# Assume 'G' is a NetworkX graph with 'sexual_preference' or 'ethnicity' as edge attributes

# Initialize counters for homophily and heterophily
homophily_count = 0
heterophily_count = 0

# Dictionary to keep track of edges we've seen
edges_seen = {}

# Loop over the edges
for e in G.edges(data=True):
    # Create a canonical form of the edge, ignoring direction
    canonical_edge_form = tuple(sorted((e[0], e[1])))

    # If the edge has not been seen before, store its attributes
    if canonical_edge_form not in edges_seen:
        edges_seen[canonical_edge_form] = e[2]['sex']  # Replace 'attribute_name' with the actual attribute

    # If the edge has been seen and is reciprocal
    else:
        # Compare the attribute of the current edge with the previously seen one
        if e[2]['ethn'] == edges_seen[canonical_edge_form]:  # Replace 'attribute_name' with the actual attribute
            homophily_count += 1
        else:
            heterophily_count += 1

# Print out the results
print(f'Homophily: {homophily_count}')
print(f'Heterophily: {heterophily_count}')

KeyError: 'sex'