In [2]:
import pandas as pd
import numpy as np
import json
import networkx as nx
from networkx.algorithms import bipartite

In [3]:
# load data
df = pd.read_csv("minimumwage.csv", delimiter = ",")
df.drop(df.columns[[0]], axis = 1, inplace = True)
df.reset_index(drop = True, inplace = True)
df

Unnamed: 0,ideology_retweeter,ideology_retweeted,retweeted,retweeter
0,-0.897884,-1.046964,id4554405,id88318
1,-1.098689,-1.046964,id4554405,id1245626
2,-1.279034,-1.046964,id4554405,id1181289
3,-2.188812,0.789726,id3200191,id2262899
4,0.033346,-1.046964,id4554405,id2094953
...,...,...,...,...
113317,-0.934184,-0.551040,id4080387,id3966184
113318,-0.934184,-0.551040,id4080387,id3966184
113319,-0.934184,-0.551040,id4080387,id3966184
113320,-0.934184,-1.046964,id4554405,id3966184


In [4]:
# Get number of uniques for retweeted and retweeter
n_retweeted = len(np.unique(df["retweeted"]))
n_retweeter = len(np.unique(df["retweeter"]))
print(f"Gives a {n_retweeted} X {n_retweeter} bipartite graph")

Gives a 5021 X 47428 bipartite graph


In [5]:
# check if the two partitions have overlap
check = any(item in df["retweeted"] for item in df["retweeter"])
if not check:
    print(f"No tweeters overlap in the two partions")

No tweeters overlap in the two partions


In [6]:
# create bipartite graph
BG = nx.Graph()
# Define the nodes in the to partitions
BG.add_nodes_from(np.unique(df["retweeted"].to_list()), bipartite = 0) 
BG.add_nodes_from(np.unique(df["retweeter"].to_list()), bipartie = 1)
# Add the edges between the nodes
edges = list(zip(df["retweeted"].to_list(),df["retweeter"].to_list()))
BG.add_edges_from(edges)

In [7]:
BG.number_of_edges()

88564

In [8]:
remove = [node for node, degree in BG.degree() if degree < 0]
remove
# So no singletons

[]

In [9]:
[len(c) for c in sorted(nx.connected_components(BG), key=len, reverse=True)]

[47301,
 759,
 17,
 17,
 13,
 11,
 11,
 9,
 8,
 7,
 7,
 7,
 7,
 7,
 7,
 6,
 6,
 6,
 6,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 

In [11]:
nx.is_connected(BG)

False

In [12]:
largest_cc = max(nx.connected_components(BG), key=len)
BG = BG.subgraph(max(nx.connected_components(BG), key=len)).copy()
BG

<networkx.classes.graph.Graph at 0x1fa70867940>

In [13]:
nx.is_connected(BG)

True

In [14]:
pruned_nodes = list(BG.nodes())

In [45]:
partition_0 = list(set(df["retweeted"].to_list()).intersection(set(pruned_nodes)))
partition_1 = list(set(df["retweeter"].to_list()).intersection(set(pruned_nodes)))
print(f"New size of bipartite graph {len(partition_0)} X {len(partition_1)}")

New size of bipartite graph 4231 X 45555


In [43]:
partition_0 = list((set(pruned_nodes) & set(np.unique(df["retweeted"].to_list()))))
partition_1 = list((set(pruned_nodes) & set(np.unique(df["retweeter"].to_list()))))

In [41]:
len(list(set(partition_0).intersection(partition_1)))

2485

In [46]:
any(item in partition_0 for item in partition_1)

True

In [31]:
#Create new bipartite graph from the "pruned" nodes
# create bipartite graph
B = nx.Graph()
# Define the nodes in the to partitions
B.add_nodes_from(partition_0, bipartite = 0) 
B.add_nodes_from(partition_1, bipartie = 1)

In [17]:
retweeted = df["retweeted"].to_list()
retweeter = df["retweeter"].to_list()

In [18]:
# Find edges
edges = list(zip(retweeted, retweeter))
len(edges)

113322

In [19]:
edges[0]

('id4554405', 'id88318')

In [29]:
from tqdm import tqdm
new_edges = []
for i in tqdm(range(len(edges))):
    if edges[i][0] in partition_0 and edges[i][1] in partition_1:
        new_edges.append(edges[i])

100%|██████████| 113322/113322 [02:28<00:00, 761.06it/s] 


In [30]:
len(new_edges)

111383

In [32]:
# Add the edges between the nodes
B.add_edges_from(new_edges)

In [33]:
# connected components
[len(c) for c in sorted(nx.connected_components(B), key=len, reverse=True)]

[47301]

In [34]:
# Check if the graph is bipartite
bipartite.is_bipartite(B)

False

In [37]:
from networkx.algorithms import bipartite
top_nodes = {n for n, d in B.nodes(data=True) if d["bipartite"] == 0}
bottom_nodes = set(BG) - top_nodes
print(round(bipartite.density(B, bottom_nodes), 2))

KeyError: 'bipartite'

In [None]:
bipartite.sets(BG[top_nodes])

NameError: name 'top_nodes' is not defined

In [None]:
nx.draw_networkx(BG, pos = nx.drawing.layout.bipartite_layout(BG))

TypeError: bipartite_layout() missing 1 required positional argument: 'nodes'