In [15]:
!pip install pyvis




In [16]:
import itertools as it
import math
import random as rnd
import typing as t

import matplotlib
import networkx as nx
import pandas as pd
import pyvis.network as pyv_n

def parse_line(line):
    fields = line.split("\t")
    properties = fields[4].split(",")

    return (fields[0], fields[1], fields[2]), fields[0], fields[1],fields[4]

def read_graph_file(path):
    all_lines = []
    with open(path) as f:
        all_lines = f.readlines()

    all_edges = []
    all_nodes = set()
    for line in all_lines[1:]:
        single_edge, source_node, target_node,link_sentiment = parse_line(line)
        all_nodes.add(source_node)
        all_nodes.add(target_node)
        if link_sentiment == '-1':
          all_edges.append(single_edge)

    return all_nodes, all_edges

all_nodes, all_edges = read_graph_file("/content/soc-redditHyperlinks-body.tsv")

In [17]:
graph = nx.MultiDiGraph()
graph.add_nodes_from(all_nodes)
graph.add_edges_from(all_edges)
print("DONE")

DONE


In [30]:
negative_edge_counts = {}
for source, target, _ in graph.edges:
    if (source,target) in negative_edge_counts.keys():
      negative_edge_counts[(source, target)] += 1
    else:
      negative_edge_counts[(source, target)] = 1
    if (target,source) in negative_edge_counts.keys():
      negative_edge_counts[(target,source)] += 1
    else:
      negative_edge_counts[(target,source)] = 1

negative_edge_counts = dict(sorted(negative_edge_counts.items(), key=lambda item: item[1], reverse=True))
top_10_edges = list(negative_edge_counts.items())[:20]

for i in range(0, len(top_10_edges), 2):
    edge, count = top_10_edges[i]
    print(f"{edge}: {count}")

('askreddit', 'writingprompts'): 58
('pics', 'circlebroke'): 48
('brokehugs', 'christianity'): 47
('videos', 'circlebroke'): 41
('streetfighter', 'sf4'): 39
('askreddit', 'circlebroke'): 39
('news', 'circlebroke'): 38
('gamerghazi', 'shitghazisays'): 36
('funny', 'circlebroke'): 35
('kotakuinaction', 'ggfreeforall'): 34


As we can see askreddit and writingprompts have the largest amount of negative links between two subreddits.

In [19]:
degree_dict = dict(graph.degree())
top_degrees = sorted(degree_dict.items(), key=lambda x: x[1], reverse=True)

# Closeness Centrality
closeness = nx.closeness_centrality(graph)
top_closeness = sorted(closeness.items(), key=lambda x: x[1], reverse=True)

# PageRank
pagerank = nx.pagerank(graph)
top_pagerank = sorted(pagerank.items(), key=lambda x: x[1], reverse=True)

# Display top 10 nodes for each
print("\nTop 10 Nodes by Degree:")
for node, val in top_degrees[:10]:
    print(f"{node}: {val}")

print("\nTop 10 Nodes by Closeness Centrality:")
for node, val in top_closeness[:10]:
    print(f"{node}: {val:.5f}")

print("\nTop 10 Nodes by PageRank:")
for node, val in top_pagerank[:10]:
    print(f"{node}: {val:.5f}")



Top 10 Nodes by Degree:
subredditdrama: 1623
askreddit: 1091
circlebroke: 740
drama: 598
conspiracy: 429
shitliberalssay: 415
news: 391
writingprompts: 390
worldnews: 390
circlejerkcopypasta: 383

Top 10 Nodes by Closeness Centrality:
askreddit: 0.03117
todayilearned: 0.02782
worldnews: 0.02777
videos: 0.02768
iama: 0.02764
news: 0.02740
pics: 0.02710
science: 0.02688
gaming: 0.02659
funny: 0.02620

Top 10 Nodes by PageRank:
askreddit: 0.00785
worldnews: 0.00282
videos: 0.00264
todayilearned: 0.00242
iama: 0.00228
pics: 0.00221
funny: 0.00219
news: 0.00214
relationships: 0.00189
ukraine: 0.00176


The subreddits above are subreddits that have high centrality measures. This means that these subreddits are highly influential in the graph. In our case, this means that these subreddits get or give a lot of negative links.

In [20]:
negative_nodes = set()
for source, target, _ in graph.edges:
    negative_nodes.add(source)
    negative_nodes.add(target)

# All subreddits
all_subreddits = set(graph.nodes)

# Subreddits not involved in any negative interaction
positive_subreddits = all_subreddits - negative_nodes

print(f"Number of positive subreddits: {len(positive_subreddits)}")
print("Sample of positive subreddits:")
for subreddit in list(positive_subreddits)[:10]:
    print(subreddit)


Number of positive subreddits: 29406
Sample of positive subreddits:
auto
lockpickbazaar
indoorgarden
dysbulic
conspiracydocumentary
ktymee
arautos
mirrorsedge
sipsmc
imaginaryartifacts


In [25]:
from pyvis.network import Network
from IPython.display import IFrame

if graph.is_directed():
    graph = graph.to_undirected()

cliques = list(nx.find_cliques(graph))

cliques_of_interest = [clique for clique in cliques if len(clique) >= 2]

cliques_sorted = sorted(cliques_of_interest, key=len, reverse=True)

for i, clique in enumerate(cliques_sorted[:10], 1):
    print(f"Clique {i} (size {len(clique)}): {clique}")


Clique 1 (size 11): ['subredditdrama', 'drama', 'askreddit', 'circlejerkcopypasta', 'circlebroke', 'conspiracy', 'iama', 'politics', 'news', 'the_donald', 'undelete']
Clique 2 (size 11): ['subredditdrama', 'drama', 'askreddit', 'circlejerkcopypasta', 'bestofoutrageculture', 'undelete', 'conspiracy', 'the_donald', 'iama', 'politics', 'news']
Clique 3 (size 11): ['subredditdrama', 'drama', 'askreddit', 'copypasta', 'bestofoutrageculture', 'conspiracy', 'undelete', 'the_donald', 'iama', 'politics', 'news']
Clique 4 (size 11): ['subredditdrama', 'drama', 'askreddit', 'copypasta', 'circlebroke', 'conspiracy', 'the_donald', 'iama', 'undelete', 'politics', 'news']
Clique 5 (size 10): ['subredditdrama', 'technology', 'askreddit', 'conspiracy', 'videos', 'showerthoughts', 'outoftheloop', 'changemyview', 'circlebroke2', 'the_donald']
Clique 6 (size 10): ['subredditdrama', 'technology', 'askreddit', 'conspiracy', 'videos', 'showerthoughts', 'outoftheloop', 'changemyview', 'askhistorians', 'explai

These are the strongly connected components in the graph

In [26]:
# Ensure graph is undirected
if graph.is_directed():
    graph = graph.to_undirected()

# Find all maximal cliques
cliques = list(nx.find_cliques(graph))

# Find the largest clique
largest_clique = max(cliques, key=len)

# Extract subgraph for the largest clique
clique_subgraph = graph.subgraph(largest_clique)

# Visualize using pyvis
clique_net_vis = Network(notebook=True, cdn_resources="remote")
clique_net_vis.from_nx(clique_subgraph)
clique_net_vis.save_graph("largest_clique.html")

# Display the interactive visualization
HTML(filename="largest_clique.html")

Above is displayed the largest clique in the graph