In [8]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Read in data from CSV file, skipping the first row
df = pd.read_csv('dataset.csv')

# Create an empty graph
graph = nx.Graph()

# Add nodes for each user
users = df['usernames'].unique()
graph.add_nodes_from(users)

# Optimize edge creation
user_subs = {user: set(df.loc[df['usernames'] == user][df.columns[1:]].stack().dropna().values) for user in users}
user_subs = {user: list(subs) for user, subs in user_subs.items()}  # Convert sets to lists

# Add edges for each combination of users who participated in the same subreddit
for i, user1 in enumerate(users):
    print(f'Start user {user1}')
    user1_subs_list = user_subs[user1]
    for user2 in users[i + 1:]:
        user2_subs_list = user_subs[user2]
        common_subs = set(user1_subs_list).intersection(user2_subs_list)
        if common_subs:
            # Convert the set of common subreddits to a list
            common_subs_list = list(common_subs)
            # Exclude unsupported data values (e.g., datetime objects)
            common_subs_list = [str(sub) for sub in common_subs_list]
            graph.add_edge(user1, user2, subreddits=common_subs_list)
    print(f'Finished user {user1}')

# Set data type for 'subreddits' attribute explicitly
nx.set_edge_attributes(graph, {edge: str(graph.edges[edge]['subreddits']) for edge in graph.edges}, 'subreddits')
print("end")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Finished user MarcaP123
Start user Away_Bee_8734
Finished user Away_Bee_8734
Start user ampsonic
Finished user ampsonic
Start user Flashdancer405
Finished user Flashdancer405
Start user WreckMyHoleDaddy
Finished user WreckMyHoleDaddy
Start user CackalackRunner
Finished user CackalackRunner
Start user lem0nwings
Finished user lem0nwings
Start user jm9thh
Finished user jm9thh
Start user nathanc98
Finished user nathanc98
Start user cupcakeandsweetlover
Finished user cupcakeandsweetlover
Start user raiigiic
Finished user raiigiic
Start user Yeeyeetyall
Finished user Yeeyeetyall
Start user Revolutionary_Act_63
Finished user Revolutionary_Act_63
Start user Persistence-Key459
Finished user Persistence-Key459
Start user rene-cumbubble
Finished user rene-cumbubble
Start user dancedanceevol909
Finished user dancedanceevol909
Start user medodoamor
Finished user medodoamor
Start user Albinkiiii
Finished user Albinkiiii
Start user Blo

In [14]:
# Print number of nodes and edges in the graph
print('Number of nodes:', graph.number_of_nodes())
print('Number of edges:', graph.number_of_edges())

Number of nodes: 23173
Number of edges: 6877773


In [12]:
# Save the graph in GraphML format
nx.write_graphml(graph, 'Network.graphml')

In [None]:
# Draw the graph using NetworkX
fig, ax = plt.subplots(figsize=(12, 12))
pos = nx.spring_layout(graph, seed=42, k=3)  # set k parameter to 0.3 to increase distance between nodes
nx.draw_networkx_nodes(graph, pos, node_size=50, node_color='blue', alpha=0.5)
nx.draw_networkx_edges(graph, pos, edge_color='gray', alpha=0.5, width=2)
nx.draw_networkx_labels(graph, pos, font_size=8, font_family='sans-serif')
plt.axis('off')
# Save the graph as PNG image
plt.savefig('graph.png', format='png')

plt.show()