In [1]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

In [None]:
# Load the dataset. No skiprows argument is passed, so read_csv takes the
# first line of the file as the column header rather than skipping it; the
# cells below expect a 'usernames' column.
df = pd.read_csv('dataset.csv')

In [None]:
# Start from an empty undirected graph (nx.Graph); nodes and edges are added in later cells
graph = nx.Graph()

In [None]:
# One node per distinct value in the 'usernames' column.
# `users` is reused by the following cells to build the per-user subreddit
# lists and to enumerate user pairs.
# NOTE(review): unique() keeps a missing value (NaN) as an entry, so a blank
# username would become a node too — confirm the data has none.
users = df['usernames'].unique()
graph.add_nodes_from(users)

In [None]:
# Map each user to the list of distinct, non-null values found in every column
# after the first one (presumably the subreddit columns — confirm against the
# dataset layout) across all of that user's rows.
#
# The rows are grouped once with groupby instead of re-filtering the whole
# frame with a boolean mask for every user, which scanned all rows once per
# user. Selecting the columns on each group also avoids the chained
# df.loc[mask][columns] lookup.
sub_columns = df.columns[1:]
# Start every user with an empty list so that users groupby leaves out
# (e.g. a missing username) still have an entry for the edge-building cell.
user_subs = {user: [] for user in users}
for user, user_rows in df.groupby('usernames', sort=False):
    # stack() flattens the user's rows into one Series; dropna() discards empty cells
    user_subs[user] = list(set(user_rows[sub_columns].stack().dropna().values))

In [None]:
# Connect every pair of users that share at least one subreddit, storing the
# shared subreddits on the edge as a list of strings.
#
# Each user's set is built once up front; previously set(user1_subs_list) was
# rebuilt for every (user1, user2) pair even though it never changes inside
# the inner loop.
user_sub_sets = {user: set(subs) for user, subs in user_subs.items()}
for i, user1 in enumerate(users):
    print(f'Start user {user1}')
    user1_subs = user_sub_sets[user1]
    # Only look at users after user1 so each unordered pair is visited once
    for user2 in users[i + 1:]:
        common_subs = user1_subs & user_sub_sets[user2]
        if common_subs:
            # Values are converted to strings so that unsupported data values
            # (e.g. datetime objects) do not end up on the edge. Sorting gives the
            # attribute a stable order: iteration order of a set of strings can
            # differ between interpreter runs, which made the output non-reproducible.
            common_subs_list = sorted(str(sub) for sub in common_subs)
            graph.add_edge(user1, user2, subreddits=common_subs_list)
    print(f'Finished user {user1}')

In [None]:
# Replace each edge's 'subreddits' value with its string representation.
# The full mapping is built first and then applied in a single call.
# NOTE(review): presumably needed because the GraphML export further down
# cannot serialise list values — confirm.
stringified_subs = {}
for edge in graph.edges:
    stringified_subs[edge] = str(graph.edges[edge]['subreddits'])
nx.set_edge_attributes(graph, values=stringified_subs, name='subreddits')
print("end")

In [None]:
# Report the size of the graph: node count first, then edge count
size_report = (
    ('Number of nodes:', graph.number_of_nodes()),
    ('Number of edges:', graph.number_of_edges()),
)
for label, count in size_report:
    print(label, count)

In [None]:
# Export the graph (nodes, edges and their attributes) in GraphML format.
# The relative path means the file is written to the current working directory.
nx.write_graphml(graph, 'Network.graphml')

In [None]:
# Draw the graph on an explicit Axes instead of relying on pyplot's implicit
# "current axes"; `ax` was previously created but never used.
fig, ax = plt.subplots(figsize=(12, 12))
# Spring layout; the fixed seed makes the node positions reproducible and
# k is the distance parameter of the layout
pos = nx.spring_layout(graph, seed=42, k=3)
# Semi-transparent blue nodes
nx.draw_networkx_nodes(graph, pos, ax=ax, node_size=50, node_color='blue', alpha=0.5)
# Semi-transparent gray edges
nx.draw_networkx_edges(graph, pos, ax=ax, edge_color='gray', alpha=0.5, width=2)
# Small sans-serif node labels
nx.draw_networkx_labels(graph, pos, ax=ax, font_size=8, font_family='sans-serif')
# Turn off the axis. Unlike plt.axis('off'), this returns None, so the cell
# does not echo the axis limits as its output.
ax.set_axis_off()

In [None]:
# Save the drawing as a PNG image.
# Use the Figure object directly: plt.savefig() writes pyplot's *current* figure,
# and Jupyter's inline backend closes a figure at the end of the cell that
# created it, so calling plt.savefig() from a later cell saves a new, blank canvas.
fig.savefig('Build_Network_image.png', format='png')

In [None]:
# Display the figure.
# NOTE(review): under Jupyter's default inline backend the figure is rendered by
# the cell that created it and then closed, so this call likely shows nothing
# here — confirm; it matters mainly for interactive or script backends.
plt.show()