In [None]:
pip install --upgrade scipy

In [None]:
pip install networkx

In [None]:
#importing the required libraries
import networkx as nx
import pandas as pd

In [None]:
# Read the raw edge list from the working directory into a DataFrame.
# Later cells rename its 'author' column and read 'Target' from it.
edge_list = pd.read_csv(filepath_or_buffer='edge_list.csv')

In [None]:
# Rename 'author' to 'Source' so the column pairs with 'Target' when the graph
# is built further down. Reassigning the result (instead of inplace=True)
# avoids mutating the loaded frame in place.
edge_list = edge_list.rename(columns={'author': 'Source'})

# Preview the first rows to confirm the new column name
edge_list.head()

In [None]:
import matplotlib.pyplot as plt

# Build an undirected graph directly from the edge list: one edge per row, with
# endpoints taken from the 'Source' and 'Target' columns. This replaces the
# row-by-row iterrows() loop and matches how G is rebuilt later in the notebook.
G = nx.from_pandas_edgelist(edge_list, 'Source', 'Target')

# Calculate degree centrality for each node
degree_centrality = nx.degree_centrality(G)

# Select the top influential nodes based on degree centrality
num_nodes_to_label = 20  # Adjust the number of nodes to label
sorted_nodes = sorted(degree_centrality, key=degree_centrality.get, reverse=True)
selected_nodes = sorted_nodes[:num_nodes_to_label]

# Visualize the graph
plt.figure(figsize=(12, 10))  # Adjust the figure size if needed
# spring_layout starts from random positions; a fixed seed makes the figure
# reproducible across Restart & Run All.
pos = nx.spring_layout(G, seed=42)

# Specify node and edge colors, sizes, and styles
node_size = 30  # Adjust the node size
font_size = 6  # Adjust the font size
node_color = 'lightblue'
edge_color = 'black'

# Draw every node and edge without labels first ...
nx.draw_networkx(G, pos=pos, with_labels=False, node_color=node_color, edge_color=edge_color,
                 node_size=node_size, alpha=0.7)

# ... then label only the selected high-centrality nodes. The names come from
# degree_centrality, which is keyed by G's own nodes, so no membership check
# is needed.
labels = {node: str(node) for node in selected_nodes}

nx.draw_networkx_labels(G, pos=pos, labels=labels, font_size=font_size, font_color='black')

plt.axis('off')  # Remove the axis
plt.show()


In [None]:
# Report how many nodes and edges the graph contains, one figure per line
print(
    f"Number of nodes: {G.number_of_nodes()}",
    f"Number of edges: {G.number_of_edges()}",
    sep="\n",
)

In [None]:
from networkx.algorithms import community

# Per-node structural metrics; each result is looked up by node below
degree_centrality = nx.degree_centrality(G)
betweenness_centrality = nx.betweenness_centrality(G)
clustering_coefficient = nx.clustering(G)

# Partition the graph with networkx's greedy modularity maximisation
# (greedy_modularity_communities), then flatten the partition into a
# node -> community index lookup
communities = community.greedy_modularity_communities(G)
community_assignment = {
    member: label
    for label, members in enumerate(communities)
    for member in members
}

# Report only the nodes whose clustering coefficient is non-zero
clustered_nodes = [node for node in G.nodes if clustering_coefficient[node] != 0]
for node in clustered_nodes:
    print(
        f"Node: {node}",
        f"Degree Centrality: {degree_centrality[node]}",
        f"Betweenness Centrality: {betweenness_centrality[node]}",
        f"Clustering Coefficient: {clustering_coefficient[node]}",
        f"Community: {community_assignment[node]}",
        sep="\n",
        end="\n\n",  # blank line between node reports
    )

c = len(clustered_nodes)  # how many nodes were reported above

print(f"Number of nodes with non-zero clustering coefficient: {c}")



In [None]:
# Node-level metrics for the whole graph
degree_centrality = nx.degree_centrality(G)
betweenness_centrality = nx.betweenness_centrality(G)
clustering_coefficient = nx.clustering(G)

# Community detection via greedy modularity maximisation
# (networkx's greedy_modularity_communities)
communities = community.greedy_modularity_communities(G)

# Map each node to the index of the community that contains it
community_assignment = {}
for community_index, members in enumerate(communities):
    community_assignment.update(dict.fromkeys(members, community_index))

# Print one report per node (every node, unfiltered), each followed by a blank line
for node in G.nodes:
    report_lines = [
        f"Node: {node}",
        f"Degree Centrality: {degree_centrality[node]}",
        f"Betweenness Centrality: {betweenness_centrality[node]}",
        f"Clustering Coefficient: {clustering_coefficient[node]}",
        f"Community: {community_assignment[node]}",
    ]
    print("\n".join(report_lines))
    print()
    

In [None]:

# Calculate degree centrality
degree_centrality = nx.degree_centrality(G)

# Calculate betweenness centrality
betweenness_centrality = nx.betweenness_centrality(G)

# Calculate clustering coefficient
clustering_coefficient = nx.clustering(G)

# Perform community detection using greedy modularity maximisation
# (networkx's greedy_modularity_communities)
communities = community.greedy_modularity_communities(G)

# Assign community labels to nodes
community_assignment = {}
for i, comm in enumerate(communities):
    for node in comm:
        community_assignment[node] = i

# Collect one record per node, then build the DataFrame in a single step.
# DataFrame.append was removed in pandas 2.0, and appending row by row copied
# the whole frame on every iteration.
centrality_columns = ['Node', 'Degree Centrality', 'Betweenness Centrality', 'Clustering Coefficient', 'Community']
centrality_records = [
    {
        'Node': node,
        'Degree Centrality': degree_centrality[node],
        'Betweenness Centrality': betweenness_centrality[node],
        'Clustering Coefficient': clustering_coefficient[node],
        'Community': community_assignment[node],
    }
    for node in G.nodes
]
# Passing columns= keeps the column order (and the headers, even for an empty graph)
centrality_df = pd.DataFrame(centrality_records, columns=centrality_columns)

# Left-join the edge_list rows onto the node metrics where Node == Source.
# A node that is the 'Source' of several edge_list rows therefore appears on
# several rows of the result; nodes with no match are kept with empty edge columns.
centrality_and_community_with_edge_data = centrality_df.merge(edge_list, left_on='Node', right_on='Source', how='left')

# Print the DataFrame with additional data
print(centrality_and_community_with_edge_data)

# Save the DataFrame with additional data to a CSV file
centrality_and_community_with_edge_data.to_csv('centrality_and_community_with_edge_data.csv', index=False)


In [None]:
# Rebind `centrality_df` to the merged frame so the cells below can read the
# node metrics together with the edge_list columns.
# NOTE(review): that frame comes from a left join on Node == Source, so a node
# can occupy several rows (one per matching edge_list row) — confirm that the
# downstream plots expect per-row rather than per-node data.
centrality_df=centrality_and_community_with_edge_data

In [None]:
import matplotlib.pyplot as plt

# centrality_df may hold several rows for the same node (it is rebound to a
# frame merged with the edge list), which would count that node once per row.
# Keep one row per node so the y-axis really is "Number of Nodes".
node_metrics = centrality_df.drop_duplicates(subset='Node')

# Plot the histogram of clustering coefficients
plt.hist(node_metrics['Clustering Coefficient'], bins=20, edgecolor='black')
plt.xlabel('Clustering Coefficient')
plt.ylabel('Number of Nodes')
plt.title('Distribution of Clustering Coefficients')
plt.savefig('figure.png',dpi=300)
plt.show()


In [None]:
# One row per node, so a node repeated in centrality_df is drawn only once
node_metrics = centrality_df.drop_duplicates(subset='Node')

# x and y now match the axis labels and the title below; previously the two
# series were swapped, so each axis was labelled with the other metric.
x = node_metrics['Degree Centrality']
y = node_metrics['Clustering Coefficient']

# Plot the scatter plot
plt.scatter(x, y, alpha=0.5)
plt.xlabel('Degree Centrality')
plt.ylabel('Clustering Coefficient')
plt.title('Scatter Plot: Degree Centrality vs. Clustering Coefficient')
plt.show()

In [None]:
import matplotlib.pyplot as plt

# One row per node: centrality_df can repeat a node, which would draw the same
# marker several times on top of itself.
node_metrics = centrality_df.drop_duplicates(subset='Node')
nodes = node_metrics['Node']
values = node_metrics['Betweenness Centrality']

# Scatter plot where both the height and the marker area of each node grow with
# its betweenness centrality (area = value * 5000)
plt.figure(figsize=(10, 6))
plt.scatter(nodes, values, s=values * 5000, alpha=0.7)
plt.xlabel('Nodes')
plt.ylabel('Betweenness Centrality')
plt.title('Betweenness Centrality Visualization (technology)')
plt.xticks(rotation=90)
plt.tight_layout()
# Saved under its own name: the clustering-coefficient histogram cell already
# writes 'figure.png', and reusing that name here overwrote it.
plt.savefig('betweenness_centrality.png',dpi=300)
plt.show()
# TODO: put topics on the node axis - topics that act as bridges

In [None]:
# Keep only the rows whose betweenness centrality exceeds 0.12
exceeds_threshold = centrality_df['Betweenness Centrality'] > 0.12
filtered_df = centrality_df[exceeds_threshold]

# Show the node and topic of every retained row
for record in filtered_df.to_dict('records'):
    print(f"Node: {record['Node']}, Topic: {record['Topic']}")

In [None]:
from networkx.algorithms import community

# Detect communities with greedy modularity maximisation
# (networkx's greedy_modularity_communities)
communities = community.greedy_modularity_communities(G)

# Size of each community, keyed by its position in the returned sequence
community_node_counts = {
    community_index: len(members)
    for community_index, members in enumerate(communities)
}

# One summary line per community
for community_id, node_count in community_node_counts.items():
    print(f"Community {community_id}: {node_count} nodes")


In [None]:
import pandas as pd
from networkx.algorithms import community

# Select the edge_list rows whose 'Source' node belongs to community 0.
# `community_assignment` (node -> community id) comes from an earlier cell.
# The community detection is not re-run here because its result was never used
# in this cell.
community_nodes = [node for node, comm_id in community_assignment.items() if comm_id == 0]
community_df = edge_list[edge_list['Source'].isin(community_nodes)].copy()

# Store the dataframe for Community 0 in a variable
community_0_dataframe = community_df.copy()

# NOTE(review): 'communoty' looks like a typo for 'community'. The file name is
# kept unchanged in case something else already reads it — confirm and rename.
community_df.to_csv('communoty 0.csv', index=False)

# Last expression of the cell, so Jupyter renders the frame. In the middle of
# the cell, as before, it displayed nothing.
community_0_dataframe


In [None]:
from networkx.algorithms import community


# Rebuild the undirected graph from the 'Source'/'Target' columns
G = nx.from_pandas_edgelist(edge_list, 'Source', 'Target')

# Greedy modularity maximisation (networkx's greedy_modularity_communities),
# flattened into a node -> community index lookup
communities = community.greedy_modularity_communities(G)
community_assignment = {
    member: community_index
    for community_index, members in enumerate(communities)
    for member in members
}

# For each post title, find the communities its 'Source' values fall into.
# A post counts as dispersed when more than one distinct community id appears;
# .get() yields None for a commenter without an assignment.
dispersed_posts = []
for post_title, group in edge_list.groupby('post_title'):
    commenters = set(group['Source'])
    community_ids = {community_assignment.get(commenter) for commenter in commenters}
    if len(community_ids) <= 1:
        continue
    # Dispersity = distinct communities / distinct commenters
    dispersity = len(community_ids) / len(commenters)
    # Subreddit recorded on the first row of this post's group
    subreddit = group['subreddit'].iloc[0]
    dispersed_posts.append((post_title, dispersity, subreddit))

# Highest dispersity first (stable for ties)
dispersed_posts_sorted = sorted(dispersed_posts, key=lambda entry: entry[1], reverse=True)

# List every dispersed post with its score and subreddit
print("Posts with commenters dispersed across communities (in order of dispersity):")
for post_title, dispersity, subreddit in dispersed_posts_sorted:
    print(f"Post: {post_title}, Dispersity: {dispersity}, Subreddit: {subreddit}")


In [None]:
# Greedy modularity maximisation (networkx's greedy_modularity_communities)
communities = community.greedy_modularity_communities(G)

# Node -> index of the community containing it
community_assignment = {
    member: community_index
    for community_index, members in enumerate(communities)
    for member in members
}

# Titles of posts whose 'Source' values map to more than one community id
# (None stands in for a commenter that has no assignment)
dispersed_posts = {
    post_title
    for post_title, group in edge_list.groupby('post_title')
    if len({community_assignment.get(commenter) for commenter in set(group['Source'])}) > 1
}

# Totals: all distinct post titles vs. those flagged as dispersed
num_post_titles = len(edge_list['post_title'].unique())
num_post_titles_with_dispersity = len(dispersed_posts)

print("Number of post titles:", num_post_titles)
print("Number of post titles with dispersity:", num_post_titles_with_dispersity)


In [None]:
# Greedy modularity maximisation (networkx's greedy_modularity_communities)
communities = community.greedy_modularity_communities(G)

# Node -> index of the community containing it
community_assignment = {}
for community_index, members in enumerate(communities):
    community_assignment.update(dict.fromkeys(members, community_index))

# Collect (title, dispersity, subreddit) for every post whose 'Source' values
# map to more than one community id
dispersed_posts = []
for post_title, group in edge_list.groupby('post_title'):
    commenters = set(group['Source'])
    community_ids = {community_assignment.get(commenter) for commenter in commenters}
    if len(community_ids) > 1:
        # Ratio of distinct communities to distinct commenters
        dispersity = len(community_ids) / len(commenters)
        # Subreddit taken from the first row of the post's group
        subreddit = group['subreddit'].iloc[0]
        dispersed_posts.append((post_title, dispersity, subreddit))

# Order the posts from most to least dispersed, in place
dispersed_posts.sort(key=lambda entry: entry[1], reverse=True)

# Split the tuples into parallel lists for plotting
post_titles = [title for title, _, _ in dispersed_posts]
dispersities = [score for _, score, _ in dispersed_posts]

# One bar per dispersed post, labelled with the post title
bar_positions = range(len(dispersed_posts))
plt.figure(figsize=(12, 6))
plt.bar(bar_positions, dispersities)
plt.xlabel('Post')
plt.ylabel('Dispersity')
plt.title('Dispersity of Commenters Across Communities')
plt.xticks(bar_positions, post_titles, rotation=90)
plt.tight_layout()
plt.show()

In [None]:
import networkx as nx
import matplotlib.pyplot as plt


# Keep only the edges whose two endpoints were assigned to different
# communities, and take the subgraph induced by those edges
edges = [
    (source, target)
    for source, target in G.edges()
    if community_assignment[source] != community_assignment[target]
]
subgraph = G.edge_subgraph(edges)

# Draw the network graph. spring_layout starts from random positions, so a
# fixed seed keeps the picture identical between runs.
pos = nx.spring_layout(subgraph, seed=42)
nx.draw_networkx_nodes(subgraph, pos, node_color='lightblue', node_size=200)
nx.draw_networkx_edges(subgraph, pos, edge_color='gray')
nx.draw_networkx_labels(subgraph, pos, font_size=8)
plt.title('Dispersity of Commenters across Communities')
plt.axis('off')
plt.show()


In [None]:
import plotly.graph_objects as go


# Subreddit of every dispersed post. `dispersed_posts` holds
# (post_title, dispersity, subreddit) tuples. `subreddits` was previously read
# here without ever being defined, which raised NameError on a fresh kernel.
subreddits = [subreddit for _, _, subreddit in dispersed_posts]

# Distinct subreddits in first-seen order (deterministic, unlike set ordering)
nodes = list(dict.fromkeys(subreddits))
node_indices = {node: index for index, node in enumerate(nodes)}

# One link of weight 1 per dispersed post, starting at the post's subreddit.
# NOTE(review): the target is simply the next subreddit in `nodes` (wrapping
# around), not a subreddit derived from the data — confirm this is the intended
# meaning of the links.
source_indices = [node_indices[subreddit] for subreddit in subreddits]
target_indices = [(index + 1) % len(nodes) for index in source_indices]
link_values = [1] * len(dispersed_posts)

# Create the Sankey diagram
fig = go.Figure(data=[go.Sankey(
    node=dict(label=nodes),
    link=dict(source=source_indices, target=target_indices, value=link_values)
)])
fig.update_layout(title_text='Dispersity of Commenters across Subreddits')
fig.show()


In [None]:
import plotly.graph_objects as go

# Dispersity scores of the dispersed posts (not used by the figure below)
dispersity_values = [dispersity for _, dispersity, _ in dispersed_posts]

# Distinct subreddits of the dispersed posts, in first-seen order. Derived from
# `dispersed_posts` because `subreddits` was previously read before it was
# ever assigned (NameError on a fresh kernel).
subreddits = list(dict.fromkeys(subreddit for _, _, subreddit in dispersed_posts))
num_subreddits = len(subreddits)
subreddit_position = {name: position for position, name in enumerate(subreddits)}

# Square matrix of connection counts, rows = source subreddit
matrix = [[0] * num_subreddits for _ in range(num_subreddits)]

# Each dispersed post adds 1 from its own subreddit to every other subreddit.
# NOTE(review): the increment is a constant 1, not the post's dispersity value —
# confirm which weighting is intended.
for post_title, dispersity, subreddit in dispersed_posts:
    source_index = subreddit_position[subreddit]
    for target_index in range(num_subreddits):
        if target_index != source_index:
            matrix[source_index][target_index] += 1

# plotly.graph_objects has no Chord trace, so the connection matrix is shown as
# a heatmap instead: row (y) = source subreddit, column (x) = target subreddit.
fig = go.Figure(data=[go.Heatmap(
    z=matrix,
    x=subreddits,
    y=subreddits,
    colorscale='Viridis',
)])

fig.update_layout(
    title='Dispersity of Commenters across Subreddits',
    xaxis_title='Target subreddit',
    yaxis_title='Source subreddit',
    font_size=12,
    width=800,
    height=800,
)

fig.show()
