# In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from community import community_louvain
from collections import Counter

# Load the tab-separated edge list, one undirected edge per row.
# SNAP-style edge lists begin with '#'-prefixed header lines; without
# comment='#' those lines are parsed as data rows and end up in the graph
# as bogus nodes (including NaN targets for single-field header lines).
file_path = 'com-youtube.ungraph.txt'
df = pd.read_csv(
    file_path,
    sep='\t',
    header=None,
    names=['Source', 'Target'],
    dtype=str,
    comment='#',
)

# Build the undirected graph. Node ids are kept as strings (dtype=str above).
G = nx.Graph()
edges = df.to_records(index=False)
G.add_edges_from(edges)

# Summary statistics for the full graph.
print(f"Number of nodes: {G.number_of_nodes()}")
print(f"Number of edges: {G.number_of_edges()}")
print(f"Density of the graph: {nx.density(G)}")

# Take the first 500 nodes in the graph's iteration order and work on the
# subgraph they induce, so the clustering computation stays small.
sampled_nodes = list(G)[:500]
H = G.subgraph(sampled_nodes)

sampled_clustering = nx.average_clustering(H)
print(f"Average clustering coefficient (on sampled subgraph): {sampled_clustering}")

# Degree centrality for every node of the full graph.
degree_centrality = nx.degree_centrality(G)

# Rank (user, centrality) pairs from most to least central. The sort is
# stable, so ties keep the order in which the graph yields the nodes.
sorted_degree_centrality = list(degree_centrality.items())
sorted_degree_centrality.sort(key=lambda pair: pair[1], reverse=True)

print("\nTop 5 influential users by degree centrality:")
top_five = sorted_degree_centrality[:5]
for user, centrality in top_five:
    print(f"User: {user}, Degree Centrality: {centrality:.4f}")

# Run Louvain community detection on the full graph. `partition` is indexed
# by node below and its values are used as community labels.
partition = community_louvain.best_partition(G)
community_ids = {*partition.values()}
print(f"\nNumber of communities detected: {len(community_ids)}")

# Visualise community membership on the sampled subgraph H rather than on G:
# a force-directed layout and a scatter of every node/edge of the full graph
# is computationally impractical and would be unreadable anyway. Colours
# still come from the partition computed on the full graph.
# A fixed seed makes the layout reproducible between runs.
pos = nx.spring_layout(H, seed=42)
plt.figure(figsize=(12, 12))
nx.draw_networkx_nodes(
    H,
    pos,
    node_color=[partition[node] for node in H.nodes()],
    cmap=plt.cm.RdYlBu,
    node_size=50,
    alpha=0.8,
)
nx.draw_networkx_edges(H, pos, alpha=0.5)
plt.title('Community Structure of the Social Network (sampled subgraph)')
plt.show()

# Tally how many nodes were assigned to each community label.
community_counts = Counter()
community_counts.update(partition.values())

print("\nCommunity sizes:")
for community in community_counts:
    count = community_counts[community]
    print(f"Community {community}: {count} nodes")
