In [None]:
import networkx as nx
import pandas as pd
from community import community_louvain
import json
import pprint


In [None]:
def undirected(year):
    """Build a weighted undirected graph from ``graphs/{year}.csv``.

    The CSV is read with pandas and must provide ``source`` and ``target``
    columns plus a column named after ``year`` that holds the edge weight.
    Rows describing the same undirected pair (in either direction) are merged
    into a single edge whose ``weight`` attribute is the sum of their weights.

    Parameters
    ----------
    year : int or str
        Selects both the input file ``graphs/{year}.csv`` and the weight
        column ``f"{year}"``.

    Returns
    -------
    networkx.Graph
        Graph with a float ``weight`` attribute on every edge.
    """
    df = pd.read_csv(f"graphs/{year}.csv")
    G = nx.Graph()

    # Walk the three columns in lockstep instead of DataFrame.iterrows():
    # iterrows builds a Series per row (slow) and upcasts mixed numeric
    # columns to one dtype, which would turn integer node ids into floats.
    for source, target, raw_weight in zip(df["source"], df["target"], df[f"{year}"]):
        weight = float(raw_weight)
        if G.has_edge(source, target):
            # Same undirected pair seen again: accumulate instead of overwrite.
            G[source][target]['weight'] += weight
        else:
            G.add_edge(source, target, weight=weight)

    return G



In [None]:
# Build the graph for the 1999 data; the following cells read this module-level G.
G=undirected(1999)

In [None]:
# Adjacency matrix of G as returned by networkx.
# NOTE(review): `x` is not read by any other cell visible in this notebook.
x=nx.adjacency_matrix(G)

In [None]:
# Show the nodes of G as this cell's output.
G.nodes()

In [None]:
import scipy.cluster.hierarchy as sch
import matplotlib.pyplot as plt

# Hierarchical clustering of the nodes of the module-level graph G.
#
# nx.to_numpy_array(G) is the dense adjacency matrix of G, rows/columns in
# G.nodes() order.
# NOTE(review): this square 2-D array is handed straight to linkage(), which
# SciPy reads as one observation vector per row, not as pairwise distances.
# Nodes are therefore grouped by how similar their adjacency rows are. The
# name `distances` is kept unchanged; confirm this input is what is intended.
distances = nx.to_numpy_array(G)

# Ward's minimum-variance linkage over the rows of the matrix.
linkage_matrix = sch.linkage(distances, method='ward')

# Draw on an explicit Axes rather than relying on pyplot's "current figure".
fig, ax = plt.subplots(figsize=(20, 10))
dendrogram = sch.dendrogram(
    linkage_matrix,
    labels=list(G.nodes()),  # same order as the rows of `distances`
    leaf_rotation=270,
    ax=ax,
)
ax.set_title('Dendrogram of Hierarchical Clustering')
ax.set_xlabel('Nodes')
ax.set_ylabel('Distance')
plt.show()


In [None]:
# Louvain community detection on G using the 'weight' edge attribute.
# Later cells iterate the result as node -> community id pairs.
partition = community_louvain.best_partition(G, weight='weight')

In [None]:
def clustering(G):
    """Run Louvain community detection on ``G`` and group nodes by community.

    Parameters
    ----------
    G : networkx.Graph
        Graph to partition; the ``weight`` edge attribute is used as weight.

    Returns
    -------
    tuple
        ``(partition, cluster)``: ``partition`` is the mapping returned by
        ``community_louvain.best_partition`` (node -> community id) and
        ``cluster`` is its inverse (community id -> list of member nodes).
    """
    partition = community_louvain.best_partition(G, weight='weight')

    cluster = {}
    for node, community_id in partition.items():
        # Create the member list on first sight of a community, then append.
        cluster.setdefault(community_id, []).append(node)

    return partition, cluster

    


In [None]:
# Invert the module-level `partition` (node -> community id) into
# community id -> list of member nodes, then print how many communities exist.
cluster = {}
for k, v in partition.items():
    cluster.setdefault(v, []).append(k)

print(len(cluster))


In [None]:
def draw_clusters(G, partition, year):
    """Draw ``G`` with nodes coloured by community and save the picture.

    Parameters
    ----------
    G : networkx.Graph
        Graph to draw.
    partition : dict
        Mapping node -> community id. Ids must be convertible to ``float``;
        each id is divided by the number of communities to pick a colour
        from the ``jet`` colormap.
    year
        Used only as the output name: the figure is saved to
        ``cluster_images/{year}``.

    Notes
    -----
    The layout comes from ``nx.spring_layout`` without a seed, so node
    positions may differ between runs. The figure is not closed here.
    """
    pos = nx.spring_layout(G, k=5, iterations=50)
    plt.figure(figsize=(20, 20))

    # Group nodes by community in a single pass instead of rescanning the
    # whole partition once per community.
    members = {}
    for node, cluster_id in partition.items():
        members.setdefault(cluster_id, []).append(node)
    n_clusters = len(members)

    # Draw nodes, one colour per community.
    for cluster_id, nodes in members.items():
        rgba = plt.cm.jet(float(cluster_id) / n_clusters)
        # Wrap the colour in a one-element list: a bare RGBA tuple is
        # ambiguous for scatter(), and for a community of exactly four nodes
        # it would be read as four scalar values to colour-map instead of
        # one colour for all of them.
        nx.draw_networkx_nodes(G, pos, nodelist=nodes, node_color=[rgba], node_size=50, alpha=0.8)

    # Draw edges
    nx.draw_networkx_edges(G, pos, width=0.5, alpha=0.3)

    # Draw labels
    nx.draw_networkx_labels(G, pos, font_size=8, font_family="sans-serif")

    plt.axis("off")
    plt.savefig(f"cluster_images/{year}")


In [None]:
def sum_edge_weights(G):
    """Return the sum of the ``weight`` attribute over all edges of ``G``.

    Edges without a ``weight`` attribute count as 1. A graph with no edges
    yields 0.
    """
    total_weight = 0
    for _u, _v, edge_weight in G.edges(data='weight', default=1):
        total_weight += edge_weight
    return total_weight


In [None]:
# For each election-cycle year: rebuild the graph, detect communities, score
# every community with find_ratio, and save a picture of the clustering.
# cluster_strength maps year -> list of [member_nodes, ratio] pairs.
#
# NOTE: find_ratio is defined in a later cell of this notebook and reads the
# module-level G, so that cell must have been run first, and G has to be
# rebound here before the ratios for a year are computed.
cluster_strength = {}
for year in [2000, 2004, 2008, 2012, 2016, 2020]:
    G = undirected(year)
    partition, clusters = clustering(G)
    cluster_strength[year] = [
        [members, find_ratio(members)] for members in clusters.values()
    ]

    draw_clusters(G, partition, year)
    

    
    

In [None]:
# Serialise cluster_strength to clusters.txt as JSON.
with open("clusters.txt", mode="w") as out_file:
    json.dump(cluster_strength, out_file)

In [None]:
# Plain-text report: one line with the year, then one line per community
# holding its member list followed by its ratio.
# NOTE(review): this writes to "clusters.txt" in "w" mode, replacing anything
# previously stored under that name.
with open("clusters.txt", "w") as fp:
    for i in [2000, 2004, 2008, 2012, 2016, 2020]:
        # file.write() takes exactly one string, so the earlier multi-argument
        # calls raised TypeError. print(..., file=fp) produces the
        # space-separated, newline-terminated lines they were aiming for.
        print(i, file=fp)
        # cluster_strength[i] is a list of [members, ratio] pairs, not two
        # parallel lists, so unpack each pair directly.
        for members, ratio in cluster_strength[i]:
            print(members, ratio, file=fp)
       

In [None]:
# Pretty-print cluster_strength into clusters.log for human inspection.
with open("clusters.log", mode="w") as log_file:
    pprint.pprint(cluster_strength, stream=log_file)

In [None]:
def find_ratio(cluster, graph=None):
    """Ratio of a community's internal edge weight to its boundary edge weight.

    Parameters
    ----------
    cluster : iterable
        Nodes that make up the community.
    graph : networkx.Graph, optional
        Graph the community belongs to. When omitted, the module-level ``G``
        is used, which is how existing callers invoke this function.

    Returns
    -------
    float
        Sum of edge weights inside the community divided by the sum of
        weights of edges with exactly one endpoint in it. If there is no
        boundary weight the result is ``inf`` when the internal weight is
        positive and ``nan`` otherwise, instead of raising
        ``ZeroDivisionError``.
    """
    if graph is None:
        graph = G  # fall back to the notebook-level graph

    subgraph = graph.subgraph(cluster)
    int_edges = sum_edge_weights(subgraph)
    ext_edges = sum(
        float(graph[u][v]['weight']) for u, v in nx.edge_boundary(graph, subgraph)
    )

    if ext_edges == 0:
        # A community with no outgoing weight (e.g. a whole connected
        # component) has no finite ratio.
        return float("inf") if int_edges > 0 else float("nan")
    return int_edges / ext_edges


In [None]:
# Girvan-Newman community detection on G. Two partitions are taken from the
# generator in sequence; the second one is displayed as sorted lists of nodes.
communities_generator = nx.community.girvan_newman(G)
top_level_communities = next(communities_generator)
next_level_communities = next(communities_generator)
sorted(sorted(community) for community in next_level_communities)