### **Importing Libraries**

In [107]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from networkx.algorithms.community.centrality import girvan_newman

### **Loading Dataset**

In [108]:
# Citation edge list: tab-separated (FromNodeId, ToNodeId) pairs, both read as integers.
data_url = "./Dataset/Cit-HepPh.txt"
df_data_1 = pd.read_csv(
    data_url,
    sep='\t',
    skiprows=4,  # the first four lines are skipped (presumably header/comment lines — verify against the file)
    names=['FromNodeId', 'ToNodeId'],
    dtype={'FromNodeId': int, 'ToNodeId': int},
)

### **Loading Time of Release**

In [109]:
# Release dates: tab-separated (NodeId, Date) pairs. Ids are read as strings first
# so the prefix filter and the leading-zero strip below can work on the raw text.
data_url = "./Dataset/cit-HepPh-dates.txt"
df_data_2 = pd.read_csv(data_url, sep='\t', skiprows=1, names=['NodeId', 'Date'], dtype={'NodeId': str, 'Date': str})
df_data_2['Date'] = pd.to_datetime(df_data_2['Date'])

# Drop every id that starts with '11'.
# .copy() makes the filtered frame independent, so the column assignment below
# does not write into a slice of the original (avoids SettingWithCopyWarning).
df_data_2 = df_data_2[~df_data_2['NodeId'].str.startswith('11')].copy()

# Strip leading zeros, then convert the ids to integers so they can be matched
# against the integer ids of the edge list.
df_data_2['NodeId'] = df_data_2['NodeId'].str.lstrip('0').astype(int)

# Keep only papers released in 1992 or earlier.
df_data_2 = df_data_2[df_data_2['Date'].dt.year <= 1992]

# Number of remaining date records.
unnodes = df_data_2['NodeId']
i = len(unnodes)
print(i)


755


### **Merging Both Datasets**

In [110]:
# Attach the release date of the citing paper (FromNodeId) to every citation edge.
# The inner join drops edges whose citing paper is absent from df_data_2.
df_merged = pd.merge(df_data_1, df_data_2, how='inner', left_on='FromNodeId', right_on='NodeId')
df_merged['Date'] = pd.to_datetime(df_merged['Date'])

# Filter out rows where 'ToNodeId' is not present in 'NodeId' column of df_data_2.
# .copy() detaches the result so later cells can add columns to df_merged
# without writing into a slice (avoids SettingWithCopyWarning).
df_merged = df_merged[df_merged['ToNodeId'].isin(df_data_2['NodeId'])].copy()

# Number of distinct citing papers that survive both filters.
unnodes = df_merged['FromNodeId'].unique()
i = len(unnodes)
print(i)

105


### **Creation of Graph**

In [111]:

# Build the directed citation graph: one edge FromNodeId -> ToNodeId per row of df_merged.
G_lat = nx.from_pandas_edgelist(
    df_merged,
    source='FromNodeId',
    target='ToNodeId',
    create_using=nx.DiGraph(),
)

# Basic size statistics of the graph.
print("Number of nodes:", G_lat.number_of_nodes())
print("Number of edges:", G_lat.number_of_edges())
print(nx.density(G_lat))



Number of nodes: 173
Number of edges: 152
0.005108213469552359


### **Yearly Analysis**

In [112]:
# Tag every citation with the release year of the citing paper.
# assign() rebinds df_merged to a new frame instead of writing a column into
# what may be a filtered slice (avoids SettingWithCopyWarning) and keeps the
# cell safe to re-run.
df_merged = df_merged.assign(Year=df_merged['Date'].dt.year)
density_by_year = {}
dia_by_year = {}  # stays empty: the diameter computation is currently disabled (see note below)
grouped = df_merged.groupby('Year')

# Only the group keys (the years) are needed; each iteration builds the
# cumulative graph of all citations made in that year or earlier.
for year, _group in grouped:
    filtered_data = df_merged[df_merged['Year'] <= year]
    G = nx.from_pandas_edgelist(filtered_data, 'FromNodeId', 'ToNodeId', create_using=nx.DiGraph())

    density = nx.density(G)
    print(density)

    density_by_year[year] = density
    # NOTE(review): the per-year diameter (nx.diameter(G) -> dia_by_year[year]) was
    # disabled in the original cell; presumably it fails because the directed graph
    # is not strongly connected — confirm before re-enabling.

0.005108213469552359


### **Girvan-Newman Algorithm**

In [113]:
def edge_to_remove(g):
    """Return the edge of ``g`` with the highest edge betweenness centrality.

    Parameters
    ----------
    g : graph accepted by ``nx.edge_betweenness_centrality``

    Returns
    -------
    tuple
        The edge in the form ``(a, b)``. When several edges share the maximum
        score, the first one in the order reported by
        ``nx.edge_betweenness_centrality`` is returned.

    Raises
    ------
    ValueError
        If ``g`` has no edges (there is nothing to pick from).
    """
    d1 = nx.edge_betweenness_centrality(g)

    # The previous version called sorted() and discarded its result, so it always
    # returned the first edge in dict order rather than the most central one.
    # max() picks the top-scoring edge directly, without sorting the whole list.
    best_edge, _score = max(d1.items(), key=lambda item: item[1])

    # Will return in the form (a,b)
    return best_edge

def girvan(graph):
    """Perform one Girvan-Newman split on ``graph`` and return the communities.

    Works on a copy of ``graph`` (the input is not modified). Edges chosen by
    ``edge_to_remove`` are removed one at a time until the number of weakly
    connected components exceeds the initial count, or until no edges remain.

    Parameters
    ----------
    graph : directed networkx graph
        Must support ``copy()``, ``remove_edge()`` and ``number_of_edges()``.

    Returns
    -------
    list of set
        The weakly connected components of the pruned copy. If the graph has no
        edges (or only edges whose removal never splits it, e.g. self-loops),
        the components are returned as they are instead of raising.
    """
    graph_cp = graph.copy()

    init_comp = nx.number_weakly_connected_components(graph_cp)

    # Bounding the loop by the remaining edge count keeps edge_to_remove from
    # being called on an edgeless graph, which would raise.
    while graph_cp.number_of_edges() > 0:
        u, v = edge_to_remove(graph_cp)
        graph_cp.remove_edge(u, v)

        # Stop as soon as the removal has produced an additional component.
        if nx.number_weakly_connected_components(graph_cp) > init_comp:
            break

    return list(nx.weakly_connected_components(graph_cp))

# Run the from-scratch Girvan-Newman split on the full citation graph.
# The result is a list of node sets, one per weakly connected component
# of the pruned copy; G_lat itself is not modified.
scratch_communities = girvan(G_lat)

### **Checking Results**

In [116]:
# Use the inbuilt Girvan-Newman algorithm to detect communities.
# girvan_newman returns a generator; its first item is the first-level split.
inbuilt_communities_generator = girvan_newman(G_lat)
inbuilt_communities = list(next(inbuilt_communities_generator))

inbuilt_communities_sets = [set(community) for community in inbuilt_communities]
scratch_communities_sets = [set(community) for community in scratch_communities]

for community in inbuilt_communities:
    print(community)

# Compare the two partitions independently of community order.
# Sorting lists of sets is unreliable: set "<" means "proper subset", which is
# only a partial order, so sorted() can leave equal partitions in different
# orders. Sets of frozensets compare by content alone.
inbuilt_partition = {frozenset(members) for members in inbuilt_communities_sets}
scratch_partition = {frozenset(members) for members in scratch_communities_sets}

if inbuilt_partition == scratch_partition:
    print("Communities match!")
else:
    print("Communities do not match.")


{9210240, 9209250, 9209220, 9207237, 9211207, 9210280, 9212267, 9209232, 9210257, 9207219, 9203220, 9210235, 9211286, 9206203, 9211228, 9211325}
{9211234, 9208205, 9205247}
{9207264, 9206242, 9207213, 9212269, 9212271, 9212284}
{9212305, 9203210, 9209292, 9211230}
{9212288, 9203201, 9203203, 9204228, 9209227, 9209233, 9210278, 9208231, 9211303, 9212216, 9212224, 9211334, 9211211, 9212235, 9211218, 9206227, 9211219, 9212248, 9210227, 9204216}
{9208233, 9209246, 9212279}
{9207236, 9205221}
{9206205, 9212278, 9209295}
{9209203, 9212252}
{9212285, 9205238}
{9207209, 9209299, 9207207}
{9209268, 9204237}
{9212233, 9212227, 9211244, 9207214}
{9203202, 9209285, 9208230, 9203206, 9207208, 9208262, 9207243, 9205229, 9205230, 9212245, 9209206, 9209208, 9212219, 9212318}
{9211267, 9212230, 9212295, 9209241, 9212205, 9209262, 9204207, 9208244, 9206230, 9209239, 9203225}
{9205228, 9204206, 9211216, 9204212, 9209272}
{9210233, 9204204, 9210276, 9211292}
{9212296, 9207266, 9205205}
{9210260, 9204215}
