### **Importing Libraries**

In [1]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

### **Loading Dataset**

In [2]:
# Tab-separated edge list with one "from" / "to" pair of integer node ids per
# row. The first four lines of the file are skipped rather than parsed.
data_url = "./Dataset/Cit-HepPh.txt"
df_data_1 = pd.read_csv(
    data_url,
    sep='\t',
    skiprows=4,
    names=['FromNodeId', 'ToNodeId'],
    dtype={'FromNodeId': int, 'ToNodeId': int},
)

### **Loading Time of Release**

In [26]:
# Load the per-node release dates and keep only the nodes dated in or before
# LAST_RELEASE_YEAR.
data_url = "./Dataset/cit-HepPh-dates.txt"
LAST_RELEASE_YEAR = 1994  # rows dated after this year are discarded

# Both columns are read as text (the first line of the file is skipped) so the
# ids keep any leading zeros until they are stripped explicitly below.
df_data_2 = pd.read_csv(data_url, sep='\t', skiprows=1, names=['NodeId', 'Date'], dtype={'NodeId': str, 'Date': str})
df_data_2['Date'] = pd.to_datetime(df_data_2['Date'])
df_data_2['NodeId'] = df_data_2['NodeId'].astype(str).str.lstrip('0')

# Drop ids that begin with '11', then convert the remaining ids to integers so
# they can be matched against the integer ids of the edge list.
# NOTE(review): the prefix test runs after zero-stripping, so an id such as
# '0011234' is reduced to '11234' and is dropped as well -- confirm that this
# is intended, or move the prefix test before the lstrip.
df_data_2 = df_data_2[~df_data_2['NodeId'].str.startswith('11')]
df_data_2['NodeId'] = df_data_2['NodeId'].astype(int)
df_data_2 = df_data_2[df_data_2['Date'].dt.year <= LAST_RELEASE_YEAR]

# Report how many rows survive the filters (len() replaces the manual
# element-by-element counting loop; `unnodes` and `i` keep their meaning).
unnodes = df_data_2['NodeId']
i = len(unnodes)
print(i)


5018


### **Merging Both Datasets**

In [25]:
# Attach a release date to every edge by looking up its source node
# (FromNodeId) in the dates table. how='left' keeps every edge of df_data_1;
# edges whose source node has no row in df_data_2 get missing NodeId/Date.
df_merged = pd.merge(df_data_1, df_data_2, how='left', left_on='FromNodeId', right_on='NodeId')
df_merged['Date'] = pd.to_datetime(df_merged['Date'])
# Filter out rows where 'ToNodeId' is not present in 'NodeId' column of df_data_2
# (currently disabled):
# df_merged = df_merged[df_merged['ToNodeId'].isin(df_data_2['NodeId'])]

# Report the number of distinct source nodes in the merged edge table
# (len() replaces the manual element-by-element counting loop).
unnodes = df_merged['FromNodeId'].unique()
i = len(unnodes)
print(i)

32158


### **Creation of Graph**

In [31]:

# Build one directed graph from every row of the merged edge table:
# each row contributes an edge FromNodeId -> ToNodeId.
G_lat = nx.from_pandas_edgelist(
    df_merged,
    source='FromNodeId',
    target='ToNodeId',
    create_using=nx.DiGraph(),
)

# Size summary of the full graph.
print("Number of nodes:", G_lat.number_of_nodes())
print("Number of edges:", G_lat.number_of_edges())



Number of nodes: 34546
Number of edges: 421578


### **Yearly Analysis**

In [32]:
# Tag each edge with the calendar year of its Date. Rows with a missing Date
# get a missing Year; they form no group and fail the `<=` test below.
df_merged['Year'] = df_merged['Date'].dt.year
density_by_year = {}
dia_by_year = {}  # stays empty while the diameter computation is disabled
grouped = df_merged.groupby('Year')

# The groupby is used only to walk the distinct years in ascending order;
# for each year the graph is rebuilt from all edges dated up to that year.
for year, _ in grouped:
    up_to_year = df_merged['Year'] <= year
    filtered_data = df_merged[up_to_year]
    G = nx.from_pandas_edgelist(
        filtered_data,
        source='FromNodeId',
        target='ToNodeId',
        create_using=nx.DiGraph(),
    )

    density = nx.density(G)
    print(density)
    density_by_year[year] = density

    # Diameter is currently disabled:
    # dia = nx.diameter(G)
    # dia_by_year[year] = dia

0.004587651122625216
0.001251041463088491
0.0007606427249432969
