### **Importing Libraries**


In [74]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

### **Loading Dataset**

In [75]:
# Read the citation edge list: a tab-separated file whose first four lines
# are skipped, leaving two integer columns (citing id, cited id).
data_url = "./Dataset/Cit-HepPh.txt"
edge_columns = ['FromNodeId', 'ToNodeId']
df_data_1 = pd.read_csv(
    data_url,
    sep='\t',
    skiprows=4,
    names=edge_columns,
    dtype={column: int for column in edge_columns},
)

### **Loading Time of Release**

In [76]:
# Read the per-node date table: tab-separated, first line skipped, both
# columns read as strings so that leading zeros in the ids survive parsing.
data_url = "./Dataset/cit-HepPh-dates.txt"
df_data_2 = pd.read_csv(
    data_url,
    sep='\t',
    skiprows=1,
    names=['NodeId', 'Date'],
    dtype={'NodeId': str, 'Date': str},
)
df_data_2['Date'] = pd.to_datetime(df_data_2['Date'])
graphs_by_date = {}

# Normalise ids by removing leading zeros so they can later be cast to int.
df_data_2['NodeId'] = df_data_2['NodeId'].astype(str).str.lstrip('0')

# Drop rows whose (zero-stripped) NodeId starts with '11'.
# NOTE(review): this test runs AFTER the zeros are stripped, so a raw id such
# as '0011001' becomes '11001' and is dropped as well -- confirm this is
# intended, otherwise apply the prefix filter to the raw ids instead.
# `.copy()` makes the filtered frame an independent object, so the column
# assignment below does not raise SettingWithCopyWarning.
df_data_2 = df_data_2[~df_data_2['NodeId'].str.startswith('11')].copy()
df_data_2['NodeId'] = df_data_2['NodeId'].astype(int)


### **Merging Both Datasets**

In [77]:
# Attach a date to every edge by matching the citing paper (FromNodeId)
# against the date table. The left join keeps every edge; edges without a
# matching NodeId get a missing Date.
# NOTE(review): if a NodeId occurs more than once in df_data_2, the matching
# edges are repeated in the result -- verify ids are unique if that matters.
df_merged = (
    df_data_1
    .merge(df_data_2, how='left', left_on='FromNodeId', right_on='NodeId')
    .assign(Date=lambda merged: pd.to_datetime(merged['Date']))
)

### **Creation of Graph**

In [78]:

# Build the directed citation graph: one edge FromNodeId -> ToNodeId per row.
G_lat = nx.from_pandas_edgelist(df_data_1, 'FromNodeId', 'ToNodeId', create_using=nx.DiGraph())

print("Number of nodes:", G_lat.number_of_nodes())
print("Number of edges:", G_lat.number_of_edges())
density = nx.density(G_lat)
print("Density of the graph:", density)

# Cross-check the node count against the merged edge table: every id that
# appears as a source or as a target of at least one edge.
unique_nodes_from = df_merged['FromNodeId'].unique()
unique_nodes_to = df_merged['ToNodeId'].unique()

# np.union1d returns the sorted UNION of the two id arrays (not their
# intersection), so its length should equal the graph's node count.
all_node_ids = np.union1d(unique_nodes_from, unique_nodes_to)
# Legacy name kept as an alias in case later cells still reference it.
intersection_nodes = all_node_ids

print("Union of nodes in 'FromNodeId' and 'ToNodeId':")
print(len(all_node_ids))



Number of nodes: 34546
Number of edges: 421578
Density of the graph: 0.00035326041393102855
Intersection nodes between 'FromNodeId' and 'ToNodeId':
34546
