In [6]:
import pandas as pd
import networkx as nx
from statistics import mean

## Data preprocessing

In [8]:
# Load the node table: whitespace-separated values with no header row.
# sep=r'\s+' is the supported replacement for the deprecated delim_whitespace=True.
node_feat = pd.read_csv('cora.content', sep=r'\s+', header=None)

# The first column is named 'Node', the last 'type', and every column in
# between becomes feat_1 .. feat_N (numbering starts at 1).
n_features = node_feat.shape[1] - 2
node_feat.columns = ['Node'] + [f'feat_{i}' for i in range(1, n_features + 1)] + ['type']
node_feat

Unnamed: 0,Node,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,...,feat_1425,feat_1426,feat_1427,feat_1428,feat_1429,feat_1430,feat_1431,feat_1432,feat_1433,type
0,31336,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Neural_Networks
1,1061127,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,Rule_Learning
2,1106406,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Reinforcement_Learning
3,13195,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Reinforcement_Learning
4,37879,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Probabilistic_Methods
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2703,1128975,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Genetic_Algorithms
2704,1128977,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Genetic_Algorithms
2705,1128978,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Genetic_Algorithms
2706,117328,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Case_Based


In [9]:
# Load the edge list: two whitespace-separated ID columns with no header row.
# sep=r'\s+' is the supported replacement for the deprecated delim_whitespace=True.
edges = pd.read_csv('cora.cites', sep=r'\s+', header=None, names=["node1", "node2"])

edges

Unnamed: 0,node1,node2
0,35,1033
1,35,103482
2,35,103515
3,35,1050679
4,35,1103960
...,...,...
5424,853116,19621
5425,853116,853155
5426,853118,1140289
5427,853155,853118


## Create the directed graph

In [10]:
# Directed graph with one node per entry of node_feat['Node'], so nodes
# without any edge are still present.
G = nx.DiGraph()
G.add_nodes_from(node_feat['Node'])

# Every row of `edges` becomes one edge pointing node2 -> node1, so a node's
# in-degree counts the rows in which it appears as node1.
# Pairing the two columns directly avoids building a Series per row, which
# is what iterrows() does.
edge = list(zip(edges['node2'], edges['node1']))
G.add_edges_from(edge)

In [14]:
# Per-node in-degree and out-degree, keyed by node ID.
in_degrees = {node: count for node, count in G.in_degree()}
out_degrees = {node: count for node, count in G.out_degree()}

# Total degree of a node is the sum of its in-degree and out-degree.
total_degrees = {
    node: incoming + out_degrees[node]
    for node, incoming in in_degrees.items()
}

In [17]:
# Summary statistics of G; "degree" here means in-degree + out-degree.
print(f"Number of nodes: {G.number_of_nodes()}")
print(f"Number of edges: {G.number_of_edges()}")
print(f"Maximum degree: {max(total_degrees.values())}")
# This value is the mean over all nodes, so it gets its own label instead of
# repeating "Maximum degree".
print(f"Average degree: {mean(total_degrees.values())}")

Number of nodes: 2708
Number of edges: 5429
Maximum degree: 169
Maximum degree: 4.0096011816839


In [18]:
# Largest in-degree found among all nodes of G.
max(in_degrees.values())

166

## The most cited paper is ID 35, which belongs to the class 'Genetic_Algorithms'

In [19]:
# Highest in-degree present in the graph.
target_value = max(in_degrees.values())

# Every node whose in-degree equals that maximum (there may be ties).
keys_with_max_value = [node for node in in_degrees if in_degrees[node] == target_value]

print(keys_with_max_value)

[35]


In [20]:
# Look up the 'type' of the node(s) with the maximum in-degree using the
# computed keys_with_max_value rather than a hard-coded ID, so the cell stays
# correct if the data changes. .loc selects the rows and the column in one step.
node_feat.loc[node_feat['Node'].isin(keys_with_max_value), 'type']

163    Genetic_Algorithms
Name: type, dtype: object

## Select only the first 8 nodes in this network and calculate the adjacency matrix

In [29]:
# Node IDs taken from the first 8 rows of node_feat, as a plain Python list.
first_8_nodes = [node_id for node_id in node_feat['Node'][:8]]
print(first_8_nodes)


[31336, 1061127, 1106406, 13195, 37879, 1126012, 1107140, 1102850]


In [30]:
# Restrict G to the nodes listed in first_8_nodes.
subgraph=G.subgraph(first_8_nodes)

In [31]:
# Number of nodes in the 8-node subgraph.
subgraph.number_of_nodes()

8

In [32]:
# Number of edges of G whose endpoints both lie in the subgraph.
subgraph.number_of_edges()

0

In [33]:
# Pin the row/column order to first_8_nodes so that entry [i, j] refers to the
# edge first_8_nodes[i] -> first_8_nodes[j]. Without nodelist the order is
# whatever order the subgraph iterates its nodes in, which is not guaranteed
# to match first_8_nodes.
adj_matrix = nx.to_numpy_array(subgraph, nodelist=first_8_nodes)

## There are no citation links (cited or citing) between any of the nodes in this subgraph

In [34]:
# Display the adjacency matrix of the 8-node subgraph.
adj_matrix

array([[0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.]])