In [9]:
# Importing the desired libraries
import numpy as np
from tqdm import tqdm

In [10]:
# Read the edge-list file and drop its first four lines (presumably the
# comment header of the dataset - confirm against the file).
# The context manager guarantees the handle is closed even if reading fails.
with open("./Data/p2p-Gnutella08.txt", "r") as file:
    graph = file.read().splitlines()[4:]

In [11]:
# Parse every tab-separated line into a list of integers, so that each
# entry [a, b] represents an edge from node a to node b
newGraph = [[int(field) for field in line.split("\t")] for line in graph]

In [12]:
# Collect the distinct node ids that occur as either endpoint of an edge
listOfNodes = set()
for edge in tqdm(newGraph):
    listOfNodes.update((edge[0], edge[1]))

100%|██████████| 20777/20777 [00:00<00:00, 335092.09it/s]


### Adjacency Matrix

In [13]:
# Convert the edge list to a dense adjacency matrix indexed by node id:
# adjacencyMatrix[i, j] = 1 if there is an edge from i to j
# adjacencyMatrix[i, j] = 0 otherwise
# Node ids are used directly as row/column indices, so the matrix has to span
# 0..max(node id). Sizing it by the number of nodes alone raises IndexError
# whenever the ids are not exactly the contiguous range 0..n-1.
# An empty graph yields a 0x0 matrix.
matrixSize = max(listOfNodes, default=-1) + 1
adjacencyMatrix = np.zeros((matrixSize, matrixSize))
for edge in tqdm(newGraph):
    adjacencyMatrix[edge[0], edge[1]] = 1

100%|██████████| 20777/20777 [00:00<00:00, 305153.25it/s]


### Adjacency List

In [14]:
# Convert the edge list to an adjacency list: node id -> list of successor ids
adjacencyList = {}
for edge in tqdm(newGraph):
    source, target = edge[0], edge[1]
    # Create the source entry on first sight, then record the outgoing edge
    adjacencyList.setdefault(source, []).append(target)
    # The destination gets an entry too, even if it has no outgoing edges
    adjacencyList.setdefault(target, [])

100%|██████████| 20777/20777 [00:00<00:00, 492578.71it/s]


### Number of Nodes 

In [16]:
# Every endpoint of every edge has a key in adjacencyList,
# so the number of keys is the number of nodes
print(f"Number of nodes in the graph: {len(adjacencyList)}")

Number of nodes in the graph: 6301


### Number of Edges

In [17]:
# Each successor list holds one entry per outgoing edge of its node, so the
# total number of edges is the sum of the lengths of all successor lists
numEdges = sum(len(successors) for successors in adjacencyList.values())
print(f"Number of edges in the graph: {numEdges}")

Number of edges in the graph: 20777


### Calculation of In-Degree and Out-Degree

In [37]:
# Compute the in-degree and out-degree of every node.
#  Both counters start at zero for each known node.
#  Every edge (a, b) then adds one to the out-degree of its source a
#  and one to the in-degree of its destination b.
nodes = len(listOfNodes)
inDegreeList = dict.fromkeys(listOfNodes, 0)
outDegreeList = dict.fromkeys(listOfNodes, 0)
for edge in newGraph:
    fromNode, toNode = edge[0], edge[1]
    inDegreeList[toNode] += 1
    outDegreeList[fromNode] += 1

### Average In Degree

In [38]:
# Mean of the per-node in-degree counts stored in inDegreeList
print(f"Average In-Degree: {np.mean(list(inDegreeList.values()))}")

Average In-Degree: 3.2974131090303125


### Average Out Degree

In [39]:
# Mean of the per-node out-degree counts stored in outDegreeList
print(f"Average Out-Degree: {np.mean(list(outDegreeList.values()))}")

Average Out-Degree: 3.2974131090303125


### Node with max in degree

In [40]:
# Scan all nodes for the one with the largest in-degree.
#  On ties the first node encountered wins; with no nodes at all both
#  results stay at the sentinel value -1.
maxInDegreeNode = max(tqdm(inDegreeList), key=inDegreeList.get, default=-1)
maxInDegree = inDegreeList.get(maxInDegreeNode, -1)

100%|██████████| 6301/6301 [00:00<00:00, 891222.42it/s]


In [41]:
# Report the node found by the maximum in-degree search and its in-degree
print(f"Node with maximum in-degree: {maxInDegreeNode}, in-degree: {maxInDegree}")

Node with maximum in-degree: 266, in-degree: 91


### Node with max out degree

In [42]:
# Scan all nodes for the one with the largest out-degree.
#  On ties the first node encountered wins; with no nodes at all both
#  results stay at the sentinel value -1.
maxOutDegreeNode = max(tqdm(outDegreeList), key=outDegreeList.get, default=-1)
maxOutDegree = outDegreeList.get(maxOutDegreeNode, -1)

100%|██████████| 6301/6301 [00:00<00:00, 1030584.52it/s]


In [43]:
# Report the node found by the maximum out-degree search and its out-degree
print(f"Node with maximum out-degree: {maxOutDegreeNode}, out-degree: {maxOutDegree}")

Node with maximum out-degree: 5831, out-degree: 48


### Density of the Graph

In [44]:
# A directed graph without self-loops can hold at most one edge per ordered
# pair of distinct nodes, i.e. nodes * (nodes - 1) edges.
# Density = total number of edges / maximum number of edges
nodes = len(listOfNodes)
maxEdges = nodes * (nodes - 1)

In [45]:
# With fewer than two nodes maxEdges is 0; report a density of 0.0 in that
# case instead of raising ZeroDivisionError
density = numEdges / maxEdges if maxEdges else 0.0
print(f"Graph density: {density}")

Graph density: 0.0005233989061952878
