# Q1

In [2]:
import plotly.express as px
import plotly.graph_objects as go
from pyvis.network import Network
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import numpy as np

In [3]:
# 12831.edges file has the following format:
# <source> <destination>
# the graph is treated as directed: each line is an edge from <source> to <destination>
# find the max node id
# read the file and create the adjacency matrix

# get source and destination nodes from the file
def get_source_destination(file_name):
    """Read an edge-list file and return its two columns as parallel lists.

    Every non-blank line is expected to hold two whitespace-separated
    integers: ``<source> <destination>``. Blank lines are skipped, and any
    run of spaces or tabs is accepted as the separator.

    Args:
        file_name: Path of the edge-list file to read.

    Returns:
        A tuple ``(source_list, destination_list)`` of equal-length lists of
        ints, in the order the edges appear in the file.

    Raises:
        ValueError: If a non-blank line does not contain exactly two fields,
            or a field is not a valid integer.
    """
    source_list = []
    destination_list = []
    with open(file_name, 'r') as f:
        for line in f:
            # split() with no argument splits on any whitespace and drops
            # leading/trailing whitespace, so no separate strip() is needed.
            fields = line.split()
            if not fields:
                # Blank line (commonly the last line of the file): nothing to parse.
                continue
            source, destination = fields
            source_list.append(int(source))
            destination_list.append(int(destination))
    return source_list, destination_list

# count the number of unique nodes in the file
def count_unique_nodes(source_list, destination_list):
    """Return the number of distinct node ids among the paired edge endpoints.

    The two lists are walked in lockstep with ``zip``, so if their lengths
    differ only the first ``min(len(source_list), len(destination_list))``
    pairs are considered.

    Args:
        source_list: Source node id of each edge.
        destination_list: Destination node id of each edge.

    Returns:
        The count of distinct ids seen as either endpoint.
    """
    distinct_ids = {
        endpoint
        for edge in zip(source_list, destination_list)
        for endpoint in edge
    }
    return len(distinct_ids)

# create a dictionary that assigns a unique id to each node in the order they occur
def get_node_id_dict(source_list, destination_list):
    """Number the nodes 0, 1, 2, ... in order of first appearance.

    Edges are scanned in order; within an edge the source is considered
    before the destination. The first time a node id is seen it receives the
    next unused index.

    Args:
        source_list: Source node id of each edge.
        destination_list: Destination node id of each edge.

    Returns:
        A tuple ``(index_to_node, node_to_index)``:
        the first dict maps each assigned index to the original node id,
        the second maps each original node id to its assigned index.
    """
    node_to_index = {}
    for edge in zip(source_list, destination_list):
        for node in edge:
            # setdefault only stores the new index when the node is unseen;
            # len() is evaluated before insertion, giving the next free index.
            node_to_index.setdefault(node, len(node_to_index))
    index_to_node = {index: node for node, index in node_to_index.items()}
    return index_to_node, node_to_index

# Load the edge list and build the lookups that the following cells rely on.
source, destination = get_source_destination('12831.edges')
unique_nodes = count_unique_nodes(source, destination)
# get_node_id_dict returns (index -> node id, node id -> index), so after this
# unpacking node_dict maps index -> node id and node_id_dict maps node id -> index.
node_dict, node_id_dict = get_node_id_dict(source, destination)
print(node_dict,'\n', node_id_dict)

{0: 398874773, 1: 652193, 2: 18498878, 3: 14749606, 4: 14305022, 5: 8479062, 6: 22253, 7: 12741, 8: 15540222, 9: 14809096, 10: 7415132, 11: 14172562, 12: 17129553, 13: 13839772, 14: 15911247, 15: 14086492, 16: 14087951, 17: 57378470, 18: 19094625, 19: 19479427, 20: 1186, 21: 29294520, 22: 13462502, 23: 13652832, 24: 40198602, 25: 174958347, 26: 287713, 27: 2727051, 28: 1765921, 29: 14710479, 30: 8630562, 31: 20496869, 32: 180505807, 33: 1678471, 34: 663463, 35: 728163, 36: 3191321, 37: 17408993, 38: 765548, 39: 13141442, 40: 883301, 41: 9616792, 42: 1371101, 43: 13334762, 44: 19966557, 45: 12725022, 46: 9283582, 47: 14231571, 48: 713263, 49: 12800212, 50: 14450509, 51: 165964253, 52: 14, 53: 563200400, 54: 15236339, 55: 606083, 56: 104937383, 57: 14178728, 58: 20755177, 59: 14163141, 60: 57739496, 61: 12007182, 62: 17729005, 63: 10587552, 64: 377821426, 65: 9411772, 66: 17633994, 67: 15639334, 68: 1260231, 69: 668423, 70: 58166411, 71: 586, 72: 11178592, 73: 5994452, 74: 14471007, 75: 

In [4]:
# make an adjacency matrix
def get_adj_matrix(source_list, destination_list, node_id_dict):
    """Build a dense 0/1 adjacency matrix from parallel edge lists.

    Args:
        source_list: Source node id of each edge.
        destination_list: Destination node id of each edge, paired with
            ``source_list`` by position.
        node_id_dict: Mapping from node id to its row/column index. The
            indices are expected to be exactly ``0 .. len(node_id_dict) - 1``.

    Returns:
        A square float ndarray with one row and one column per entry of
        ``node_id_dict``. Entry ``[i, j]`` is 1 when an edge whose source has
        index ``i`` and whose destination has index ``j`` was listed, else 0.
        Only ``[source, destination]`` is set; the reverse entry is not
        mirrored, and repeated edges still yield 1.

    Raises:
        KeyError: If an edge endpoint is missing from ``node_id_dict``.
    """
    # Size the matrix from the mapping that was passed in rather than from a
    # notebook-level global, so the function works on any input and does not
    # depend on which cells happened to run before it.
    node_count = len(node_id_dict)
    adjacency_matrix = np.zeros((node_count, node_count))
    for source, destination in zip(source_list, destination_list):
        source_id = node_id_dict[source]
        destination_id = node_id_dict[destination]
        adjacency_matrix[source_id, destination_id] = 1
    return adjacency_matrix

# Rows correspond to edge sources and columns to edge destinations; both are
# indexed through node_id_dict (node id -> index).
adjacency_matrix = get_adj_matrix(source, destination, node_id_dict)
print(adjacency_matrix)

[[0. 1. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [5]:
# Summary statistics derived from the adjacency matrix.
# Column sums count edges arriving at each node; row sums count edges leaving it.
node_total = adjacency_matrix.shape[0]
edge_total = np.sum(adjacency_matrix)
in_degree = np.sum(adjacency_matrix, axis=0)
out_degree = np.sum(adjacency_matrix, axis=1)

print('Number of unique nodes: ', node_total)
print('Number of edges: ', edge_total)
print('Average indegree: ', in_degree.mean())
print('Average outdegree: ', out_degree.mean())
# node_dict translates a matrix index back to the original node id.
print('Node with max indegree: ', node_dict[np.argmax(in_degree)])
print('Node with max outdegree: ', node_dict[np.argmax(out_degree)])
# NOTE(review): the denominator is rows * columns (n * n), which counts
# self-loops as possible edges; the usual directed-graph density divides by
# n * (n - 1). Confirm which definition is intended.
print('Density of network', edge_total/(node_total*adjacency_matrix.shape[1]))

Number of unique nodes:  236
Number of edges:  2478.0
Average indegree:  10.5
Average outdegree:  10.5
Node with max indegree:  180505807
Node with max outdegree:  1186
Density of network 0.04449152542372881
