In [3]:
import numpy as np
import os
import pandas as pd
from tqdm.notebook import tqdm
import copy
import sys
import time

In [109]:
#Partially updated
class Graph:
    
    def __init__(self, n, mode="default", representation="matrix"):
        self.n_nodes = n
        self.mode = mode
        self.has_matrix = False
        self.matrix_error_size = None
        
        if representation == "matrix":
            try:
                self.matrix = np.full((n, n), np.inf)
                self.has_matrix = True

            except MemoryError as error:
                print("Cannot create matrix >>", error)
                ea = str(error)
                eb = ea[ea.index("iB for")-1]
                eb_m = 10**3 if eb=="G" else 10**6
                self.matrix_error_size = float(ea[ea.index("allocate")+9:ea.index("iB for")-2]) * eb_m

                self.lists = [list() for i in range(n)]
        
        elif representation == "lists":
            self.lists = [list() for i in range(n)]
        
        else:
            #UPDATE HERE WITH AN ERROR MESSAGE
            print("Invalid Representation")
    
    
    
    def add_edge(self, v, w, weight=1):
        
        if self.has_matrix:
            if v != w:
                self.matrix[v-1, w-1] = weight
                self.matrix[w-1, v-1] = weight
        else:
            if v != w:
                if mode == "default":
                    self.lists[v-w].append(w)
                    self.lists[w-1].append(v)
                    
                if mode == "weighted":
                    self.lists[v-1].append([w, weight])
                    self.lists[w-1].append([v, weight])

    
    def get_node(self, v):
        
        if not self.has_matrix:
            return self.lists[v-1]
        
        else:
            if self.mode == "default":
                return (np.where(self.matrix[v-1] == 1)[0] + 1).tolist()
            
            if self.mode == "weighted":
                neighbors = (np.where(self.matrix[v-1] < np.inf)[0] + 1).tolist()
                weights = self.matrix[v-1][self.matrix[v-1] < np.inf].tolist()
                return [list(t) for t in zip(neighbors, weights)]
    
    def get_lists(self):
        if not self.has_matrix:
            return self.lists
        else:
            return [self.get_node(n) for n in range(self.n_nodes)]
    
    def get_matrix(self):
        if self.has_matrix:
            return self.matrix
        return None
    
    def get_matrix_beautiful(self):
        if self.has_matrix:
            return pd.DataFrame(self.matrix, columns=np.arange(1, self.n_nodes+1), index=np.arange(1, self.n_nodes+1))
        return None

In [139]:
#Already Updated
def open_graph_txt(filename, extra=False, representation="matrix"):
    with open(filename, "r") as f:
        lines = [line for line in f.read().split("\n") if line != ""]
        n_nodes = int(lines[0])
        
        if np.array([line.split(" ") for line in lines[1:]]).shape[1] < 3:
            mode = "default"
        else:
            mode = "weighted"
        
        edges = [tuple(map(lambda i: int(i), line.split(" ")[:2])) for line in lines[1:]]
        
        if mode == "default":
            weights = [1 for line in lines[1:]]
        if mode == "weighted":
            weights = [float(line.split(" ")[-1]) for line in lines[1:]]
        
        edges_weights = list(zip(edges, weights))
        
        graph = Graph(n_nodes, mode=mode, representation=representation)
        for (v, w), weight in edges_weights:
            graph.add_edge(v, w, weight=weight)
    
#     if extra:
#         return graph, n_nodes, edges

    return graph


In [140]:
graph = open_graph_txt("inputs/grafo_1.txt", extra=False)

In [141]:
#To Update
def graph_statistics(graph):
    print("Número de vértices:", graph.n_nodes)
    
    if graph.has_matrix:
        print("Número de arestas:", graph.get_matrix().sum()/2)
        print("Grau mínimo:", graph.get_matrix().sum(axis=0).min())
        print("Grau máximo:", graph.get_matrix().sum(axis=0).max())
        print("Grau médio:", graph.get_matrix().sum(axis=0).mean())
        print("Mediana do Grau:", np.median(graph.get_matrix().sum(axis=0)))
    else:
        print("Número de arestas:", np.sum([len(x) if i+1 not in x else len(x) + 1 for i, x in enumerate(graph.get_lists())])/2)
        print("Grau mínimo:", np.min([len(x) if i+1 not in x else len(x) + 1 for i, x in enumerate(graph.get_lists())]))
        print("Grau máximo:", np.max([len(x) if i+1 not in x else len(x) + 1 for i, x in enumerate(graph.get_lists())]))
        print("Grau médio:", np.mean([len(x) if i+1 not in x else len(x) + 1 for i, x in enumerate(graph.get_lists())]))
        print("Mediana do Grau:", np.median([len(x) if i+1 not in x else len(x) + 1 for i, x in enumerate(graph.get_lists())]))
    print("List: ", sys.getsizeof(graph.get_lists())/(10**6), "MB")
    print("Matrix: ", (str(sys.getsizeof(graph.get_matrix())/(10**6))) if graph.has_matrix else (graph.matrix_error_size), "MB")

In [158]:
#Already updated
class DFS:
    def __init__(self, graph, root):
        self.graph = graph
        if self.graph.mode != "default":
            return "Error"
        
        self.visited = np.zeros(graph.n_nodes, dtype="uint8")
        self.level = np.full(graph.n_nodes, fill_value=-1, dtype="int32")
        self.parent = np.full(graph.n_nodes, fill_value=-1, dtype="int32")
        self.level[root-1] = 0
        self.start_root(root)
    
    def start_root(self, root):
        self.stack = []
        self.stack.append(root)
    
    def search(self):
        while(len(self.stack) != 0):
            u = self.stack.pop()
            
            if not self.visited[u-1]:
                self.visited[u-1] = 1
                
                for v in sorted(self.graph.get_lists()[u-1], reverse=True):
                    if not self.visited[v-1]:
                        self.stack.append(v)
                        self.parent[v-1] = u
                        self.level[v-1] = self.level[u-1] + 1

In [163]:
#Already updated
class BFS:
    def __init__(self, graph, root):
        self.graph = graph
        if self.graph.mode != "default":
            return "Error"
        
        self.visited = np.zeros(graph.n_nodes, dtype="uint8")
        self.level = np.full(graph.n_nodes, fill_value=-1, dtype="int32")
        self.parent = np.full(graph.n_nodes, fill_value=-1, dtype="int32")
        
        self.level[root-1] = 0
        self.visited[root-1] = 1
        
        self.start_root(root)
        
    def start_root(self, root):
        self.queue = []
        self.queue.append(root)
        
    def search(self):
        
        while(len(self.queue)):
            v = self.queue.pop(0)
            
            for w in sorted(self.graph.get_lists()[v-1]):
                if v == w:
                        continue
                if not self.visited[w-1]:
                    self.visited[w-1] = 1
                    self.queue.append(w)
                    self.parent[w-1] = v
                    self.level[w-1] = self.level[v-1] + 1

In [151]:
#To Update

class MinimumPath:
    
    def __init__(self, graph):
        self.graph = graph
        self.matrix = np.full((graph.n_nodes, graph.n_nodes), fill_value=-1, dtype="int32")
        self.run()
    
    def run(self):
        for v in tqdm(range(1, self.graph.n_nodes+1)):
            bfsl = BFSL(self.graph, v)
            bfsl.search()
            for bfsl_node_index in np.argwhere(bfsl.visited == 1).reshape(-1):
                self.matrix[v-1, bfsl_node_index] = bfsl.level[bfsl_node_index]
            del bfsl
    
    def get_distance(self, u, v):
        return self.matrix[u-1, v-1]
    
    def get_diameter(self):
        return np.max(self.matrix)
    
    def get_matrix(self):
        return self.matrix
    
    def get_matrix_beautiful(self):
        return pd.DataFrame(self.matrix, columns=np.arange(1, self.graph.n_nodes+1), index=np.arange(1, self.graph.n_nodes+1))
    

In [10]:
#To Update
class Components:
    
    def __init__(self, graph):
        self.graph = graph
        self.visited = np.zeros(graph.n_nodes, dtype="uint8")
        self.components = []
        
        while np.argwhere(self.visited == 0).reshape(-1).shape[0] > 0:
            root = np.argwhere(self.visited == 0).reshape(-1)[0] + 1

            bfsl = BFSL(self.graph, root)
            bfsl.search()
            
            bfsl_visited_index = np.argwhere(bfsl.visited == 1).reshape(-1)
            
            self.visited[bfsl_visited_index] = 1
            self.components.append((bfsl_visited_index+1).tolist())

    def get_components(self):
        a = sorted(self.components, key=lambda x: len(x), reverse=True)
        b = [len(x) for x in a]
        c = list(zip(b, a))
        return c


In [11]:
# from pyvis.network import Network

# net = Network(notebook=True)

# for v in range(1, graph.n_nodes+1):
#     net.add_node(v, label=v)
    
# for v, neighbors in enumerate(graph.nodes):
#     for w in neighbors:
#         net.add_edge(v+1, w)

# net.show("graph.html")

In [12]:
# folder = "inputs"
# filename = "grafo_5.txt"

# path = os.path.join(folder, filename)

# graph = open_graph_txt(path)
# graph.sort_neighbors()

# # print(graph.has_matrix)
# # graph_statistics(graph)
# # graph.has_matrix = False
# # print("----")
# # print(graph.has_matrix)
# graph_statistics(graph)

Cannot create matrix >> Unable to allocate 931. GiB for an array with shape (1000022, 1000022) and data type uint8
Número de vértices: 1000022
Número de arestas: 17666161.0
Grau mínimo: 1
Grau máximo: 87
Grau médio: 35.331544706016466
Mediana do Grau: 37.0
List:  8.448728 MB
Matrix:  931000.0 MB


In [168]:
dfs = DFS(graph, 1)
dfs.search()

print("DFS Result:")
dfs_df = pd.DataFrame(list(zip(range(1, dfs.graph.n_nodes+1), dfs.level, dfs.parent)), columns=["node", "level", "parent"], index=np.arange(1, dfs.graph.n_nodes+1))
# dfs_df.to_csv("./outputs/dfs_test.csv")
dfs_df

DFS Result:


Unnamed: 0,node,level,parent
1,1,0,-1
2,2,2,6
3,3,3,2
4,4,16,16
5,5,4,3
...,...,...,...
96,96,77,68
97,97,78,88
98,98,79,87
99,99,76,73


In [166]:
bfs = BFS(graph, 1)
bfs.search()

print("BFS Result:")
bfs_df = pd.DataFrame(list(zip(range(1, bfs.graph.n_nodes+1), bfs.level, bfs.parent)), columns=["node", "level", "parent"], index=np.arange(1, bfs.graph.n_nodes+1))
# bfs_df.to_csv("./outputs/bffs_test.csv")
bfs_df

BFS Result:


Unnamed: 0,node,level,parent
1,1,0,-1
2,2,2,6
3,3,2,15
4,4,2,96
5,5,2,15
...,...,...,...
96,96,1,1
97,97,1,1
98,98,2,6
99,99,1,1


In [None]:
########################################

In [None]:
minpath = MinimumPath(graph)

print("Minpath Distance:", minpath.get_distance(1, 3))
print("Minpath Diameter:", minpath.get_diameter())

In [None]:
components = Components(graph)

with open("./outputs/components_test.txt", mode="w") as out:
    for line in components.get_components():
        out.write(str(line))
        out.write("\n")