In [2]:
from typing import Optional, Any, List


class Graph:
    """
    Directed graph stored as an adjacency dictionary.

    Every vertex name maps to a record with two entries: ``'data'`` (the
    payload attached to the vertex) and ``'neighbors'`` (a dictionary that
    maps each out-neighbor to the payload of the connecting edge).
    """
    def __init__(self):
        self._graph = {}

    def add_vertex(self, vertex: str, data: Optional[Any]=None) -> None:
        """
        Registers a vertex; a vertex that already exists is left untouched
        :param vertex: the vertex name
        :param data: data associated with the vertex
        """
        if vertex in self._graph:
            return
        self._graph[vertex] = {'data': data, 'neighbors': {}}

    def add_edge(self, vertex1: str, vertex2: str, data: Optional[Any]=None) -> None:
        """
        Creates (or overwrites) the directed edge vertex1 -> vertex2
        :param vertex1: key of the source vertex
        :param vertex2: key of the destination vertex
        :param data: the data associated with the edge
        :raises ValueError: if either endpoint is not in the graph
        """
        for endpoint in (vertex1, vertex2):
            if endpoint not in self._graph:
                raise ValueError("The vertexes do not exist")
        self._graph[vertex1]['neighbors'][vertex2] = data

    def get_neighbors(self, vertex) -> List[str]:
        """
        Lists the out-neighbors of a vertex
        :param vertex: the vertex to query
        :return: a new list of neighbor names, empty for an unknown vertex
        """
        record = self._graph.get(vertex)
        if record is None:
            return []
        return list(record['neighbors'])

    def get_vertex_data(self, vertex: str) -> Optional[Any]:
        """
        Reads the data stored on a vertex
        :param vertex: the vertex name
        :return: the vertex data, or None when the vertex does not exist
        """
        if not self.vertex_exists(vertex):
            return None
        return self._graph[vertex]['data']

    def get_edge_data(self, vertex1: str, vertex2: str) -> Optional[Any]:
        """
        Reads the data stored on the edge vertex1 -> vertex2
        :param vertex1: the source vertex name
        :param vertex2: the destination vertex name
        :return: the edge data
        :raises ValueError: if the edge does not exist
        """
        if not self.edge_exists(vertex1, vertex2):
            raise ValueError("The edge does not exist")
        return self._graph[vertex1]['neighbors'][vertex2]

    def print_graph(self) -> None:
        """
        Prints every vertex with its data and its neighbor dictionary
        """
        for name, record in self._graph.items():
            print("Vertex:", name)
            print("Data:", record['data'])
            print("Neighbors:", record['neighbors'])
            print("")

    def vertex_exists(self, vertex: str) -> bool:
        """
        Tells whether a vertex is part of the graph
        :param vertex: the vertex name
        :return: boolean
        """
        known = vertex in self._graph
        return known

    def edge_exists(self, vertex1: str, vertex2: str) -> bool:
        """
        Tells whether the directed edge vertex1 -> vertex2 is part of the graph
        :param vertex1: the source vertex name
        :param vertex2: the destination vertex name
        :return: boolean
        """
        record = self._graph.get(vertex1)
        return record is not None and vertex2 in record['neighbors']

    def graph_len(self):
        """
        Counts the vertices
        :return: number of vertices in the graph
        """
        return len(self._graph)

    def get_vertices(self):
        """
        Lists the vertex names
        :return: a new list with every vertex name, in insertion order
        """
        return list(self._graph)

    def add_vertex_data(self, vertex, data):
        """
        Replaces the data stored on an existing vertex
        :param vertex: the vertex name (a KeyError is raised if it is unknown)
        :param data: the new data
        """
        record = self._graph[vertex]
        record["data"] = data
    

    


In [3]:
from tqdm import tqdm

def import_graph(graph, vertices_path="vertexes.txt", edges_path="edges.txt",
                 total_edges=101409330):
    """
    Populate ``graph`` from a vertex file and an edge file.

    The vertex file holds one ``<index> <title>`` pair per line (the title
    is everything after the first space). The edge file holds one
    ``<index1> <index2> <weight>`` triple per line. Edges whose endpoints
    are not present in the vertex file are skipped.

    :param graph: object exposing ``add_vertex(title)`` and
        ``add_edge(title1, title2, weight)``; it is modified in place
    :param vertices_path: path of the vertex file
    :param edges_path: path of the edge file
    :param total_edges: expected number of edge lines, used only to size
        the progress bar
    :return: the same ``graph`` object that was passed in
    """
    index_title_dict = {}

    with open(vertices_path, "r") as file:
        for line in file:
            # Remove only the line terminator: slicing with [:-1] would eat
            # the last character of a final line that has no newline.
            line = line.rstrip("\n")
            if not line.strip():
                continue  # tolerate blank lines (e.g. at the end of the file)
            index, _, title = line.partition(" ")
            index_title_dict[int(index)] = title
            graph.add_vertex(title)

    with open(edges_path, "r") as file:
        for line in tqdm(file, desc="Loading edges", total=total_edges):
            # Split once per line; this loop runs for every edge in the file.
            fields = line.split()
            if not fields:
                continue
            art1, art2, q = int(fields[0]), int(fields[1]), int(fields[2])
            if art1 in index_title_dict and art2 in index_title_dict:
                graph.add_edge(index_title_dict[art1], index_title_dict[art2], q)

    return graph



In [None]:
# Build the working graph by loading the data files into a fresh Graph instance.
graph = import_graph(Graph())

In [None]:

from collections import deque


def convertir_no_dirigido(graph):
    """
    Make the graph symmetric by adding the missing reverse edge of every edge.

    The graph is modified in place. A reverse edge that has to be created
    receives the same data as the forward edge, so edge data stays defined
    in both directions; reverse edges that already exist are left untouched.

    :param graph: object exposing ``get_vertices``, ``get_neighbors``,
        ``edge_exists``, ``get_edge_data`` and ``add_edge``
    :return: the same ``graph`` object that was passed in
    """
    # get_vertices / get_neighbors are called once per loop and their results
    # iterated, so edges added below do not disturb the iteration in progress.
    for vertex in tqdm(graph.get_vertices(), desc = "Adding edges to make it non-directional"):
        for neighbor in graph.get_neighbors(vertex):
            if not graph.edge_exists(neighbor, vertex):
                # Copy the forward edge's data instead of leaving the reverse
                # edge with None.
                graph.add_edge(neighbor, vertex, graph.get_edge_data(vertex, neighbor))

    return graph

# convertir_no_dirigido adds edges to the graph it receives and returns that same object.
graph = convertir_no_dirigido(graph)

In [None]:

def calcular_conexidad(graph:Graph):
    """
    Count how many depth-first traversals are needed to reach every vertex.

    Vertices are scanned in the order given by ``graph.get_vertices()``;
    each vertex not reached yet starts a new traversal that follows
    ``graph.get_neighbors``. When every edge also exists in the opposite
    direction this count is the number of connected components.

    :param graph: the graph to explore
    :return: number of traversals started
    """
    vistos = set()
    total = 0

    for origen in tqdm(graph.get_vertices(), desc ="Calculando componentes conexas"):
        if origen in vistos:
            continue

        total += 1
        vistos.add(origen)
        # Explicit stack instead of recursion: traversals can be very deep.
        pendientes = [origen]
        while pendientes:
            actual = pendientes.pop()
            for vecino in graph.get_neighbors(actual):
                if vecino in vistos:
                    continue
                vistos.add(vecino)
                pendientes.append(vecino)

    return total


componentes = calcular_conexidad(graph)
# More than one component means the graph is not weakly connected; the
# previous test (> 0) reported "not connected" for every non-empty graph.
print("No es debilmente conexo" if componentes > 1 else "Es debilmente conexo")
print("Cantidad de componentes: ", componentes)

            

In [None]:
def caminos_minimos(graph: Graph, v1, v2):
    """
    Find a shortest path (fewest edges) from ``v1`` to ``v2``.

    A breadth-first search is run from ``v1`` following
    ``graph.get_neighbors``, so edge direction is respected. The graph only
    exposes outgoing neighbors, which is why the search runs from one side
    only: a search started at ``v2`` would need incoming neighbors.

    :param graph: the graph to search
    :param v1: source vertex
    :param v2: destination vertex
    :return: a dictionary mapping each discovered vertex to the vertex it
        was reached from (usable with ``imprimir_camino(v1, v2, result)``),
        or ``None`` after printing a message when ``v2`` is unreachable.
        For ``v1 == v2`` the dictionary is empty.
    """
    anterior = {}
    if v1 == v2:
        return anterior

    # Mark vertices when they are enqueued, not when dequeued, so each one is
    # queued once and its first (shortest) predecessor is never overwritten.
    visitados = {v1}
    queue = deque([v1])

    while queue:
        node = queue.popleft()
        for vecino in graph.get_neighbors(node):
            if vecino in visitados:
                continue
            visitados.add(vecino)
            anterior[vecino] = node
            if vecino == v2:
                return anterior
            queue.append(vecino)

    print("No hay camino posible")
    return None


def reconstruir_camino(camino1, camino2, intermedio):
    """
    Merge the two halves of a two-sided search into one predecessor map.

    ``camino2`` is walked starting at ``intermedio``; for every link
    ``node -> camino2[node]`` the reversed link is written into ``camino1``
    (``camino1[camino2[node]] = node``). The walk ends at the first vertex
    that has no entry in ``camino2``.

    :param camino1: predecessor map of the first half; updated in place
    :param camino2: map followed from ``intermedio`` onwards
    :param intermedio: vertex where both halves meet; if it has no entry in
        ``camino2`` nothing is added
    :return: ``camino1`` (the same object), extended with the reversed links
    """
    actual = intermedio
    # A membership test replaces the former bare ``except`` and also covers
    # the case where ``intermedio`` itself is missing from camino2.
    while actual in camino2:
        siguiente = camino2[actual]
        camino1[siguiente] = actual
        actual = siguiente
    return camino1

def imprimir_camino(v1, v2, previous):
    """
    Print the path from ``v1`` to ``v2``, one vertex per line.

    The path is recovered by following ``previous`` backwards from ``v2``
    until ``v1`` is reached, then printed from ``v1`` to ``v2``. The walk is
    iterative, so the path length is not limited by the recursion limit.

    :param v1: first vertex of the path
    :param v2: last vertex of the path
    :param previous: dictionary mapping each vertex of the path (except
        ``v1``) to the vertex that precedes it
    :raises KeyError: if the chain breaks before reaching ``v1``
    :raises ValueError: if ``previous`` loops without ever reaching ``v1``
    """
    camino = [v2]
    while camino[-1] != v1:
        camino.append(previous[camino[-1]])
        # A simple path uses each key of ``previous`` at most once; anything
        # longer means the chain is cycling.
        if len(camino) > len(previous) + 1:
            raise ValueError("previous contains a cycle that never reaches v1")

    for vertex in reversed(camino):
        print(vertex)

# Endpoints of the path query (vertex names).
v1 = "Samsung Galaxy J7 Prime"
v2 = "Guillermo Francella"
# Load the data again into a new Graph instance; this rebinds `graph`.
graph = import_graph(Graph())
camino = caminos_minimos(graph, v1, v2)
# caminos_minimos returns None when it finds no path, so nothing is printed then.
if camino: imprimir_camino(v1, v2, camino)

In [8]:

import numpy as np
from collections import deque

def encontrar_caminos_minimos(graph: Graph, vertex1, vertex2):
    """
    Breadth-first search from ``vertex1`` that records every shortest-path
    predecessor of each vertex it reaches.

    The search stops as soon as the first vertex at the same distance as
    ``vertex2`` is dequeued: by then every vertex one level closer has been
    expanded, so the predecessor lists of ``vertex2`` and of every vertex on
    a shortest path to it are complete. If ``vertex2`` is never reached the
    whole reachable part of the graph is explored.

    :param graph: the graph to search
    :param vertex1: source vertex
    :param vertex2: target vertex
    :return: dictionary mapping each discovered vertex (never ``vertex1``)
        to the list of vertices that precede it on some shortest path from
        ``vertex1``; ``vertex2`` is a key only if it is reachable
    """
    # ``distancia`` doubles as the visited set. The source is in it from the
    # start, so an edge leading back to ``vertex1`` can no longer re-discover
    # it and give it a bogus predecessor and distance.
    distancia = {vertex1: 0}
    anterior = {}
    queue = deque([vertex1])

    while queue:
        vertex = queue.popleft()

        if vertex2 in distancia and distancia[vertex] >= distancia[vertex2]:
            break  # deeper vertices cannot lie on a shortest path to vertex2

        siguiente = distancia[vertex] + 1
        for neighbor in graph.get_neighbors(vertex):
            if neighbor not in distancia:
                distancia[neighbor] = siguiente
                anterior[neighbor] = [vertex]
                queue.append(neighbor)
            elif distancia[neighbor] == siguiente:
                # Another predecessor at the same shortest distance.
                anterior[neighbor].append(vertex)

    return anterior


def reconstruir_caminos2(beggining, end, graph, weight, min_max, anterior):
    """
    Among the paths encoded in ``anterior``, pick the one whose total edge
    weight is selected by ``min_max``.

    The best path to each vertex is computed once and cached, so the cost
    grows with the number of predecessor links instead of with the number
    of distinct paths. This relies on ``min_max`` being ``min`` or ``max``
    and on weights being combined by addition.

    :param beggining: first vertex of the path
    :param end: last vertex of the path
    :param graph: object exposing ``get_edge_data(u, v)`` returning the
        weight of the edge ``u -> v``
    :param weight: initial weight added to the total
    :param min_max: ``min`` or ``max``; receives a tuple of candidate totals
    :param anterior: dictionary mapping a vertex to the list of its
        predecessors
    :return: tuple ``(path, total_weight)`` where ``path`` is a list of
        vertices from ``beggining`` to ``end``
    :raises KeyError: if a vertex on the way has no entry in ``anterior``
    """
    mejores = {}  # vertex -> (best path from beggining, its weight)

    def mejor_hasta(nodo):
        if nodo == beggining:
            return [beggining], 0
        if nodo in mejores:
            return mejores[nodo]

        candidatos = []
        for prev in anterior[nodo]:
            camino, peso = mejor_hasta(prev)
            candidatos.append((camino + [nodo], peso + graph.get_edge_data(prev, nodo)))

        pesos = tuple(peso for _, peso in candidatos)
        # index() keeps the first candidate on ties, as a full enumeration would.
        mejores[nodo] = candidatos[pesos.index(min_max(pesos))]
        return mejores[nodo]

    camino, peso = mejor_hasta(end)
    return (camino, weight + peso)




In [None]:

# Endpoints of the query (vertex names).
v1 = "Samsung Galaxy J7 Prime"
v2 = "Guillermo Francella"
# Function usage
# `anterior` is the predecessor map returned by the search from v1.
anterior = encontrar_caminos_minimos(graph, v1, v2)
# Same predecessor map, once selecting with min and once with max; each call
# returns a (path, total weight) tuple and starts from an initial weight of 0.
camino_min = reconstruir_caminos2(v1, v2, graph, 0, min, anterior)
camino_max = reconstruir_caminos2(v1, v2, graph, 0, max, anterior)

print(f"Camino minimo de peso min ({camino_min[1]}): {' -> '.join(camino_min[0])}")
print(f"Camino minimo de peso max ({camino_max[1]}): {' -> '.join(camino_max[0])}")

In [None]:
from collections import deque

def distancia_maxima(vertex1, graph: Graph):
    """
    Breadth-first search from ``vertex1`` that reports the farthest vertex.

    :param vertex1: vertex where the search starts
    :param graph: the graph to explore through ``get_neighbors``
    :return: tuple ``(distance, path)``: the largest number of edges found
        from ``vertex1`` to any reachable vertex, and the list of vertices
        from ``vertex1`` to that farthest vertex. When several vertices are
        equally far, the first one discovered is used.
    """
    # The keys of ``nivel`` are exactly the vertices discovered so far, so it
    # also serves as the visited set.
    nivel = {vertex1: 0}
    padre = {}  # discovered vertex -> vertex it was reached from
    pendientes = deque([vertex1])

    while pendientes:
        actual = pendientes.popleft()
        for vecino in graph.get_neighbors(actual):
            if vecino in nivel:
                continue
            pendientes.append(vecino)
            nivel[vecino] = nivel[actual] + 1
            padre[vecino] = actual

    lejano = max(nivel, key=nivel.get)
    mayor = nivel[lejano]

    # Walk the parent links back to the start, then flip the list.
    recorrido = []
    nodo = lejano
    while nodo is not None:
        recorrido.append(nodo)
        nodo = padre.get(nodo)
    recorrido.reverse()

    return mayor, recorrido



def estimar_diametro(graph: Graph, n_bfs=10):
    """
    Estimate the diameter from breadth-first searches started at random vertices.

    Each search (``distancia_maxima``) yields the longest shortest path that
    starts at its random vertex; the longest one over all searches is
    returned. Because only ``n_bfs`` start vertices are tried, the result is
    a lower bound of the true diameter.

    :param graph: the graph to explore
    :param n_bfs: number of random start vertices to try (at least 1)
    :return: tuple ``(path, length)`` for the longest path found
    :raises ValueError: if the graph has no vertices or ``n_bfs`` < 1
    """
    if n_bfs < 1:
        raise ValueError("n_bfs must be at least 1")

    # Build the vertex list once; it does not change between searches.
    vertices = graph.get_vertices()
    len_vertices = len(vertices)
    if len_vertices == 0:
        raise ValueError("The graph has no vertices")

    mejor_camino = None
    mejor_distancia = -1
    for _ in tqdm(range(n_bfs), desc=f"Calculando {n_bfs} BFS"):
        vertex = vertices[np.random.randint(0, len_vertices)]
        max_distance, max_path = distancia_maxima(vertex, graph)
        # Strict comparison keeps the first search that reached the maximum.
        if max_distance > mejor_distancia:
            mejor_distancia = max_distance
            mejor_camino = max_path

    return mejor_camino, mejor_distancia


# estimar_diametro returns (path, length) for the longest path found by its sampled searches.
camino, diametro = estimar_diametro(graph)
print("El diametro es: ", diametro)
print("Camino minimo más largo: \n", camino)

In [None]:
def articulos_sin_anterior(graph:Graph):
    """
    Report the vertices that no edge points to.

    A first pass collects every vertex that appears in some neighbor list;
    a second pass keeps the vertices that never did. The count and up to
    five examples are printed; nothing is returned.

    :param graph: the graph to inspect
    """
    con_predecesor = set()
    for origen in tqdm(graph.get_vertices(), desc="Buscando vertices conectados"):
        con_predecesor.update(graph.get_neighbors(origen))

    sin_predecesor = {
        candidato
        for candidato in tqdm(graph.get_vertices(), desc= "Buscando vertices no conectados")
        if candidato not in con_predecesor
    }

    print(f"Grafos que no tienen predecesor: {len(sin_predecesor)} \n Ejemplos:")
    print(list(sin_predecesor)[:5])

# Prints the count and a few examples; the function returns nothing.
articulos_sin_anterior(graph)

In [None]:
def articulos_sin_vecino(graph:Graph):
    """
    Report the vertices whose neighbor list is empty.

    The count and up to five examples (in vertex order) are printed;
    nothing is returned.

    :param graph: the graph to inspect
    """
    aislados = [
        candidato
        for candidato in graph.get_vertices()
        if not graph.get_neighbors(candidato)
    ]

    print(f"Grafos que no tienen vecinos: {len(aislados)} \n Ejemplos: {aislados[:5]}")

# Prints the count and a few examples; the function returns nothing.
articulos_sin_vecino(graph)

In [None]:
# Start again from an empty Graph and load the data files into it; this rebinds `graph`.
graph = Graph()
graph = import_graph(graph)

In [None]:
def articulos_con_links_correspondidos(graph: Graph):
    """
    Find the vertices that have at least one reciprocated link.

    A vertex qualifies when one of its neighbors also has an edge back to
    it. Only the first such neighbor of each vertex is recorded as an
    example. The number of qualifying vertices and up to five example
    pairs are printed.

    :param graph: the graph to inspect
    :return: set with the qualifying vertices
    """
    reciprocos = set()   # vertices with a reciprocated link
    ejemplos = []        # one (vertex, neighbor) pair per qualifying vertex

    for articulo in tqdm(graph.get_vertices(), desc="Buscando artículos con links correspondidos"):
        for destino in graph.get_neighbors(articulo):
            if not graph.edge_exists(destino, articulo):
                continue
            # The first reciprocated link is enough for this vertex.
            reciprocos.add(articulo)
            ejemplos.append((articulo, destino))
            break

    print(f"Artículos con links correspondidos: {len(reciprocos)}")
    print(f"Ejemplos de links correspondidos: {ejemplos[:5]}")

    return reciprocos

# Usage: the returned set holds the vertices that have a reciprocated link.
articulos = articulos_con_links_correspondidos(graph)

In [None]:
def label_propagation(graph: Graph):
    """
    Spread labels from seed vertices to the rest of the graph.

    Every vertex with at least 100 neighbors is seeded with its own name as
    label. Then, in each of 10 passes, every vertex not yet marked as
    settled takes the label whose labelled neighbors add up to the largest
    total edge data. Labels are stored as vertex data in ``graph``, so the
    graph is modified in place and a change is visible to the vertices
    processed after it in the same pass.

    :param graph: the graph to label (its vertex data is overwritten)
    :return: dictionary mapping each label to the number of vertices
        counted as holding it; entries may drop to 0
    """

    visitados = set()  # vertices whose label did not change; skipped afterwards
    label_counts = {}

    # Seeding: a vertex with 100 or more neighbors becomes its own label.
    for vertex in tqdm(graph.get_vertices(), desc= "Agregando etiquetas iniciales"):
        if len(graph.get_neighbors(vertex)) >= 100:
            graph.add_vertex_data(vertex, vertex)
            label_counts[vertex] = 1


    for i in tqdm(range(10), desc="Calculando etiquetas"):
        for vertex in graph.get_vertices():
            if (vertex not in visitados):
                # Total edge data per label among the labelled neighbors.
                labels = {}
                for neighbor in graph.get_neighbors(vertex):
                    label = graph.get_vertex_data(neighbor)
                    if label is not None:
                        # NOTE(review): assumes the edge data is numeric; a None
                        # edge payload would fail in the sums below - confirm.
                        weight = graph.get_edge_data(vertex, neighbor)
                        if label in labels:
                            labels[label] += weight
                        else:
                            labels[label] = weight


                if labels:
                    # On ties max() keeps the label that entered ``labels`` first.
                    new_label = max(labels, key=labels.get)


                    old_label = graph.get_vertex_data(vertex)

                    if (new_label == old_label): # If it does not change, do not revisit the node
                        visitados.add(vertex)
                        continue

                    # NOTE(review): assumes every non-None label found on a vertex
                    # already has an entry in label_counts - confirm for graphs
                    # that carry vertex data from before this call.
                    if old_label is not None:
                        label_counts[old_label] -= 1

                    graph.add_vertex_data(vertex, new_label)

                    if new_label in label_counts:
                        label_counts[new_label] += 1
                    else:
                        label_counts[new_label] = 1


    return label_counts


# label_propagation returns a label -> count dictionary and rewrites the vertex data of `graph`.
etiquetas = label_propagation(graph)
# Keep only the labels whose count is above zero.
etiquetas_reales = {clave: valor for clave, valor in etiquetas.items() if valor > 0}




In [None]:
# Number of labels with a positive count, followed by the full label -> count dictionary.
print(len(etiquetas_reales))
print(etiquetas_reales)

In [None]:
import numpy as np
def random_walks(graph:Graph, n_walks=3000, walk_len=100):
    """
    Find the vertex visited most often by a batch of random walks.

    Each walk starts at a uniformly random vertex and repeatedly moves to a
    uniformly random neighbor. It ends after ``walk_len`` visited vertices
    or earlier, when it reaches a vertex without neighbors.

    :param graph: the graph to walk on
    :param n_walks: number of walks to run (at least 1)
    :param walk_len: maximum number of vertices visited per walk (at least 1)
    :return: the vertex with the highest visit count
    :raises ValueError: if the graph has no vertices, or ``n_walks`` or
        ``walk_len`` is smaller than 1
    """
    if n_walks < 1 or walk_len < 1:
        raise ValueError("n_walks and walk_len must be at least 1")

    # Build the vertex list once instead of once per walk.
    vertices = graph.get_vertices()
    if not vertices:
        raise ValueError("The graph has no vertices")

    visitas = {}
    for _ in tqdm(range(n_walks), desc=f"Calculando {n_walks} random walks de hasta {walk_len} pasos"):
        vertex = vertices[np.random.randint(0, len(vertices))]
        for _ in range(walk_len):
            visitas[vertex] = visitas.get(vertex, 0) + 1
            neighbors = graph.get_neighbors(vertex)
            if not neighbors:
                break  # dead end: this walk cannot continue
            vertex = neighbors[np.random.randint(0, len(neighbors))]

    return max(visitas, key=visitas.get)  # key with the highest visit count

# random_walks returns the vertex with the most visits across its random walks.
print("El nodo con mayor probabildiad de centralidad es: ", random_walks(graph))


In [None]:
import numpy as np

def reconstruir_caminos_todos(beggining, end, anterior):
    """
    Enumerate every path from ``beggining`` to ``end`` encoded in ``anterior``.

    :param beggining: first vertex of every path
    :param end: last vertex of every path
    :param anterior: dictionary mapping a vertex to the list of its
        predecessors
    :return: list of paths, each one a list of vertices from ``beggining``
        to ``end``; paths are ordered following the predecessor lists
    """
    if beggining == end:
        return [[beggining]]

    # Every path to a predecessor, extended with ``end``.
    return [
        prefijo + [end]
        for previo in anterior[end]
        for prefijo in reconstruir_caminos_todos(beggining, previo, anterior)
    ]





def betweenness(graph: Graph, num_pares=10):
    """
    Estimate betweenness from the shortest paths of random vertex pairs.

    For each sampled pair every shortest path between the two vertices is
    enumerated. Each intermediate vertex (endpoints excluded) receives
    ``1 / number_of_paths`` for every path it lies on. Pairs made of the
    same vertex twice, or with no path between them, contribute nothing.

    :param graph: the graph to analyse
    :param num_pares: number of random pairs to sample
    :return: dictionary mapping each intermediate vertex to its accumulated
        score; empty when no sampled pair had an intermediate vertex
    """
    vertices = graph.get_vertices()
    densidad = {}
    if len(vertices) < 2:
        return densidad  # no pair of distinct vertices can be drawn

    for _ in tqdm(range(num_pares), desc=f"Calculando caminos mínimos para {num_pares} pares de vértices"):
        # Draw indices instead of calling np.random.choice on the list, which
        # would convert the whole vertex list to an array on every draw.
        vertex1 = vertices[np.random.randint(0, len(vertices))]
        vertex2 = vertices[np.random.randint(0, len(vertices))]
        if vertex1 == vertex2:
            continue

        anterior = encontrar_caminos_minimos(graph, vertex1, vertex2)
        # Skip the pair when vertex2 is not reachable from vertex1.
        if vertex2 not in anterior:
            continue

        caminos = reconstruir_caminos_todos(vertex1, vertex2, anterior)
        if not caminos:
            continue

        aporte = 1 / len(caminos)
        for camino in caminos:
            for v in camino[1:-1]:
                # The first contribution is the fraction itself, not 1.
                densidad[v] = densidad.get(v, 0) + aporte

    return densidad


densidad = betweenness(graph)
# betweenness can return an empty dictionary (no sampled pair had an
# intermediate vertex); max() on an empty dictionary would raise ValueError.
if densidad:
    print("El nodo central es: ", max(densidad, key=densidad.get))
else:
    print("No se encontró ningún nodo intermedio en los pares muestreados")


In [None]:
import numpy as np

def estimar_clustering_por_muestreo(graph, sample_size=100):
    """
    Estimate the average clustering coefficient from a random vertex sample.

    For each sampled vertex with at least two neighbors the coefficient is
    ``2 * links / (k * (k - 1))``, where ``k`` is its number of neighbors
    and ``links`` counts the neighbor pairs ``(n_i, n_j)``, ``i < j`` in
    neighbor-list order, for which ``graph.edge_exists(n_i, n_j)`` holds.
    Vertices with fewer than two neighbors get 0.

    :param graph: object exposing ``get_vertices``, ``get_neighbors`` and
        ``edge_exists``
    :param sample_size: number of vertices to sample without replacement;
        capped at the number of vertices in the graph
    :return: tuple ``(average, per_vertex)``: the mean coefficient over the
        sampled vertices and a dictionary vertex -> coefficient. For an
        empty sample the result is ``(0.0, {})``.
    """
    vertices = graph.get_vertices()
    num_vertices = len(vertices)

    # Sampling without replacement cannot draw more vertices than exist.
    tamano = min(sample_size, num_vertices)
    if tamano <= 0:
        return 0.0, {}

    # Sample positions rather than names: this avoids converting the whole
    # list of names to a NumPy array and keeps the dictionary keys as the
    # original vertex objects.
    indices = np.random.choice(num_vertices, size=tamano, replace=False)
    clustering_nodos = {}

    for indice in tqdm(indices, desc=f"Calculando clustering para {tamano} nodos de muestra"):
        nodo = vertices[indice]
        vecinos = graph.get_neighbors(nodo)
        grado = len(vecinos)

        if grado < 2:
            clustering_nodos[nodo] = 0
            continue

        enlaces_entre_vecinos = 0
        for i in range(grado):
            primero = vecinos[i]
            for j in range(i + 1, grado):
                if graph.edge_exists(primero, vecinos[j]):
                    enlaces_entre_vecinos += 1

        clustering_nodos[nodo] = (2 * enlaces_entre_vecinos) / (grado * (grado - 1))

    promedio_clustering = sum(clustering_nodos.values()) / tamano
    return promedio_clustering, clustering_nodos


# Returns the sample average and the per-vertex coefficients for the sampled vertices.
promedio, clustering_nodos = estimar_clustering_por_muestreo(graph, sample_size=1000000)
print("Coeficiente de clustering promedio estimado:", promedio)


# Sort the (vertex, coefficient) pairs by coefficient, ascending, to show both ends.
sorted_clustering = sorted(clustering_nodos.items(), key=lambda x: x[1])
print("Nodos con coeficiente de clustering más bajo:", sorted_clustering[:5])
print("Nodos con coeficiente de clustering más alto:", sorted_clustering[-5:])
