In [5]:

import pandas as pd
from multiprocessing import Pool, cpu_count, Queue
from tqdm import tqdm

class Graph():
    def __init__(self, node_list):
        self.node_list = node_list
        self.num_node = len(node_list)
        self.graph = {name:{} for name in node_list.keys()}
        for name, node in tqdm(node_list.items(), desc="BUILD GRAPH"):
            for neighbor in node.get_neighbors():
                self.graph[name][neighbor] = 1
                                        
        self.paths = {start:{} for start in self.node_list}

        pool = Pool(cpu_count())  # multiprocessing pool
        paths = []

        # calculates shortestpath and show the result on "paths" by multiprocessing
        for result in tqdm(pool.imap_unordered(self.save_shortestPath, enumerate(list(self.node_list)[:-1])),
                        desc="GET SHORTEST PATH", total=len(self.node_list)-1):
            paths += result

        for path in paths:
            self.paths[path[0]][path[1]] = path[2]
            self.paths[path[1]][path[0]] = path[2][::-1]

        # END multiprocessing
        pool.close()
        pool.join()

    def save_shortestPath(self, tup):
        i = tup[0]
        start = tup[1]
        res = []
        for end in list(self.node_list)[i+1:]:
            shortestPath = self.shortestPathBFS(start, end)
            res.append((start,end,shortestPath))  # save tuple
        return res

    def get_neighbors(self, node_name):
        return self.graph[node_name].keys()

    def get_graph(self):
        return self.graph

    def shortestPathBFS(self, start, end):
        visited = set([start])
        prev = {}
        queue = [start]  # queue for bfs
        while len(queue)>0:
            node = queue.pop(0)  # queue node
            if node == end: 
                break
            for neighbor in self.get_neighbors(node):
                if neighbor not in visited:
                    queue.append(neighbor)
                    visited.add(neighbor)
                    prev[neighbor] = node #queue

        def path(node):
            shortest_path = [node]
            while node != start:
                node = prev[node]  # prev path
                shortest_path.insert(0, node)
            return shortest_path

        return path(node)

    def get_degree_centrality(self, node_name: str):
        return len(self.graph[node_name].keys())  

    def get_closeness_centrality(self, node_name: str):
        closeness = 0
        for path in self.paths[node_name].values():
            closeness += len(path)  # path closeness
        return (self.num_node - 1) / max(1, closeness)  # closeness 

    def get_betweenness_centrality(self, node_name: str):
        total = 0  #shortest path 
        betweenness = 0
        for other_node in self.node_list:
            if node_name != other_node:
                for path in self.paths[other_node].values():
                    total += 1
                    if node_name in path:  # path
                        betweenness += 1  # counting
        return node_name, betweenness / total

class Node():
    def __init__(self, name):
        self.name = name
        self.accessibility = {}

    def add_edge(self, airport):  # edge counting (accessibility)
        if airport.name in self.accessibility:
            self.accessibility[airport.name] += 1
            airport.accessibility[self.name] += 1
        else:
            self.accessibility[airport.name] = 1
            airport.accessibility[self.name] = 1

    def get_neighbors(self):
        return self.accessibility.keys()

if __name__ == "__main__":
    df = pd.read_csv("/content/Test3.csv")  # pandas csv 
    source = df['From Node']
    dest = df['Topic Category']
    node_list = {}  # name : node

    for s, d in zip(source, dest):
        if s not in node_list:  # dictionary 
            ns = Node(s)
            node_list[s] = ns
        if d not in node_list:  # dictionary 
            nd = Node(d)
            node_list[d] = nd
        node_list[s].add_edge(node_list[d])  # dictionary 

    graph = Graph(node_list)

    # degree centrality
    degree_centrality = []
    for name in tqdm(node_list, desc="CLOSENESS CENTRALITY"):
        degree_centrality.append((name, graph.get_degree_centrality(name)))
    degree_centrality = pd.DataFrame(degree_centrality, columns = ['From Node', 'degree_centrality'])
    degree_centrality = degree_centrality.sort_values(['degree_centrality'], ascending=[False])
    degree_centrality.to_csv('csv file')

    # closeness centrality
    closeness_centrality = []
    for name in tqdm(node_list, desc="CLOSENESS CENTRALITY"):
        closeness_centrality.append((name, graph.get_closeness_centrality(name)))
    closeness_centrality = pd.DataFrame(closeness_centrality, columns = ['From Node', 'closeness_centrality'])
    closeness_centrality = closeness_centrality.sort_values(['closeness_centrality'], ascending=[False])
    closeness_centrality.to_csv('csv file')

    # betweenness centrality
    pool = Pool(cpu_count())
    betweenness_centrality = []
    for name, betweenness in tqdm(pool.imap_unordered(graph.get_betweenness_centrality, node_list), desc="BETWEENNESS CENTRALITY"):
        betweenness_centrality.append((name, betweenness))  # betweenness 계산에 multiprocessing 
    betweenness_centrality = pd.DataFrame(betweenness_centrality, columns = ['From Node', 'betweenness_centrality'])
    betweenness_centrality = betweenness_centrality.sort_values(['betweenness_centrality'], ascending=[False])
    betweenness_centrality.to_csv('csv file')

    # multiprocessing
    pool.close()
    pool.join()

    print(degree_centrality)
    print(closeness_centrality)
    print(betweenness_centrality)

BUILD GRAPH: 100%|██████████| 8/8 [00:00<00:00, 58867.42it/s]
GET SHORTEST PATH: 100%|██████████| 7/7 [00:00<00:00, 1580.37it/s]
CLOSENESS CENTRALITY: 100%|██████████| 8/8 [00:00<00:00, 49272.29it/s]
CLOSENESS CENTRALITY: 100%|██████████| 8/8 [00:00<00:00, 43862.00it/s]
BETWEENNESS CENTRALITY: 8it [00:00, 60677.09it/s]


                From Node  degree_centrality
1        Beauty & Hygiene                  4
3    Cleaning & Household                  4
0                       0                  3
5                       2                  3
7                       3                  3
4                       1                  2
6    Gourmet & World Food                  2
2  Kitchen, Garden & Pets                  1
                From Node  closeness_centrality
1        Beauty & Hygiene              0.411765
3    Cleaning & Household              0.411765
0                       0              0.368421
5                       2              0.368421
7                       3              0.368421
4                       1              0.333333
6    Gourmet & World Food              0.304348
2  Kitchen, Garden & Pets              0.280000
                From Node  betweenness_centrality
1        Beauty & Hygiene                0.469388
0                       0                0.428571
5            