In [None]:
import csv
import ast
import networkx as nx
from itertools import combinations
import matplotlib.pyplot as plt
from collections import defaultdict
import re

In [None]:
class DFSCode:
    def __init__(self):
        self.edges = []

    def add_edge(self, edge):
        self.edges.append(edge)

    def to_tuple(self):
        return tuple(self.edges)

    def __len__(self):
        return len(self.edges)

In [None]:
class Graph:
    def __init__(self):
        self.edges = []
        self.nodes = set()
    
    def add_edge(self, u, v):
        self.edges.append((u, v))
        self.nodes.update([u, v])
    
    def to_networkx(self):
        g = nx.Graph()
        g.add_edges_from(self.edges)
        return g

    def get_dfs_code(self):
        dfs_code = DFSCode()
        for u, v in sorted(self.edges):
            dfs_code.add_edge((u, v))
        return dfs_code
    
    def __repr__(self):
        return f"Graph with edges: {self.edges}"

In [None]:
class GSpan:
    def __init__(self, graphs, min_support, max_nodes):
        self.graphs = graphs
        self.min_support = min_support
        self.max_nodes = max_nodes
        self.frequent_subgraphs = []
        self.dfs_codes = set()

    def run(self):
        initial_nodes = self.get_initial_nodes()
        subgraphs = [Graph() for _ in initial_nodes]
        
        for i, node in enumerate(initial_nodes):
            subgraphs[i].nodes.add(node)
            dfs_code = subgraphs[i].get_dfs_code()
            self.expand([subgraphs[i]], dfs_code)

    def get_initial_nodes(self):
        node_support = defaultdict(int)
        for graph in self.graphs:
            for node in graph.nodes:
                node_support[node] += 1

        initial_nodes = [node for node, support in node_support.items() if support >= self.min_support]
        return initial_nodes
    
    def expand(self, subgraphs, parent_dfs_code):
        while subgraphs:
            new_subgraphs = []
            for subgraph in subgraphs:
                support = self.get_support(subgraph)
                if support >= self.min_support:
                    dfs_code = subgraph.get_dfs_code()
                    if dfs_code.to_tuple() not in self.dfs_codes:
                        self.dfs_codes.add(dfs_code.to_tuple())
                        self.frequent_subgraphs.append(subgraph)

                        if len(subgraph.nodes) < self.max_nodes:
                            extensions = self.get_extensions(subgraph)
                            for extension in extensions:
                                new_subgraph = Graph()
                                new_subgraph.nodes = subgraph.nodes.union([extension])
                                new_subgraph.edges = self.get_edges_for_nodes(new_subgraph.nodes)
                                new_dfs_code = new_subgraph.get_dfs_code()
                                new_subgraphs.append(new_subgraph)
            subgraphs = new_subgraphs

    def get_support(self, subgraph):
        count = 0
        subgraph_nx = subgraph.to_networkx()
        for graph in self.graphs:
            graph_nx = graph.to_networkx()
            if nx.isomorphism.GraphMatcher(graph_nx, subgraph_nx).subgraph_is_isomorphic():
                count += 1
        return count
    
    def get_extensions(self, subgraph):
        extensions = set()
        for node in subgraph.nodes:
            for graph in self.graphs:
                if node in graph.nodes:
                    for neighbor in graph.nodes:
                        if neighbor not in subgraph.nodes:
                            extensions.add(neighbor)
        return list(extensions)

    def get_edges_for_nodes(self, nodes):
        edges = []
        node_list = list(nodes)
        for i in range(len(node_list)):
            for j in range(i + 1, len(node_list)):
                edges.append((node_list[i], node_list[j]))
        return edges


In [154]:
def parse_edges(edge_str):
    # Sử dụng biểu thức chính quy để tìm các cặp cạnh (u,v)
    edge_pattern = r'\((\d+),\s*(\d+)\)'
    edges = re.findall(edge_pattern, edge_str)
    return [(int(u), int(v)) for u, v in edges]

def read_graphs_from_csv(file_path):
    graphs = []
    with open(file_path, 'r') as file:
        reader = csv.reader(file)
        next(reader)
        row_count = 0         
        for row in reader:  
            if row_count < 2:  # Chỉ đọc n hàng tiếp theo  
                edge_str = row[0]  
                edges = parse_edges(edge_str)  
                graph = Graph()  
                for edge in edges:  
                    graph.add_edge(edge[0], edge[1])  
                graphs.append(graph)  
                row_count += 1  
            else:  
                break  
    return graphs

def visualize_graph(graph, title="Graph"):
    g_nx = graph.to_networkx()
    print(f'Nodes: {g_nx.nodes()}')
    # pos = nx.spring_layout(g_nx)  # Tạo layout cho đồ thị
    # plt.figure()
    # nx.draw(g_nx, pos, with_labels=True, node_color='skyblue', edge_color='black', node_size=1500, font_size=12)
    # plt.title(title)
    # plt.show()


In [155]:
# Đường dẫn đến file CSV
file_path = "C:\\Users\\ACER\\OneDrive\\Desktop\\FinalProject_GM\\Task1\\FrequentSubgraphMining\\dataset.csv"

# Đọc dữ liệu đồ thị từ file CSV
graphs = read_graphs_from_csv(file_path)
graph_list = []

# Vẽ tất cả các đồ thị ban đầu
for i, graph in enumerate(graphs):
    if len(graph.nodes) == 0:  # Skip empty graphs
        continue
    visualize_graph(graph, title=f"Original Graph {i+1}")


Nodes: [0, 1, 2, 3, 4, 5, 6]
Nodes: [0, 4, 5, 7, 8, 9, 11, 1, 2, 3, 6, 10, 12]


In [156]:
# Khởi tạo và chạy thuật toán GSpan với ngưỡng hỗ trợ tối thiểu
min_support = 2
gspan = GSpan(graphs, min_support, 3)
gspan.run()

# Vẽ các đồ thị con phổ biến sau khi chạy GSpan
for i, subgraph in enumerate(gspan.frequent_subgraphs):
    visualize_graph(subgraph, title=f"Frequent Subgraph {i+1}")

# "C:\\Users\\ACER\\OneDrive\\Desktop\\FinalProject_GM\\Task1\\FrequentSubgraphMining\\dataset.csv"

Nodes: []
Nodes: [0, 1]
Nodes: [0, 2]
Nodes: [0, 3]
Nodes: [0, 4]
Nodes: [0, 5]
Nodes: [0, 6]
Nodes: [0, 7]
Nodes: [0, 8]
Nodes: [0, 9]
Nodes: [0, 10]
Nodes: [0, 11]
Nodes: [0, 12]
Nodes: [0, 1, 2]
Nodes: [0, 1, 3]
Nodes: [0, 1, 4]
Nodes: [0, 1, 5]
Nodes: [0, 1, 6]
Nodes: [0, 1, 7]
Nodes: [0, 1, 8]
Nodes: [0, 1, 9]
Nodes: [0, 1, 10]
Nodes: [0, 1, 11]
Nodes: [0, 1, 12]
Nodes: [0, 2, 3]
Nodes: [0, 2, 4]
Nodes: [0, 2, 5]
Nodes: [0, 2, 6]
Nodes: [0, 2, 7]
Nodes: [0, 8, 2]
Nodes: [0, 9, 2]
Nodes: [0, 2, 10]
Nodes: [0, 2, 11]
Nodes: [0, 2, 12]
Nodes: [0, 3, 4]
Nodes: [0, 3, 5]
Nodes: [0, 3, 6]
Nodes: [0, 3, 7]
Nodes: [0, 8, 3]
Nodes: [0, 9, 3]
Nodes: [0, 10, 3]
Nodes: [0, 11, 3]
Nodes: [0, 3, 12]
Nodes: [0, 4, 5]
Nodes: [0, 4, 6]
Nodes: [0, 4, 7]
Nodes: [0, 8, 4]
Nodes: [0, 9, 4]
Nodes: [0, 10, 4]
Nodes: [0, 11, 4]
Nodes: [0, 4, 12]
Nodes: [0, 5, 6]
Nodes: [0, 5, 7]
Nodes: [0, 8, 5]
Nodes: [0, 9, 5]
Nodes: [0, 10, 5]
Nodes: [0, 11, 5]
Nodes: [0, 12, 5]
Nodes: [0, 6, 7]
Nodes: [0, 8, 6]
Nodes