In [1]:
import sys
from pathlib import Path
sys.path.append(str(Path("..").resolve()))


In [None]:
import requests
import pandas as pd

import networkx as nx
from sage.all import *
from objects import *
from utils import gpn


### Figure generator for each graph class

In [3]:
# Shared settings for the figure cells.
# Upper bound on the node count used by the cubic-figure loop.
max_nodes = 10

# Figures go to "docs/figs", resolved two levels above the current working
# directory; the folder is created up front if it does not exist yet.
output_dir = Path.cwd().parent.parent.joinpath("docs/figs")
output_dir.mkdir(parents=True, exist_ok=True)


In [4]:
# Generates one PNG per bipartite graph on 1 up to 8 nodes.
# Files are written to `output_dir / "bipartite"` and named after the node
# count and the graph's position in `g.graphs`.
bipartite_dir = output_dir / "bipartite"
# Only `output_dir` itself is created by the config cell, so make sure the
# class sub-folder exists before any figure is saved into it.
bipartite_dir.mkdir(parents=True, exist_ok=True)

for n in range(1, 9):
    try:
        g = BipartiteGraphs(n)
    except RuntimeError:
        # The class could not be built for this node count; skip it.
        continue

    # Only the position is needed here: plot() selects the graph by index.
    for i, _graph in enumerate(g.graphs):
        file_path = bipartite_dir / f"bipartite_{n}_nodes_{i}.png"
        g.plot(index=i, filename=str(file_path))


In [5]:
# Generates one PNG per cubic graph on 4, 6, ... up to `max_nodes` nodes.
# Files are written to `output_dir / "cubic"` and named after the node count
# and the graph's position in `g.graphs`.
cubic_dir = output_dir / "cubic"
# Only `output_dir` itself is created by the config cell, so make sure the
# class sub-folder exists before any figure is saved into it.
cubic_dir.mkdir(parents=True, exist_ok=True)

# Step of 2: only even node counts are visited.
for n in range(4, max_nodes + 1, 2):
    try:
        g = CubicGraphs(n)
    except RuntimeError:
        # The class could not be built for this node count; skip it.
        continue

    # Only the position is needed here: plot() selects the graph by index.
    for i, _graph in enumerate(g.graphs):
        file_path = cubic_dir / f"cubic_{n}_nodes_{i}.png"
        g.plot(index=i, filename=str(file_path))


In [6]:
# Generates one PNG per triangle-free graph on 1 up to 7 nodes.
# Files are written to `output_dir / "triangle-free"` and named after the
# graph's degree sequence (e.g. "tf_1_1.png").
# NOTE(review): the file name depends on the degree sequence only, so two
# graphs sharing a degree sequence map to the same path and the later one would
# presumably replace the earlier one -- confirm this is intended.
triangle_free_dir = output_dir / "triangle-free"
# Only `output_dir` itself is created by the config cell, so make sure the
# class sub-folder exists before any figure is saved into it.
triangle_free_dir.mkdir(parents=True, exist_ok=True)

for n in range(1, 8):
    try:
        g = TriangleFreeGraphs(n)
    except RuntimeError:
        # The class could not be built for this node count; skip it.
        continue

    # `g.degree_sequences` is indexed in parallel with `g.graphs`, so a plain
    # 0-based index serves both the lookup and the plot() call.
    for i, _graph in enumerate(g.graphs):
        deg_seq = g.degree_sequences[i]
        deg_str = "_".join(map(str, deg_seq))
        file_name = f"tf_{deg_str}.png"

        file_path = triangle_free_dir / file_name
        g.plot(index=i, filename=str(file_path))


### Graph-class-specific data generator

In [7]:
# Largest node count covered by the class data generator in this section.
max_nodes = 10


In [None]:
# Generates the per-class data set used for further data analysis:
# -> all bipartite graphs on 1 up to `max_nodes` nodes
# -> all cubic graphs on 4, 6, ... up to `max_nodes` nodes
# -> all triangle-free graphs on 1 up to `max_nodes` nodes
# Each graph becomes one row: graph6 string, class label, node count,
# edge count and the value returned by `gpn`.


def _graph6_string(graph):
    """Return the header-less graph6 encoding of ``graph`` as a one-line string."""
    return (
        nx.to_graph6_bytes(graph, header=False)
        .decode("utf-8")
        .replace("\n", "")
        .replace("\r", "")
    )


def _class_rows(graph_class, label, node_counts):
    """Build the data rows for one graph class.

    Parameters
    ----------
    graph_class : callable
        Constructor taking a node count and returning an object with a
        ``graphs`` iterable (e.g. ``BipartiteGraphs``).
    label : str
        Value stored in the ``type`` column for every row of this class.
    node_counts : iterable of int
        Node counts to generate graphs for.

    Returns
    -------
    list of list
        One ``[graph6, label, num_nodes, num_edges, gpn]`` row per graph.
    """
    rows = []
    for n in node_counts:
        try:
            family = graph_class(n)
        except RuntimeError:
            # The class could not be built for this node count; skip it.
            continue

        for graph in family.graphs:
            rows.append(
                [
                    _graph6_string(graph),
                    label,
                    graph.number_of_nodes(),
                    graph.number_of_edges(),
                    gpn(graph),
                ]
            )
    return rows


# (constructor, label, node counts) in the order the rows appear in the CSV.
class_specs = [
    (BipartiteGraphs, "bipartite", range(1, max_nodes + 1)),
    (CubicGraphs, "cubic", range(4, max_nodes + 1, 2)),
    (TriangleFreeGraphs, "triangle-free", range(1, max_nodes + 1)),
]

df_data = []
for graph_class, label, node_counts in class_specs:
    df_data.extend(_class_rows(graph_class, label, node_counts))

df = pd.DataFrame(
    df_data,
    columns=[ "graph6_encoding", "type", "num_nodes", "num_edges", "gpn_num"],
)

df.to_csv("../../data/generated/gpn_class_data.csv", index=False, encoding='utf-8')



In [None]:
# Generates the data rows for all cubic graphs on 4, 6, ... up to 18 nodes.
# The resulting `df` is exported by the following cell as
# "cubic_graphs_upto18.csv"; graphs on 20 nodes have their own cell.

df_data = []

# Even node counts from 4 to 18 inclusive. The bounds must describe a
# non-empty range, otherwise `df` ends up empty and an empty table is exported.
for n in range(4, 18 + 1, 2):
    try:
        cg = CubicGraphs(n)
    except RuntimeError:
        # The class could not be built for this node count; skip it.
        continue

    # Progress marker: one line per node count that is being processed.
    print(n)
    for graph in cg.graphs:

        df_data.append(
            [
                nx.to_graph6_bytes(graph, header=False).decode('utf-8').replace('\n', '').replace('\r', ''),
                "cubic",
                graph.number_of_nodes(),
                graph.number_of_edges(),
                gpn(graph),
            ]
        )

df = pd.DataFrame(
    df_data,
    columns=[ "graph6_encoding", "type", "num_nodes", "num_edges", "gpn_num"],
)



4
6
8
10
12
14
16
18


In [10]:

# Writes whatever `df` currently holds (expected: the cubic-graph table built
# in the previous cell) to CSV without the index column.
df.to_csv("../../data/generated/cubic_graphs_upto18.csv", index=False, encoding='utf-8')


In [None]:
# Generator for all cubic graphs on 20 nodes.
# Builds one row per graph (graph6 string, class label, node count, edge count
# and the value returned by `gpn`) and exports the table to CSV.
df_data = []
cg = CubicGraphs(20)

for graph in cg.graphs:
    df_data.append(
        [
            nx.to_graph6_bytes(graph, header=False)
            .decode("utf-8")
            .replace("\n", "")
            .replace("\r", ""),
            "cubic",
            graph.number_of_nodes(),
            graph.number_of_edges(),
            gpn(graph),
        ]
    )

df = pd.DataFrame(
    df_data,
    columns=["graph6_encoding", "type", "num_nodes", "num_edges", "gpn_num"],
)

# index=False keeps the layout identical to the other exported tables in this
# notebook; without it pandas adds an unnamed leading index column.
df.to_csv(
    "../../data/generated/cubic_graphs_on20_nodes.csv",
    index=False,
    encoding="utf-8",
)


In [None]:
# Node-based generator: for every node count from 1 to 9, enumerate the graphs
# produced by `graphs.nauty_geng` with the "-c" option and record, per graph,
# its graph6 string, node count, edge count and the value returned by `gpn`.
# NOTE(review): `graphs` is not defined in this notebook; presumably it comes
# from `from sage.all import *` -- confirm.
columns = [
    "graph6_encoding",
    "num_nodes",
    "num_edges",
    "gpn_num",
]

df_data = []
for n in range(1, 10):
    print(n)
    for sage_graph in graphs.nauty_geng(f"{n} -c"):
        g = sage_graph.networkx_graph()

        # Sanity check only: a disconnected graph is reported but still recorded.
        if not nx.is_connected(g):
            print("g is not connected")

        raw_encoding = nx.to_graph6_bytes(g, header=False).decode('utf-8')
        df_data.append(
            [
                raw_encoding.replace('\n', '').replace('\r', ''),
                g.number_of_nodes(),
                g.number_of_edges(),
                gpn(g),
            ]
        )

df = pd.DataFrame(df_data, columns=columns)
df.to_csv("../../data/generated/all_graphs_9n.csv", index=False)


### Load GE data files (graph6 files)
(required for the edge-based generator)

In [None]:
# Downloads the edge-based graph6 files ge1c.g6 ... ge15c.g6 and stores them
# locally as "edge-based-ge{i}c.g6"; they are the input of the edge-based
# generator.
ge_dir = Path("../../data/loaded/ge_files/")
# Make sure the target folder exists before the first file is written.
ge_dir.mkdir(parents=True, exist_ok=True)

for i in range(1, 16):
    url = f"https://users.cecs.anu.edu.au/~bdm/data/ge{i}c.g6"
    local_filename = ge_dir / f"edge-based-ge{i}c.g6"

    print(f"Downloading {url} ...")

    try:
        # Without a timeout a stalled connection would block the cell forever.
        resp = requests.get(url, timeout=60)
        resp.raise_for_status()

        with open(local_filename, "wb") as f:
            f.write(resp.content)

        print(f"saved as {local_filename}")

    except requests.RequestException as e:
        # Best effort: report the failed URL and continue with the next file.
        print(f"{url}: {e}")


In [None]:
# Edge-based generator: reads the local graph6 files "edge-based-ge{k}c.g6"
# for k = 1 ... 14 and records, for every graph in them, its node count, edge
# count and the value returned by `gpn`.
ge_dir = Path("../../data/loaded/ge_files/")
columns = ["num_nodes", "num_edges", "gpn_num"]

df_data = []
for file_no in range(1, 15):
    print(f"loading {file_no}")
    local_file = ge_dir / f"edge-based-ge{file_no}c.g6"

    with open(local_file, "r") as handle:
        # One graph6 string per line; surrounding whitespace is dropped and
        # blank lines are ignored.
        for encoding in (raw.strip() for raw in handle):
            if encoding:
                g = nx.from_graph6_bytes(encoding.encode())
                df_data.append([g.number_of_nodes(), g.number_of_edges(), gpn(g)])

df = pd.DataFrame(df_data, columns=columns)
df.to_csv("../../data/generated/all_graphs_edge_data.csv", index=False)


In [None]:
# Based on the hypothesis that bipartite graphs are the group with the optimal
# gpn value: generate all bipartite graphs on 11 and 12 nodes and store, per
# node count, a CSV with each graph's graph6 id and its gpn value.

for n in [11, 12]:
    rows = []

    bp_graphs = BipartiteGraphs(n)

    for graph in bp_graphs.graphs:
        # Plain string id. A trailing comma after this expression would turn it
        # into a 1-tuple and the CSV would contain "('...',)" instead.
        graph_id = (
            nx.to_graph6_bytes(graph, header=False)
            .decode('utf-8')
            .replace('\n', '')
            .replace('\r', '')
        )
        gpn_value = gpn(graph)

        rows.append({
            "id": graph_id,
            "gpn": gpn_value,
        })

    df = pd.DataFrame(rows)

    df.to_csv(f"../../data/generated/bp_graph_data_n{n}.csv", index=False)
