In [6]:
# import algos and graphs
import sys
import os
import networkx as nx
import matplotlib.pyplot as plt
import gzip
import random
import time
import requests
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Add parent directory to path so algorithms can be imported
sys.path.append(os.path.abspath("../algorithms"))

# Import algorithms
#from greedy import greedy_clique
#from local_search import local_search
from randomized import randomized_max_clique
from simulated_annealing import simulated_annealing_with_restarts
from bron_kerbosch import bron_kerbosch_with_pivot, bron_kerbosch_basic

print("All algorithms imported successfully.")


All algorithms imported successfully.


In [None]:
#import graphs
# --- Download Graph Datasets (Small + Large) ---

datasets = {
    #"small": [
        # ("karate_club", "https://github.com/mlabonne/graph-datasets/blob/main/node_classification/karate-club/karate.gml"),
        # ("adjnoun_adj", "https://networkrepository.com/adjnoun-adjacency.php"),
        # ("game_of_thrones", "https://chatox.github.io/networks-science-course/practicum/data/game-of-thrones/"),
        #("marvel_heroes", "https://chatox.github.io/networks-science-course/practicum/data/marvel-hero.csv"),
        #("student_cooperation", "https://chatox.github.io/networks-science-course/practicum/data/student-cooperation.graphml"),
        # ("flavor_network", "https://chatox.github.io/networks-science-course/practicum/data/flavor-network/"),
        # ("hamsterster", "https://networkrepository.com/soc-hamsterster.php"),
        # ("ogdos_100", "<link-to-OGDOS-graph-~100nodes>"),
        # ("brock200_2", "https://turing.cs.hbg.psu.edu/txn131/graphs/brock200_2.clq"),
        # ("c-fat200-5", "https://turing.cs.hbg.psu.edu/txn131/graphs/c-fat200-5.clq"),
    #],
    "large": [
        ("facebook_combined", "https://snap.stanford.edu/data/facebook_combined.txt.gz"),
        # commented because these graphs have their edges in lakhs
        # ("web-Google", "https://snap.stanford.edu/data/web-Google.txt.gz"),
        # ("amazon0601", "https://snap.stanford.edu/data/amazon0601.txt.gz"),
    ],
}

# --- Create directories and download ---
for size, graphs in datasets.items():
    os.makedirs(f"raw_graphs/{size}", exist_ok=True)

    for name, url in graphs:
        filename = os.path.basename(url)
        path = f"raw_graphs/{size}/{filename}"

        if os.path.exists(path):
            print(f"‚ö° Skipping {name}: already downloaded ({path})")
            continue

        print(f"‚¨áÔ∏è Downloading {name} ...")

        try:
            with requests.get(url, stream=True, verify=False, timeout=60) as r:
                r.raise_for_status()  # raise error if download fails
                with open(path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
            print(f"‚úÖ Saved: {path}\n")

        except requests.exceptions.RequestException as e:
            print(f"‚ùå Failed to download {name}: {e}\n")

print("üéØ All requested graph datasets processed.")


‚¨áÔ∏è Downloading karate_club ...
‚ùå Failed to download karate_club: 404 Client Error: Not Found for url: https://github.com/mlabonne/graph-datasets/blob/main/node_classification/karate-club/karate.gml

‚¨áÔ∏è Downloading adjnoun_adj ...
‚ùå Failed to download adjnoun_adj: 406 Client Error: Not Acceptable for url: https://networkrepository.com/adjnoun-adjacency.php

‚ö° Skipping game_of_thrones: already downloaded (raw_graphs/small/)
‚¨áÔ∏è Downloading marvel_heroes ...
‚úÖ Saved: raw_graphs/small/marvel-hero.csv

‚¨áÔ∏è Downloading student_cooperation ...
‚úÖ Saved: raw_graphs/small/student-cooperation.graphml

‚ö° Skipping flavor_network: already downloaded (raw_graphs/small/)
‚¨áÔ∏è Downloading hamsterster ...
‚ùå Failed to download hamsterster: 406 Client Error: Not Acceptable for url: https://networkrepository.com/soc-hamsterster.php

‚¨áÔ∏è Downloading ogdos_100 ...
‚ùå Failed to download ogdos_100: Invalid URL '<link-to-OGDOS-graph-~100nodes>': No scheme supplied. Perhaps you 

In [10]:
def load_graph_large(path):
    """
    Load a large graph in SNAP format (.txt or .txt.gz).
    SNAP format: each line represents an edge "u v"
    """
    ext = os.path.splitext(path)[1]

    # For .gz compressed files
    if ext == ".gz":
        with gzip.open(path, 'rt') as f:
            G = nx.read_edgelist(f, comments="#", nodetype=int)
    # For plain .txt files
    elif ext == ".txt":
        G = nx.read_edgelist(path, comments="#", nodetype=int)
    else:
        raise ValueError(f"Unsupported format for large graphs: {ext}")

    return G


def save_as_adjlist(G, output_path):
    """
    Save the graph in adjacency list format:
    Each line: node: neighbor1 neighbor2 neighbor3 ...
    """
    with open(output_path, "w") as f:
        for node in sorted(G.nodes()):
            neighbors = " ".join(str(n) for n in sorted(G.neighbors(node)))
            f.write(f"{node}: {neighbors}\n")


def convert_large_graphs(input_folder, output_folder):
    """
    Convert all SNAP-format graphs from input_folder to adjacency list format,
    and save them in output_folder.
    """
    # ‚úÖ Ensure output folder exists (relative to project root)
    os.makedirs(output_folder, exist_ok=True)

    for fname in os.listdir(input_folder):
        path = os.path.join(input_folder, fname)
        if not os.path.isfile(path):
            continue

        print(f"üîÑ Converting {fname}...")

        try:
            # 1Ô∏è‚É£ Load graph
            G = load_graph_large(path)

            # 2Ô∏è‚É£ Generate output filename (replace .txt/.gz with .adj)
            out_name = os.path.splitext(fname)[0] + ".adj"
            out_path = os.path.join(output_folder, out_name)

            # 3Ô∏è‚É£ Save adjacency list
            save_as_adjlist(G, out_path)

            print(f"‚úÖ Saved adjacency list: {out_path} ({len(G.nodes())} nodes, {len(G.edges())} edges)\n")

        except Exception as e:
            print(f"‚ùå Skipping {fname}: {e}\n")


# --- Run the conversion ---
if __name__ == "__main__":
    # ‚úÖ Ensure paths point to the project root (not /experiments)
    convert_large_graphs("raw_graphs/large", "../data/large_graphs")



üîÑ Converting facebook_combined.txt.gz...
‚úÖ Saved adjacency list: ../data/large_graphs/facebook_combined.txt.adj (4039 nodes, 88234 edges)



In [None]:
# Greedy

In [None]:
# Local search

In [None]:
# randomized

In [None]:
# local_random

In [None]:
# bron kerbosch