# GraphUniverse

In [1]:
import pickle
import os

# Function to load a pickle file
def load_pickle_file(file_path):
    """
    Read a pickle file from disk and hand back the object stored in it.

    A short confirmation (path and type of the loaded object) is printed on
    success. Failures while opening or unpickling are reported on stdout
    and turned into a ``None`` return instead of propagating.

    Args:
        file_path (str): Path to the pickle file

    Returns:
        object: The unpickled object, or None if loading failed

    Raises:
        FileNotFoundError: If ``file_path`` does not exist
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    try:
        with open(file_path, 'rb') as handle:
            loaded = pickle.load(handle)
        print(f"Successfully loaded pickle file: {file_path}")
        print(f"Data type: {type(loaded)}")
    except Exception as err:
        # Best-effort loader: report the problem and signal failure with None
        print(f"Error loading pickle file: {err}")
        return None
    else:
        return loaded

# Example usage:
# Replace 'your_file.pkl' with the path to your pickle file
# data = load_pickle_file('your_file.pkl')
# print(data)


# Additional utility functions for pickle files

def inspect_pickle_contents(file_path, max_items=10):
    """
    Load a pickle file and print a short summary of what it contains

    The object is loaded in full via ``load_pickle_file``; only the printed
    report is truncated. The report shows the type, the length of the
    string representation, the container length when available, and a
    preview that depends on the type (dict keys, leading list/tuple items,
    or a clipped string representation for anything else).

    Args:
        file_path (str): Path to the pickle file
        max_items (int): Maximum number of keys/items to display for
            dicts, lists and tuples
    """
    payload = load_pickle_file(file_path)
    if payload is None:
        # Loading failed (already reported by the loader) or the file holds None
        return

    print("\n=== Pickle File Analysis ===")
    print(f"File: {file_path}")
    print(f"Type: {type(payload)}")
    print(f"Size in memory: {len(str(payload))} characters (string representation)")

    # Report a length for any sized object except plain strings
    is_sized = hasattr(payload, '__len__')
    if is_sized and not isinstance(payload, str):
        print(f"Length: {len(payload)}")

    if isinstance(payload, dict):
        shown_keys = list(payload.keys())[:max_items]
        print(f"Dictionary keys: {shown_keys}")
        hidden = len(payload) - max_items
        if hidden > 0:
            print(f"... and {hidden} more keys")
        return

    if isinstance(payload, (list, tuple)):
        print(f"First {min(max_items, len(payload))} items:")
        for position, element in enumerate(payload[:max_items]):
            print(f"  [{position}]: {type(element)} - {str(element)[:100]}")
        hidden = len(payload) - max_items
        if hidden > 0:
            print(f"... and {hidden} more items")
        return

    # Any other type: fall back to a (possibly clipped) string preview
    rendered = str(payload)
    if len(rendered) > 200:
        print(f"Preview: {rendered[:200]}...")
    else:
        print(f"Content: {rendered}")

def list_pickle_files(directory="."):
    """
    Recursively collect the pickle files found under a directory

    A file counts as a pickle file when its name ends in ``.pkl``, ``.pck``
    or ``.pickle``. The matches are printed one per line (or a notice when
    there are none) and also returned.

    Args:
        directory (str): Directory to search for pickle files

    Returns:
        list: Paths of the matching files, in ``os.walk`` order
    """
    suffixes = ('.pkl', '.pck', '.pickle')
    matches = []

    for current_dir, _subdirs, filenames in os.walk(directory):
        # str.endswith accepts a tuple, so one call tests every extension
        matches.extend(
            os.path.join(current_dir, filename)
            for filename in filenames
            if filename.endswith(suffixes)
        )

    if not matches:
        print(f"No pickle files found in {directory}")
        return matches

    print(f"Found {len(matches)} pickle files:")
    for path in matches:
        print(f"  - {path}")
    return matches

In [2]:
# Import the graph generation classes
from graph_universe.model import GraphUniverse, GraphSample, GraphFamilyGenerator
from utils.visualizations import (
    plot_graph_communities, 
    plot_universe_degree_centers
)

# Start by discovering every pickle file below the TopoBench checkout
print("Searching for pickle files...")
pickle_files = list_pickle_files("/home/gbg141/TopoBench")

# Path of the specific pickle file to open; edit this to point at your own file
your_pickle_file = "/home/gbg141/TopoBench/tutorials/first_graphuniverse_datasets_K_parameter/family_k10/2_to_5_communities_per_graph/pyg_graph_list_community_detection.pkl"

if not os.path.exists(your_pickle_file):
    # Nothing at that path: show what the search above turned up instead
    print(f"Pickle file not found. Available files: {pickle_files}")
else:
    # Option 1: plain load, keeping the object around as `data`
    data = load_pickle_file(your_pickle_file)

    # Option 2: load again and print a summary of the contents
    inspect_pickle_contents(your_pickle_file)



Searching for pickle files...
Found 12 pickle files:
  - /home/gbg141/TopoBench/tutorials/first_graphuniverse_datasets_K_parameter/family_k10/graph_size_50_to_100/10_communities_per_graph/pyg_graph_list_community_detection.pkl
  - /home/gbg141/TopoBench/tutorials/first_graphuniverse_datasets_K_parameter/family_k10/graph_size_50_to_100/10_communities_per_graph/graph_universe.pkl
  - /home/gbg141/TopoBench/tutorials/first_graphuniverse_datasets_K_parameter/family_k10/graph_size_50_to_100/5_to_10_communities_per_graph/pyg_graph_list_community_detection.pkl
  - /home/gbg141/TopoBench/tutorials/first_graphuniverse_datasets_K_parameter/family_k10/graph_size_50_to_100/5_to_10_communities_per_graph/graph_universe.pkl
  - /home/gbg141/TopoBench/tutorials/first_graphuniverse_datasets_K_parameter/family_k10/graph_size_50_to_100/2_to_5_communities_per_graph/pyg_graph_list_community_detection.pkl
  - /home/gbg141/TopoBench/tutorials/first_graphuniverse_datasets_K_parameter/family_k10/graph_size_50_

In [None]:
from torch_geometric.data import Data, InMemoryDataset
from torch_geometric.io import fs
import pickle
import os.path as osp
from omegaconf import DictConfig

# Dataset configuration, wrapped in a DictConfig so that the dataset class can
# read entries with attribute access (e.g. ``parameters.K``)
parameters = DictConfig(
    {
        "data_domain": "graph",
        "data_type": "GraphUniverse",
        "data_name": "GraphUniverse",
        "K": 10,
        "graph_sizes": [50, 100],
        "per_graph_communities": [2, 5],
        "task": "community_detection",
        "data_dir": "/data/gbg141/TB/datasets/graph/GraphUniverse",
    }
)

class GraphUniverseDataset(InMemoryDataset):
    r"""Dataset class for GraphUniverse datasets.

    The graphs are read from a pre-generated pickle file whose location is
    derived from ``parameters`` (family ``K``, graph-size range, communities
    per graph and task). On first use the list of graphs is collated and
    cached as ``data.pt`` in :attr:`processed_dir`; later instantiations with
    the same ``root`` and parameters load that cache instead of re-reading
    the pickle. Delete the processed directory to force a rebuild.

    Parameters
    ----------
    root : str
        Root directory where the dataset will be saved.
    name : str
        Name of the dataset. Currently ignored: the effective name is
        derived from ``parameters`` and stored in ``self.name``.
    parameters : DictConfig
        Configuration parameters for the dataset. The keys read are ``K``,
        ``graph_sizes`` (a ``[min, max]`` pair), ``per_graph_communities``
        (a ``[min, max]`` pair or a single ``int``) and ``task``.
    **kwargs : dict
        Additional keyword arguments. Accepted for interface compatibility;
        currently unused.
    """

    def __init__(
        self,
        root: str,
        name: str,
        parameters: DictConfig,
        **kwargs,
    ) -> None:
        # Everything below feeds raw_dir / processed_dir / raw_file_names,
        # which the base-class constructor reads, so it must be set before
        # super().__init__() is called.
        self.parameters = parameters
        self.family = f"family_k{parameters.K}"
        self.graph_sizes = f"graph_size_{parameters.graph_sizes[0]}_to_{parameters.graph_sizes[1]}"

        # A fixed community count is stored on disk as "<n>_communities_per_graph",
        # a range as "<min>_to_<max>_communities_per_graph".
        communities = parameters.per_graph_communities
        if isinstance(communities, int):
            communities_label = f"{communities}"
        else:
            communities_label = f"{communities[0]}_to_{communities[1]}"
        self.per_graph_communities = f"{communities_label}_communities_per_graph"

        self.task = f"task_{parameters.task}"
        self.name = "_".join([self.family, self.graph_sizes, self.per_graph_communities, self.task])
        PATH_TO_DATASETS = "/home/gbg141/TopoBench/tutorials/first_graphuniverse_datasets_K_parameter"
        self.dataset_path = "/".join([PATH_TO_DATASETS, self.family, self.graph_sizes, self.per_graph_communities, f"pyg_graph_list_{parameters.task}.pkl"])
        super().__init__(
            root,
        )
        # The base constructor runs process() when the processed file is
        # missing; the collated data is then read back from disk. This is
        # the standard PyG save/load pattern and avoids unpickling and
        # collating the graph list on every instantiation.
        self.load(self.processed_paths[0])

    @property
    def raw_dir(self) -> str:
        """Return the path to the raw directory of the dataset.

        Returns
        -------
        str
            Path to the raw directory (``<root>/<name>/raw``).
        """
        return osp.join(
            self.root,
            self.name,
            "raw",
        )

    @property
    def processed_dir(self) -> str:
        """Return the path to the processed directory of the dataset.

        As a side effect, ``self.processed_root`` is set to
        ``<root>/<name>`` each time this property is read.

        Returns
        -------
        str
            Path to the processed directory (``<root>/<name>/processed``).
        """
        # NOTE(review): processed_root is not read inside this class;
        # presumably external tooling relies on it -- confirm before removing.
        self.processed_root = osp.join(
            self.root,
            self.name,
        )
        return osp.join(self.processed_root, "processed")

    @property
    def raw_file_names(self) -> list[str]:
        """Return the raw file names for the dataset.

        Returns
        -------
        list[str]
            Single-element list holding the path of the pre-generated
            pickle file (``self.dataset_path``).
        """
        return [f"{self.dataset_path}"]

    @property
    def processed_file_names(self) -> str:
        """Return the processed file name for the dataset.

        Returns
        -------
        str
            Processed file name.
        """
        return "data.pt"

    def download(self) -> None:
        r"""Do nothing: the dataset is expected to already exist on disk.

        The graphs are generated beforehand and stored at
        ``self.dataset_path``, so there is nothing to download.
        """
        # Nothing to download

    def process(self) -> None:
        r"""Read the pickled list of graphs and cache it in collated form.

        The list stored at ``self.dataset_path`` is unpickled and saved to
        ``self.processed_paths[0]``.

        Raises
        ------
        FileNotFoundError
            If ``self.dataset_path`` does not exist.
        """
        # NOTE: pickle can execute arbitrary code while loading; only point
        # dataset_path at files you generated yourself or otherwise trust.
        with open(self.dataset_path, 'rb') as f:
            data_list = pickle.load(f)

        self.save(data_list, self.processed_paths[0])


In [35]:
dataset = GraphUniverseDataset(parameters["data_dir"], None, parameters)

Processing...
Done!


In [36]:
dataset._data

Data(x=[75825, 15], edge_index=[2, 450596], y=[75825])

In [38]:
dataset[500]

Data(x=[51, 15], edge_index=[2, 170], y=[51])