In [None]:
import os
import networkx as nx
from node2vec import Node2Vec
import pandas as pd

# def embeddin_generator(emb_size, gml_directory, gmlfile_prefix, gmlfile_suffix):
def run_node2vec_on_gml_files(gml_directory, output_directory, *, subject_ids=None,
                              dimensions=64, walk_length=10, num_walks=100,
                              workers=4):
    """Compute Node2Vec embeddings for per-subject GML graphs and save them as CSV.

    For every subject ID the function looks for
    ``GNN input_LPCMCI_Sub<ID>_Yeo-7-liberal_space.gml`` in *gml_directory*,
    fits a Node2Vec model on the graph and writes the node embeddings to
    ``Node2Vec_LPCMCI_Sub<ID>_Yeo-7.csv`` in *output_directory* (one row per
    node, indexed by the node label converted to ``str``).

    Subjects whose output file already exists, whose input file is missing,
    or whose graph has no nodes are skipped with a message.

    Args:
        gml_directory: Directory containing the input ``.gml`` files.
        output_directory: Directory receiving the CSV files; created if it
            does not exist yet.
        subject_ids: Iterable of integer subject IDs to process. Defaults to
            ``range(1, 162)`` (IDs 001 to 161).
        dimensions: Embedding size passed to ``Node2Vec``.
        walk_length: Random-walk length passed to ``Node2Vec``.
        num_walks: Number of walks per node passed to ``Node2Vec``.
        workers: Number of parallel workers passed to ``Node2Vec``.
    """
    if subject_ids is None:
        subject_ids = range(1, 162)  # assuming IDs from 001 to 161

    # An empty directory string means "current directory"; makedirs('') would raise.
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)

    for subject_id in subject_ids:
        filename = f'GNN input_LPCMCI_Sub{subject_id:03}_Yeo-7-liberal_space.gml'
        file_path = os.path.join(gml_directory, filename)

        # Define output filename and path
        output_filename = f'Node2Vec_LPCMCI_Sub{subject_id:03}_Yeo-7.csv'
        output_path = os.path.join(output_directory, output_filename)

        # Check if output file already exists, skip if it does
        if os.path.exists(output_path):
            print(f"File {output_filename} already exists in destination. Skipping...")
            continue

        # Skip if input file does not exist
        if not os.path.exists(file_path):
            print(f"File {filename} not found in source. Skipping...")
            continue

        # Load the graph
        G = nx.read_gml(file_path)

        # A graph without nodes yields no random walks, so there is nothing
        # to train on; skip it instead of letting the fit abort the whole batch.
        if G.number_of_nodes() == 0:
            print(f"File {filename} contains no nodes. Skipping...")
            continue

        # Run Node2Vec
        node2vec = Node2Vec(G, dimensions=dimensions, walk_length=walk_length,
                            num_walks=num_walks, workers=workers)
        model = node2vec.fit(window=10, min_count=1, batch_words=4)

        # Get node embeddings and collect them in a DataFrame (one row per node)
        embeddings = {str(node): model.wv[str(node)] for node in G.nodes()}
        embedding_df = pd.DataFrame.from_dict(embeddings, orient='index')

        # Write to a temporary file first and move it into place afterwards.
        # The skip-if-exists check above trusts any file at output_path, so a
        # write interrupted half-way must never leave a truncated CSV there.
        temp_path = output_path + '.tmp'
        embedding_df.to_csv(temp_path)
        os.replace(temp_path, output_path)
        print(f"Node2Vec embedding for {filename} has been successfully saved as {output_filename}")

# Example usage with specified directories.
# os.path.join picks the correct separator on every platform; a raw string
# with doubled backslashes keeps both backslashes literally in the path,
# which only happens to resolve on Windows.
gml_directory = os.path.join(".", "data", "Yeo-7-liberal_space_mask - LPCMCI")
output_directory = os.path.join(".", "data", "graph_embeddings_64")

# Ensure the output directory exists
os.makedirs(output_directory, exist_ok=True)

run_node2vec_on_gml_files(gml_directory, output_directory)
