In [1]:
# graph_reasoning_colab.ipynb (the Python code below is meant to be run in a notebook)

# Step 1: Install Required Packages
!pip install networkx huggingface_hub pandas numpy scikit-learn openai

# Step 2: Import Libraries
import networkx as nx
import pandas as pd
import numpy as np
import pickle
from huggingface_hub import hf_hub_download
from sklearn.metrics.pairwise import cosine_similarity
import openai

# Step 3: Load the Graph
# Hugging Face Hub repository that hosts both the graph and its embeddings.
repo_id = 'lamm-mit/bio-graph-1K'

# Download the GraphML file from the Hub and parse it with networkx.
graph_file = hf_hub_download(
    repo_id=repo_id,
    filename='large_graph_simple_giant.graphml',
)
G = nx.read_graphml(graph_file)
print(f"Graph loaded with {len(G.nodes())} nodes and {len(G.edges())} edges.")

# Step 4: Load the Embeddings
# The embeddings are distributed as a pickle file.
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# from a repository you trust.
embedding_file = hf_hub_download(
    repo_id=repo_id,
    filename='embeddings_simple_giant_ge-large-en-v1.5.pkl',
)
with open(embedding_file, 'rb') as fh:
    embeddings = pickle.load(fh)

# Step 5: Node Similarity Function
def get_similar_nodes(node_id, top_k=5):
    """Return the ``top_k`` nodes whose embeddings are most similar to ``node_id``.

    Similarity is the cosine similarity between the embedding stored for
    ``node_id`` in the module-level ``embeddings`` mapping and every stored
    embedding.

    Args:
        node_id: Key to look up in ``embeddings``.
        top_k: Maximum number of neighbours to return.

    Returns:
        A list of ``(key, similarity)`` tuples ordered from most to least
        similar, never containing ``node_id`` itself. The list is empty when
        ``node_id`` has no embedding or ``top_k`` is not positive.
    """
    if node_id not in embeddings or top_k <= 0:
        return []
    keys = list(embeddings.keys())
    # Flatten each vector so that embeddings stored as (d,) or (1, d) both
    # stack into the 2-D (n_nodes, d) matrix that cosine_similarity expects.
    mat = np.stack([np.asarray(embeddings[k]).reshape(-1) for k in keys])
    query_idx = keys.index(node_id)
    sims = cosine_similarity(mat[query_idx:query_idx + 1], mat)[0]
    # Drop the query by its index instead of assuming it sorts first: a node
    # with an identical (or collinear) embedding ties at similarity 1.0 and
    # could otherwise push the query itself into the results.
    ranked = np.argsort(-sims, kind="stable")
    ranked = ranked[ranked != query_idx][:top_k]
    return [(keys[i], sims[i]) for i in ranked]

# Step 6: Shortest Path Finder
def find_path(start_node, end_node):
    """Look up a shortest path between two nodes of the module-level graph ``G``.

    Args:
        start_node: Node at which the path begins.
        end_node: Node at which the path ends.

    Returns:
        The result of ``nx.shortest_path`` for the two nodes, or the string
        "Invalid node(s)" when either node is not in ``G``, or the string
        "No path found" when networkx raises ``NetworkXNoPath``.
    """
    both_present = start_node in G and end_node in G
    if not both_present:
        return "Invalid node(s)"
    try:
        return nx.shortest_path(G, source=start_node, target=end_node)
    except nx.NetworkXNoPath:
        return "No path found"

# Step 7: GPT-Based Reasoning (Optional)
def query_gpt(prompt, model="gpt-4", temperature=0.7):
    """Send ``prompt`` to an OpenAI chat model and return the reply text.

    The API key is read from the ``OPENAI_API_KEY`` environment variable so
    that no credential is stored in the notebook. Both the openai>=1.0 client
    interface and the legacy ``openai.ChatCompletion`` interface are supported.

    Args:
        prompt: User message to send to the model.
        model: Name of the chat model to query.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The content of the first choice's message.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is not set.
    """
    import os  # local import so this block does not depend on the import cell

    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "Set the OPENAI_API_KEY environment variable before calling query_gpt()."
        )

    messages = [
        {"role": "system", "content": "You are a materials science reasoning assistant."},
        {"role": "user", "content": prompt},
    ]

    if hasattr(openai, "OpenAI"):
        # openai>=1.0: module-level ChatCompletion was removed; requests go
        # through a client object and responses expose attributes, not keys.
        client = openai.OpenAI(api_key=api_key)
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
        )
        return response.choices[0].message.content

    # Legacy openai<1.0 interface.
    openai.api_key = api_key
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )
    return response.choices[0].message['content']

# Example Use
# path = find_path("mineralized collagen fibers", "schreger pattern")
# print(path)

# similar_nodes = get_similar_nodes("hydroxyapatite")
# print(similar_nodes)

# response = query_gpt("What is the role of collagen in hierarchical biological structures?")
# print(response)


Collecting openai
  Downloading openai-1.78.1-py3-none-any.whl (680 kB)
Collecting packaging>=20.9
  Downloading packaging-25.0-py3-none-any.whl (66 kB)
Collecting pydantic<3,>=1.9.0
  Downloading pydantic-2.10.6-py3-none-any.whl (431 kB)
Collecting httpx<1,>=0.23.0
  Downloading httpx-0.28.1-py3-none-any.whl (73 kB)
Collecting sniffio
  Downloading sniffio-1.3.1-py3-none-any.whl (10 kB)
Collecting anyio<5,>=3.5.0
  Downloading anyio-4.5.2-py3-none-any.whl (89 kB)
Collecting jiter<1,>=0.4.0
  Downloading jiter-0.9.0-cp38-cp38-win_amd64.whl (198 kB)
Collecting distro<2,>=1.7.0
  Downloading distro-1.9.0-py3-none-any.whl (20 kB)
Collecting pydantic-core==2.27.2
  Downloading pydantic_core-2.27.2-cp38-cp38-win_amd64.whl (2.0 MB)
Collecting annotated-types>=0.6.0
  Downloading annotated_types-0.7.0-py3-none-any.whl (13 kB)
Collecting httpcore==1.*
  Downloading httpcore-1.0.9-py3-none-any.whl (78 kB)
Collecting exceptiongroup>=1.0.2; python_version < "3.11"
  Downloading exceptiongroup-1.3

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Graph loaded with 33159 nodes and 48753 edges.
