In [None]:
!pip install sentence-transformers

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.11.0->sentence-transformers)
 

In [None]:
import json

# Read the two input files: the cognitive-map instances and the environment descriptions.
with open('/content/dev_graph_lmm.json') as cmap_fh:
    cognitive_map_data = json.loads(cmap_fh.read())

with open('/content/cleaned_environment_descriptions.json') as env_fh:
    environment_description_data = json.loads(env_fh.read())

# Matching Function
def match_nodes(cognitive_map, environment_description, output_file_path):
    """Match cognitive-map nodes to environment nodes and write the result as JSON.

    Instances are paired by ``Instance_id``; a cognitive instance with no (or an
    empty) environment counterpart is skipped. For each pair, matches are
    produced in three passes, in this order:

    1. "Starting Point": the ``from`` node of the first edge on each side.
    2. "Left" / "Right": a two-pointer walk over the remaining edges. Edges whose
       action is not "Left"/"Right" are skipped, as are edges whose ``from``
       node was already matched in this pass. When both current edges carry the
       same action, their ``from`` nodes are matched.
    3. "Target Match": each distinct pair of ``to`` nodes whose descriptions both
       contain "Target". Pairs are de-duplicated, so several edges leading into
       the same target node produce a single entry.

    Args:
        cognitive_map: list of dicts with "Instance_id" and
            "Graph" -> {"Nodes": {id: description}, "Edges": [{"from", "to", "action"}]}.
        environment_description: list of dicts with "Instance_id",
            "Node Description" ({id: description}) and "actions" (edge list).
        output_file_path: path of the JSON file that receives the list of
            ``{"Instance_id": ..., "Matches": [...]}`` records.

    Returns:
        None. The result is written to ``output_file_path`` and a confirmation
        line is printed.
    """
    turn_actions = ("Left", "Right")

    # Index the environment instances once instead of scanning the list for every
    # cognitive instance. setdefault keeps the first entry per id, which is what a
    # front-to-back linear search would have returned.
    env_by_id = {}
    for env_entry in environment_description:
        env_by_id.setdefault(env_entry.get("Instance_id"), env_entry)

    result = []  # One record per matched instance

    for cognitive_instance in cognitive_map:
        instance_id = cognitive_instance.get("Instance_id")
        graph = cognitive_instance.get("Graph", {})
        cognitive_edges = graph.get("Edges", [])
        cognitive_nodes = graph.get("Nodes", {})

        env_instance = env_by_id.get(instance_id)
        if not env_instance:
            continue

        env_nodes = env_instance.get("Node Description", {})
        env_edges = env_instance.get("actions", [])

        matches = []

        # Pass 1: starting point. Edge endpoints are stringified before they are
        # used as keys into the node-description dicts.
        if cognitive_edges and env_edges:
            cmap_start = str(cognitive_edges[0].get("from", ""))
            env_start = str(env_edges[0].get("from", ""))
            matches.append({
                "Cognitive_Node": {"ID": cmap_start, "Description": cognitive_nodes.get(cmap_start, "N/A")},
                "Environment_Node": {"ID": env_start, "Description": env_nodes.get(env_start, "N/A")},
                "Match_Type": "Starting Point"
            })

        # Pass 2: two-pointer walk over the edges after the first one.
        used_cmap_nodes = set()
        used_env_nodes = set()
        cmap_index = 1
        env_index = 1

        while cmap_index < len(cognitive_edges) and env_index < len(env_edges):
            cmap_edge = cognitive_edges[cmap_index]
            env_edge = env_edges[env_index]

            cmap_action = cmap_edge.get("action", "")
            env_action = env_edge.get("action", "")
            cmap_from = str(cmap_edge.get("from", ""))
            env_from = str(env_edge.get("from", ""))

            # Only turns take part in the alignment.
            if cmap_action not in turn_actions:
                cmap_index += 1
                continue
            if env_action not in turn_actions:
                env_index += 1
                continue

            # A node matched earlier in this pass is never matched again.
            if cmap_from in used_cmap_nodes:
                cmap_index += 1
                continue
            if env_from in used_env_nodes:
                env_index += 1
                continue

            if cmap_action == env_action:
                matches.append({
                    "Cognitive_Node": {"ID": cmap_from, "Description": cognitive_nodes.get(cmap_from, "N/A")},
                    "Environment_Node": {"ID": env_from, "Description": env_nodes.get(env_from, "N/A")},
                    "Match_Type": env_action
                })
                used_cmap_nodes.add(cmap_from)
                used_env_nodes.add(env_from)
                # Only the cognitive pointer moves here; the environment edge is
                # passed over on a later iteration because its node is now used.
                cmap_index += 1
                continue

            # Different turns: try the next environment edge.
            env_index += 1

        # Pass 3: target nodes, one entry per distinct (cognitive, environment) pair.
        seen_target_pairs = set()
        for cmap_edge in cognitive_edges:
            cmap_to = str(cmap_edge.get("to", ""))
            if "Target" not in cognitive_nodes.get(cmap_to, ""):
                continue
            for env_edge in env_edges:
                env_to = str(env_edge.get("to", ""))
                if "Target" not in env_nodes.get(env_to, ""):
                    continue
                if (cmap_to, env_to) in seen_target_pairs:
                    continue
                seen_target_pairs.add((cmap_to, env_to))
                matches.append({
                    "Cognitive_Node": {"ID": cmap_to, "Description": cognitive_nodes.get(cmap_to, "N/A")},
                    "Environment_Node": {"ID": env_to, "Description": env_nodes.get(env_to, "N/A")},
                    "Match_Type": "Target Match"
                })

        result.append({"Instance_id": instance_id, "Matches": matches})

    # Save the result to a JSON file
    with open(output_file_path, 'w') as output_file:
        json.dump(result, output_file, indent=4)

    print(f"Matches saved to: {output_file_path}")

# Run the matcher on the loaded datasets; the result is written to output_file_path.
output_file_path = '/content/matched_nodes.json'
match_nodes(
    cognitive_map=cognitive_map_data,
    environment_description=environment_description_data,
    output_file_path=output_file_path,
)


Matches saved to: /content/matched_nodes.json


In [None]:
import json
from sentence_transformers import SentenceTransformer, util

# Read the cognitive map, the environment descriptions, and the matches saved in matched_nodes.json.
with open('/content/dev_graph_lmm.json') as cmap_fh:
    cognitive_map_data = json.loads(cmap_fh.read())

with open('/content/cleaned_environment_descriptions.json') as env_fh:
    environment_description_data = json.loads(env_fh.read())

with open('/content/matched_nodes.json') as matched_fh:
    matched_nodes_data = json.loads(matched_fh.read())

# Sentence-embedding model used to score description similarity
model = SentenceTransformer('all-MiniLM-L6-v2')

def semantic_match(cognitive_id, cognitive_desc, env_nodes, used_env_nodes):
    """
    Pick the environment node whose description is most similar to ``cognitive_desc``.

    Candidates are the entries of ``env_nodes`` that are not in ``used_env_nodes``
    and whose integer ID is at least ``int(cognitive_id) - 1`` (so a candidate may
    sit one ID below the cognitive node). Similarity is the cosine similarity
    between embeddings produced by the module-level ``model``.

    Returns:
        ``(env_id, score)`` for the highest-scoring candidate, or ``(None, 0)``
        when ``env_nodes`` is empty or no candidate passes the filter.
    """
    if not env_nodes:
        return None, 0  # Nothing to compare against

    # Collect candidate IDs and descriptions in parallel lists.
    candidate_ids = []
    candidate_descs = []
    for env_id, env_desc in env_nodes.items():
        if int(env_id) < int(cognitive_id) - 1:
            continue  # More than one ID behind the cognitive node
        if env_id in used_env_nodes:
            continue  # Already taken by another match
        candidate_ids.append(env_id)
        candidate_descs.append(env_desc)

    if not candidate_ids:
        return None, 0  # Every environment node was filtered out

    # Embed the query and all candidates, then score the query against each one.
    query_embedding = model.encode(cognitive_desc, convert_to_tensor=True)
    candidate_embeddings = model.encode(candidate_descs, convert_to_tensor=True)
    scores = util.pytorch_cos_sim(query_embedding, candidate_embeddings)[0]

    top_idx = scores.argmax().item()
    return candidate_ids[top_idx], scores[top_idx].item()

# Minimum similarity score a semantic match must exceed to be kept
SIMILARITY_THRESHOLD = 0.68

# Per-instance semantic matches; instances that yield no match are left out
semantic_matches = []

for cognitive_instance in cognitive_map_data:
    instance_id = cognitive_instance.get("Instance_id")
    cognitive_nodes = cognitive_instance.get("Graph", {}).get("Nodes", {})
    cognitive_edges = cognitive_instance.get("Graph", {}).get("Edges", [])

    # Environment record with the same Instance_id; skip the instance if there is none
    env_instance = next(
        filter(lambda e: e.get("Instance_id") == instance_id, environment_description_data),
        None,
    )
    if not env_instance:
        continue

    env_nodes = env_instance.get("Node Description", {})

    # Matches already stored for this instance in matched_nodes.json
    inc_match_instance = next(
        filter(lambda inc: inc.get("Instance_id") == instance_id, matched_nodes_data),
        None,
    )
    prior_matches = inc_match_instance.get("Matches", []) if inc_match_instance else []
    matched_cognitive_nodes = {m["Cognitive_Node"]["ID"] for m in prior_matches}
    matched_environment_nodes = {m["Environment_Node"]["ID"] for m in prior_matches}

    # Environment nodes that may no longer be chosen; grows as semantic matches are accepted
    used_env_nodes = set(matched_environment_nodes)

    instance_matches = {"Instance_id": instance_id, "Matches": []}

    for cmap_id, cmap_desc in cognitive_nodes.items():
        if cmap_id in matched_cognitive_nodes:
            continue  # Already paired by the earlier matching step

        best_match_env_id, match_score = semantic_match(cmap_id, cmap_desc, env_nodes, used_env_nodes)

        accepted = bool(best_match_env_id) and match_score > SIMILARITY_THRESHOLD
        if not accepted:
            continue

        instance_matches["Matches"].append({
            "Cognitive_Node": {"ID": cmap_id, "Description": cmap_desc},
            "Environment_Node": {"ID": best_match_env_id, "Description": env_nodes.get(best_match_env_id, "N/A")},
            "Match_Type": f"Semantic Match (Score: {match_score:.2f})"
        })
        used_env_nodes.add(best_match_env_id)

    if instance_matches["Matches"]:
        semantic_matches.append(instance_matches)

# Write the collected matches to disk
output_file_path = '/content/semantic_matched_nodes.json'
with open(output_file_path, 'w') as output_file:
    json.dump(semantic_matches, output_file, indent=4)

print(f"Semantic matches saved to: {output_file_path}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Semantic matches saved to: /content/semantic_matched_nodes.json


Merge incremental and semantic matches


In [None]:
import json

# Load the outputs of the two earlier matching steps
with open('/content/matched_nodes.json') as matched_nodes_file:
    matched_nodes_data = json.load(matched_nodes_file)

with open('/content/semantic_matched_nodes.json') as semantic_matches_file:
    semantic_matched_data = json.load(semantic_matches_file)


def _cognitive_id_sort_key(match):
    """Sort key for a match: numeric cognitive IDs ascending, anything else last.

    IDs are not always numeric: this script substitutes 'Unknown' for a missing
    ID, and an ID may also be an empty string. Calling int() on those directly
    would abort the whole merge with ValueError, so they are sorted after all
    numeric IDs instead (list.sort is stable, so they keep their relative order).
    """
    raw_id = match["Cognitive_Node"]["ID"]
    try:
        return (0, int(raw_id))
    except (TypeError, ValueError):
        return (1, 0)


# Merged matches keyed by Instance_id
merged_matches = {}

# Incremental matches go in first so they precede semantic matches for the same node
for inc in matched_nodes_data:
    instance_id = inc.get('Instance_id')
    matches = inc.get('Matches', [])

    merged_entry = merged_matches.setdefault(instance_id, {"Instance_id": instance_id, "Matches": []})
    merged_entry["Matches"].extend(matches)

# Semantic matches are rebuilt field by field so every entry has the same nesting
for sem in semantic_matched_data:
    instance_id = sem.get('Instance_id')
    merged_entry = merged_matches.setdefault(instance_id, {"Instance_id": instance_id, "Matches": []})

    for match in sem.get("Matches", []):
        cognitive_node = match.get("Cognitive_Node", {})
        env_node = match.get("Environment_Node", {})
        match_type = match.get("Match_Type", "Semantic Match")

        if not cognitive_node or not env_node:
            continue  # Skip entries missing either side

        merged_entry["Matches"].append({
            "Cognitive_Node": {
                "ID": cognitive_node.get('ID', 'Unknown'),
                "Description": cognitive_node.get('Description', 'N/A')
            },
            "Environment_Node": {
                "ID": env_node.get('ID', 'Unknown'),
                "Description": env_node.get('Description', 'N/A')
            },
            "Match_Type": match_type
        })

# Sort matches within each instance by Cognitive_Node ID
for instance in merged_matches.values():
    instance["Matches"].sort(key=_cognitive_id_sort_key)

# Convert dictionary to list format for JSON output
final_matches = list(merged_matches.values())

# Save merged matches
output_file_path = "/content/merged_matched_nodes.json"
with open(output_file_path, "w") as output_file:
    json.dump(final_matches, output_file, indent=4)

print(f"Merged and sorted matched nodes saved to: {output_file_path}")


Merged and sorted matched nodes saved to: /content/merged_matched_nodes.json


Fixed Cost (-1) Matching (Current DP Implementation)

In [None]:
import json
import numpy as np

# Cognitive Map (CM) instances
with open("/content/dev_graph_lmm.json", "r") as cm_fh:
    cognitive_map_data = json.loads(cm_fh.read())

# Environment Map (EM) instances
with open("/content/cleaned_environment_descriptions.json", "r") as em_fh:
    environment_map_data = json.loads(em_fh.read())

# Matches found so far, read from the merged file (despite the variable name)
with open("/content/merged_matched_nodes.json", "r") as merged_fh:
    incremental_matches = json.loads(merged_fh.read())

# Dynamic Programming (DP) Refinement for remaining unmatched nodes
def dp_refinement(cm_nodes, em_nodes, initial_matches, gap_penalty=0.1, match_cost=-1.0):
    """Align the still-unmatched CM and EM nodes with a fixed-cost dynamic program.

    Nodes whose IDs already appear in ``initial_matches`` are removed from both
    sides. The remaining nodes, in dict order, are aligned like two sequences:
    pairing the current CM node with the current EM node costs ``match_cost``,
    skipping a node on either side costs ``gap_penalty``, and the total cost is
    minimised. On ties a match is preferred over skipping a CM node, which is
    preferred over skipping an EM node.

    Args:
        cm_nodes: dict mapping cognitive-map node ID -> description.
        em_nodes: dict mapping environment node ID -> description.
        initial_matches: existing matches; each has ["Cognitive_Node"]["ID"]
            and ["Environment_Node"]["ID"].
        gap_penalty: cost of leaving one node unmatched.
        match_cost: cost of pairing two nodes (lower is better). The single
            definition is used for both the table fill and the traceback.

    Returns:
        List of "DP Match" records ordered from the first unmatched node to the
        last. Empty when either side has no unmatched nodes.
    """
    matched_cm = {match["Cognitive_Node"]["ID"] for match in initial_matches}
    matched_em = {match["Environment_Node"]["ID"] for match in initial_matches}

    unmatched_cm = [node_id for node_id in cm_nodes if node_id not in matched_cm]
    unmatched_em = [node_id for node_id in em_nodes if node_id not in matched_em]

    n_cm, n_em = len(unmatched_cm), len(unmatched_em)
    if n_cm == 0 or n_em == 0:
        return []

    MATCH, SKIP_CM, SKIP_EM = 0, 1, 2

    dp = np.zeros((n_cm + 1, n_em + 1))
    # The move taken into each cell is stored, so the traceback does not have to
    # rediscover it by comparing floating-point sums for exact equality.
    moves = np.zeros((n_cm + 1, n_em + 1), dtype=np.int8)

    # Borders: reaching (i, 0) or (0, j) means skipping every node so far.
    for i in range(1, n_cm + 1):
        dp[i][0] = i * gap_penalty
    for j in range(1, n_em + 1):
        dp[0][j] = j * gap_penalty

    for i in range(1, n_cm + 1):
        for j in range(1, n_em + 1):
            candidates = (
                dp[i-1][j-1] + match_cost,  # MATCH
                dp[i-1][j] + gap_penalty,   # SKIP_CM
                dp[i][j-1] + gap_penalty,   # SKIP_EM
            )
            # min() returns the first minimal index, giving the tie order above.
            best = min(range(3), key=candidates.__getitem__)
            dp[i][j] = candidates[best]
            moves[i][j] = best

    # Trace back from the bottom-right corner along the stored moves.
    i, j = n_cm, n_em
    final_matches = []
    while i > 0 and j > 0:
        move = moves[i][j]
        if move == MATCH:
            final_matches.append({
                "Cognitive_Node": {"ID": unmatched_cm[i-1], "Description": cm_nodes[unmatched_cm[i-1]]},
                "Environment_Node": {"ID": unmatched_em[j-1], "Description": em_nodes[unmatched_em[j-1]]},
                "Match_Type": "DP Match"
            })
            i -= 1
            j -= 1
        elif move == SKIP_CM:
            i -= 1
        else:
            j -= 1

    # The traceback visits pairs last-to-first; return them in forward order.
    final_matches.reverse()
    return final_matches

# Run the fixed-cost DP for every instance and add its matches to that instance's list
for match_instance in incremental_matches:
    instance_id = match_instance["Instance_id"]
    cm_instance = next(filter(lambda cm: cm["Instance_id"] == instance_id, cognitive_map_data), None)
    em_instance = next(filter(lambda em: em["Instance_id"] == instance_id, environment_map_data), None)

    # Both sides are needed; otherwise the instance is left as it is
    if cm_instance and em_instance:
        cm_nodes = cm_instance["Graph"]["Nodes"]
        em_nodes = em_instance["Node Description"]
        initial_matches = match_instance["Matches"]

        # The DP only considers nodes that initial_matches has not already paired
        dp_matches = dp_refinement(cm_nodes, em_nodes, initial_matches)

        # initial_matches is the instance's own list, so this extends it in place
        initial_matches.extend(dp_matches)

# Write the refined matches to disk
output_path = "/content/final_matched_nodes.json"
with open(output_path, "w") as out_fh:
    json.dump(incremental_matches, out_fh, indent=4)

print(f"Final matched nodes saved to: {output_path}")

Final matched nodes saved to: /content/final_matched_nodes.json


TF-IDF + Cosine Similarity

In [None]:
import json
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Cognitive Map (CM) instances
with open("/content/dev_graph_lmm.json", "r") as cm_fh:
    cognitive_map_data = json.loads(cm_fh.read())

# Environment Map (EM) instances
with open("/content/cleaned_environment_descriptions.json", "r") as em_fh:
    environment_description_data = json.loads(em_fh.read())

# Matches found so far, read from the merged file (despite the variable name)
with open("/content/merged_matched_nodes.json", "r") as merged_fh:
    incremental_matches = json.loads(merged_fh.read())

# Function to compute TF-IDF similarity scores
def compute_similarity_scores(cm_nodes, em_nodes):
    """Return the TF-IDF cosine-similarity matrix between CM and EM descriptions.

    A single vectorizer is fitted on the descriptions of both sides so they share
    one vocabulary. Row ``r`` of the result corresponds to the ``r``-th value of
    ``cm_nodes`` and column ``c`` to the ``c``-th value of ``em_nodes``.
    """
    cm_texts = list(cm_nodes.values())
    em_texts = list(em_nodes.values())

    # Fit on the combined corpus, then project each side separately.
    tfidf = TfidfVectorizer().fit(cm_texts + em_texts)
    cm_matrix = tfidf.transform(cm_texts)
    em_matrix = tfidf.transform(em_texts)

    return cosine_similarity(cm_matrix, em_matrix)

# Function to apply DP on remaining unmatched nodes
def dp_refinement_with_tfidf(cm_nodes, em_nodes, initial_matches, gap_penalty=0.5):
    """Align the still-unmatched CM and EM nodes using TF-IDF similarity as the match score.

    Nodes whose IDs already appear in ``initial_matches`` are removed from both
    sides. The remaining nodes, in dict order, are aligned like two sequences:
    pairing CM node ``i`` with EM node ``j`` costs ``-similarity[i][j]`` (from
    ``compute_similarity_scores``), skipping a node on either side costs
    ``gap_penalty``, and the total cost is minimised. On ties a match is
    preferred over skipping a CM node, which is preferred over skipping an EM node.

    Args:
        cm_nodes: dict mapping cognitive-map node ID -> description.
        em_nodes: dict mapping environment node ID -> description.
        initial_matches: existing matches; each has ["Cognitive_Node"]["ID"]
            and ["Environment_Node"]["ID"].
        gap_penalty: cost of leaving one node unmatched.

    Returns:
        List of match records ordered from the first unmatched node to the last,
        each labelled "DP Match (Similarity: x.xx)". Empty when either side has
        no unmatched nodes.
    """
    matched_cm = {match["Cognitive_Node"]["ID"] for match in initial_matches}
    matched_em = {match["Environment_Node"]["ID"] for match in initial_matches}

    unmatched_cm = [node_id for node_id in cm_nodes if node_id not in matched_cm]
    unmatched_em = [node_id for node_id in em_nodes if node_id not in matched_em]

    if not unmatched_cm or not unmatched_em:
        return []

    # Rows follow unmatched_cm, columns follow unmatched_em.
    similarity_matrix = compute_similarity_scores(
        {k: cm_nodes[k] for k in unmatched_cm},
        {k: em_nodes[k] for k in unmatched_em}
    )

    MATCH, SKIP_CM, SKIP_EM = 0, 1, 2
    n_cm, n_em = len(unmatched_cm), len(unmatched_em)

    dp = np.zeros((n_cm + 1, n_em + 1))
    # The move taken into each cell is stored, so the traceback does not have to
    # rediscover it by comparing floating-point sums for exact equality.
    moves = np.zeros((n_cm + 1, n_em + 1), dtype=np.int8)

    # Borders: reaching (i, 0) or (0, j) means skipping every node so far.
    for i in range(1, n_cm + 1):
        dp[i][0] = i * gap_penalty
    for j in range(1, n_em + 1):
        dp[0][j] = j * gap_penalty

    for i in range(1, n_cm + 1):
        for j in range(1, n_em + 1):
            match_cost = -similarity_matrix[i-1][j-1]  # Higher similarity = lower cost
            candidates = (
                dp[i-1][j-1] + match_cost,  # MATCH
                dp[i-1][j] + gap_penalty,   # SKIP_CM
                dp[i][j-1] + gap_penalty,   # SKIP_EM
            )
            # min() returns the first minimal index, giving the tie order above.
            best = min(range(3), key=candidates.__getitem__)
            dp[i][j] = candidates[best]
            moves[i][j] = best

    # Trace back from the bottom-right corner along the stored moves.
    i, j = n_cm, n_em
    final_matches = []
    while i > 0 and j > 0:
        move = moves[i][j]
        if move == MATCH:
            final_matches.append({
                "Cognitive_Node": {"ID": unmatched_cm[i-1], "Description": cm_nodes[unmatched_cm[i-1]]},
                "Environment_Node": {"ID": unmatched_em[j-1], "Description": em_nodes[unmatched_em[j-1]]},
                "Match_Type": f"DP Match (Similarity: {similarity_matrix[i-1][j-1]:.2f})"
            })
            i -= 1
            j -= 1
        elif move == SKIP_CM:
            i -= 1
        else:
            j -= 1

    # The traceback visits pairs last-to-first; return them in forward order.
    final_matches.reverse()
    return final_matches

# Run the TF-IDF DP for every instance and add its matches to that instance's list
for match_instance in incremental_matches:
    instance_id = match_instance["Instance_id"]
    cm_instance = next(filter(lambda cm: cm["Instance_id"] == instance_id, cognitive_map_data), None)
    em_instance = next(filter(lambda em: em["Instance_id"] == instance_id, environment_description_data), None)

    # Both sides are needed; otherwise the instance is left as it is
    if cm_instance and em_instance:
        cm_nodes = cm_instance["Graph"]["Nodes"]
        em_nodes = em_instance["Node Description"]
        initial_matches = match_instance["Matches"]

        # The DP only considers nodes that initial_matches has not already paired
        dp_matches = dp_refinement_with_tfidf(cm_nodes, em_nodes, initial_matches)

        # initial_matches is the instance's own list, so this extends it in place
        initial_matches.extend(dp_matches)

# Write the refined matches to disk
output_path = "/content/final_matched_nodes_tfidf.json"
with open(output_path, "w") as out_fh:
    json.dump(incremental_matches, out_fh, indent=4)

print(f"Final matched nodes (TF-IDF DP) saved to: {output_path}")


Final matched nodes (TF-IDF DP) saved to: /content/final_matched_nodes_tfidf.json


SBERT + Cosine Similarity

In [None]:
import json
import numpy as np
from sentence_transformers import SentenceTransformer, util

# Cognitive Map (CM) instances
with open("/content/dev_graph_lmm.json", "r") as cm_fh:
    cognitive_map_data = json.loads(cm_fh.read())

# Environment Map (EM) instances
with open("/content/cleaned_environment_descriptions.json", "r") as em_fh:
    environment_description_data = json.loads(em_fh.read())

# Matches found so far, read from the merged file (despite the variable name)
with open("/content/merged_matched_nodes.json", "r") as merged_fh:
    incremental_matches = json.loads(merged_fh.read())

# Sentence-embedding model used for the similarity scores
model = SentenceTransformer("all-MiniLM-L6-v2")

# Function to compute SBERT similarity scores
def compute_semantic_similarity(cm_nodes, em_nodes):
    """Return the cosine-similarity matrix between CM and EM description embeddings.

    Both description lists are encoded with the module-level ``model``. Row ``r``
    of the result corresponds to the ``r``-th value of ``cm_nodes`` and column
    ``c`` to the ``c``-th value of ``em_nodes``. The result is moved to the CPU
    and converted to a NumPy array.
    """
    cm_texts, em_texts = list(cm_nodes.values()), list(em_nodes.values())

    # One embedding per description, kept as tensors for the similarity call
    cm_vecs = model.encode(cm_texts, convert_to_tensor=True)
    em_vecs = model.encode(em_texts, convert_to_tensor=True)

    pairwise = util.pytorch_cos_sim(cm_vecs, em_vecs)
    return pairwise.cpu().numpy()

# Function to apply DP on remaining unmatched nodes
def dp_refinement_with_sbert(cm_nodes, em_nodes, initial_matches, gap_penalty=0.3):
    """Align the still-unmatched CM and EM nodes using SBERT similarity as the match score.

    Nodes whose IDs already appear in ``initial_matches`` are removed from both
    sides. The remaining nodes, in dict order, are aligned like two sequences:
    pairing CM node ``i`` with EM node ``j`` costs ``-similarity[i][j]`` (from
    ``compute_semantic_similarity``), skipping a node on either side costs
    ``gap_penalty``, and the total cost is minimised. On ties a match is
    preferred over skipping a CM node, which is preferred over skipping an EM node.

    Args:
        cm_nodes: dict mapping cognitive-map node ID -> description.
        em_nodes: dict mapping environment node ID -> description.
        initial_matches: existing matches; each has ["Cognitive_Node"]["ID"]
            and ["Environment_Node"]["ID"].
        gap_penalty: cost of leaving one node unmatched.

    Returns:
        List of match records ordered from the first unmatched node to the last,
        each labelled "DP Match (Semantic Similarity: x.xx)". Empty when either
        side has no unmatched nodes.
    """
    matched_cm = {match["Cognitive_Node"]["ID"] for match in initial_matches}
    matched_em = {match["Environment_Node"]["ID"] for match in initial_matches}

    unmatched_cm = [node_id for node_id in cm_nodes if node_id not in matched_cm]
    unmatched_em = [node_id for node_id in em_nodes if node_id not in matched_em]

    if not unmatched_cm or not unmatched_em:
        return []

    # Rows follow unmatched_cm, columns follow unmatched_em.
    similarity_matrix = compute_semantic_similarity(
        {k: cm_nodes[k] for k in unmatched_cm},
        {k: em_nodes[k] for k in unmatched_em}
    )

    MATCH, SKIP_CM, SKIP_EM = 0, 1, 2
    n_cm, n_em = len(unmatched_cm), len(unmatched_em)

    dp = np.zeros((n_cm + 1, n_em + 1))
    # The move taken into each cell is stored, so the traceback does not have to
    # rediscover it by comparing floating-point sums for exact equality.
    moves = np.zeros((n_cm + 1, n_em + 1), dtype=np.int8)

    # Borders: reaching (i, 0) or (0, j) means skipping every node so far.
    for i in range(1, n_cm + 1):
        dp[i][0] = i * gap_penalty
    for j in range(1, n_em + 1):
        dp[0][j] = j * gap_penalty

    for i in range(1, n_cm + 1):
        for j in range(1, n_em + 1):
            match_cost = -similarity_matrix[i-1][j-1]  # Higher similarity = lower cost
            candidates = (
                dp[i-1][j-1] + match_cost,  # MATCH
                dp[i-1][j] + gap_penalty,   # SKIP_CM
                dp[i][j-1] + gap_penalty,   # SKIP_EM
            )
            # min() returns the first minimal index, giving the tie order above.
            best = min(range(3), key=candidates.__getitem__)
            dp[i][j] = candidates[best]
            moves[i][j] = best

    # Trace back from the bottom-right corner along the stored moves.
    i, j = n_cm, n_em
    final_matches = []
    while i > 0 and j > 0:
        move = moves[i][j]
        if move == MATCH:
            final_matches.append({
                "Cognitive_Node": {"ID": unmatched_cm[i-1], "Description": cm_nodes[unmatched_cm[i-1]]},
                "Environment_Node": {"ID": unmatched_em[j-1], "Description": em_nodes[unmatched_em[j-1]]},
                "Match_Type": f"DP Match (Semantic Similarity: {similarity_matrix[i-1][j-1]:.2f})"
            })
            i -= 1
            j -= 1
        elif move == SKIP_CM:
            i -= 1
        else:
            j -= 1

    # The traceback visits pairs last-to-first; return them in forward order.
    final_matches.reverse()
    return final_matches

# Run the SBERT DP for every instance and add its matches to that instance's list
for match_instance in incremental_matches:
    instance_id = match_instance["Instance_id"]
    cm_instance = next(filter(lambda cm: cm["Instance_id"] == instance_id, cognitive_map_data), None)
    em_instance = next(filter(lambda em: em["Instance_id"] == instance_id, environment_description_data), None)

    # Both sides are needed; otherwise the instance is left as it is
    if cm_instance and em_instance:
        cm_nodes = cm_instance["Graph"]["Nodes"]
        em_nodes = em_instance["Node Description"]
        initial_matches = match_instance["Matches"]

        # The DP only considers nodes that initial_matches has not already paired
        dp_matches = dp_refinement_with_sbert(cm_nodes, em_nodes, initial_matches)

        # initial_matches is the instance's own list, so this extends it in place
        initial_matches.extend(dp_matches)

# Write the refined matches to disk
output_path = "/content/final_matched_nodes_sbert.json"
with open(output_path, "w") as out_fh:
    json.dump(incremental_matches, out_fh, indent=4)

print(f"Final matched nodes (SBERT DP) saved to: {output_path}")


Final matched nodes (SBERT DP) saved to: /content/final_matched_nodes_sbert.json
