## Environment Settings

In [2]:
import logging
import os
import sys

# Cluster-specific locations; adjust these when running the notebook elsewhere.
PROJECT_ROOT = '/hpctmp/e0315913/demo/CS5284_Project/GNN-cluster/'
EXTRA_SYS_PATHS = [
    '/home/svu/e0315913/.local/lib/python3.8/site-packages',
    '/home/svu/e0315913/.local/bin',
]

# Only directories (or zip archives) are meaningful on sys.path, so the YAML
# config file that used to be appended here has been dropped; the config is
# read through CONFIG_PATH instead. The membership test keeps sys.path from
# growing every time this cell is re-run.
for extra_path in EXTRA_SYS_PATHS:
    if extra_path not in sys.path:
        sys.path.append(extra_path)

# Relative paths used later (e.g. "config/demo_config.yaml") resolve against this directory.
os.chdir(PROJECT_ROOT)

# force=True replaces any handlers already installed on the root logger.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s', force=True)

Import libraries

In [3]:
import random, torch
import numpy as np
from torch.utils.data import DataLoader, Subset
from tqdm import tqdm

In [4]:
from src.utils.config import load_config, validate_config
from src.utils.evaluation import evaluate
from src.models.alpha import FullOutput, Metrics, threshold_based_candidates, calculate_avg_metrics
from src.models.rgcn_model import RGCNModel
from src.models.alpha import Output

Set config and device

In [7]:
CONFIG_PATH = "config/demo_config.yaml"

config = load_config(CONFIG_PATH)
# Top-level keys this notebook reads from `config`. 'raw_kb' (dataset
# construction) and 'model_path_test2' (checkpoint loading) are read further
# down, so they are listed here as well.
# NOTE(review): presumably validate_config checks that each key is present — confirm.
required_keys = [
    'model', 'train', 'node_embed', 'idxes',
    'train_qa_data', 'test_qa_data', 'num_hops',
    'raw_kb', 'model_path_test2',
]
validate_config(config, required_keys)

In [8]:
# One seed shared by every random number source used in the notebook.
SEED = 2024
for seed_fn in (torch.manual_seed, random.seed, np.random.seed):
    seed_fn(SEED)

# Prefer the GPU when one is visible, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

## Load Encoding Model (multi-qa-MiniLM-L6-cos-v1)

In [9]:
import json, random, torch
import pandas as pd

from torch_geometric.utils import k_hop_subgraph, subgraph
from torch_geometric.utils.convert import from_networkx
from torch_geometric.data import Data
import networkx as nx

from sentence_transformers import util, SentenceTransformer

  warn(f"Failed to load image Python extension: {e}")


In [10]:
# Local snapshot of the multi-qa-MiniLM-L6-cos-v1 sentence encoder.
SBERT_SNAPSHOT_DIR = "/hpctmp/e0315913/CS5284_Project/GNN-cluster/src/models/models--sentence-transformers--multi-qa-MiniLM-L6-cos-v1/snapshots/2d981ed0b0b8591b038d472b10c38b96016aab2e"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SentenceTransformer(SBERT_SNAPSHOT_DIR)
model.to(device)

2024-11-20 10:12:18,536 - INFO - Use pytorch device_name: cuda
2024-11-20 10:12:18,537 - INFO - Load pretrained SentenceTransformer: /hpctmp/e0315913/CS5284_Project/GNN-cluster/src/models/models--sentence-transformers--multi-qa-MiniLM-L6-cos-v1/snapshots/2d981ed0b0b8591b038d472b10c38b96016aab2e


SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

## KGQADataset

KGQADataset has been modified to return actual paths (not embeddings) and candidate node values

In [11]:
class KGQADataset(torch.utils.data.Dataset):
    """Question-answering dataset over a knowledge graph.

    Each item pairs one question with the k-hop subgraph around the entity
    written in square brackets inside that question, per-node answer labels,
    the mapping from global node ids to subgraph-local ids and, when
    ``from_paths_activate`` is set, the textual 1- and 2-hop paths that leave
    the entity.

    Args:
        path_to_node_embed: Text file of node2vec vectors. The first line is
            skipped; every other line is an entity name followed by the vector.
        path_to_idxes: JSON file with the keys ``entity_to_idx``,
            ``edge_index`` and ``relations``.
        path_to_qa: Tab-separated file without header: question, then the
            answers joined by ``|``.
        path_to_kb: ``|``-separated triple file (entity1, relation, entity2).
            Only read when ``from_paths_activate`` is true.
        from_paths_activate: Build the networkx graph and use path-sentence
            embeddings as node features.
        entity_sbert: Use sentence embeddings of the entity names as node
            features (takes precedence over ``from_paths_activate`` when the
            features are assembled).
        k: Number of hops of the extracted subgraph.
        encoder: Object exposing a SentenceTransformer-style ``encode``. When
            omitted, the notebook-level ``model`` is used, which is what the
            class relied on implicitly before this parameter existed.
    """

    def __init__(self, path_to_node_embed, path_to_idxes, path_to_qa, path_to_kb, from_paths_activate, entity_sbert, k=3, encoder=None):
        self.from_paths_activate = from_paths_activate
        self.entity_sbert = entity_sbert
        # Explicit injection removes the hidden dependency on a global; the
        # global remains the default so existing calls keep working.
        self.encoder = encoder if encoder is not None else model

        if self.from_paths_activate:
            self.G = self.generate_nx_graph(path_to_kb)
        self.loaded_entity_to_idx, self.loaded_edge_index, self.loaded_relations = self.load_data_json(path_to_idxes)
        # Reverse lookup (node id -> entity name), built once instead of per item.
        self._idx_to_entity = {idx: name for name, idx in self.loaded_entity_to_idx.items()}
        self.data = self.create_data_object(self.loaded_edge_index, self.loaded_relations, self.loaded_entity_to_idx)
        self.num_relations = len(set(self.loaded_relations))
        self.k = k
        # Computed for every entity regardless of `entity_sbert`.
        self.entity_sbert_embeddings = self.get_entity_sbert_embeddings(self.loaded_entity_to_idx)
        self.node2vec_embeddings = self.load_node2vec_embeddings(path_to_node_embed)
        self.df = pd.read_csv(path_to_qa, sep='\t', header=None, names=['question', 'answer'])
        self.df['answer'] = self.df['answer'].apply(lambda x: x.split("|"))
        # Questions are encoded with the entity brackets stripped.
        self.q_embeddings = self.encoder.encode(
            [q.replace("[", "").replace("]", "") for q in self.df['question']],
            batch_size=128,
            convert_to_tensor=True
        )

    def __len__(self):
        """Number of question/answer rows."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(subgraph_data, question_embedding, labels, node_map, actual_paths)`` for row ``idx``.

        ``actual_paths`` is an empty list unless ``from_paths_activate`` is set.
        """
        row = self.df.iloc[idx]
        question, answers = row['question'], row['answer']
        entity = self.extract_entity_from_question(question)
        entity_node = self.loaded_entity_to_idx[entity]
        question_embedding = self.q_embeddings[idx]
        subset_node_indices, sub_edge_index, _, edge_mask = self.get_k_hop_subgraph(entity_node)
        subgraph_data, node_map = self.construct_subgraph(subset_node_indices, sub_edge_index, edge_mask)
        labels = self.get_labels(answers, node_map)
        # 384 for sentence-embedding features, 64 for node2vec features.
        embedding_dim = 384 if (self.from_paths_activate or self.entity_sbert) else 64
        subgraph_data.x = self.get_node_embeddings(node_map, question_embedding, entity, embedding_dim)

        actual_paths = []
        if self.from_paths_activate:
            paths = self.find_paths(self.G, entity, 2) + self.find_paths(self.G, entity, 1)
            actual_paths = [list(path) for path in paths]

        return subgraph_data, question_embedding, labels, node_map, actual_paths

    def _get_idx_to_entity(self):
        """Return the cached node-id -> entity-name map, building it if missing."""
        cached = getattr(self, '_idx_to_entity', None)
        if cached is None:
            cached = {idx: name for name, idx in self.loaded_entity_to_idx.items()}
            self._idx_to_entity = cached
        return cached

    def get_entity_sbert_embeddings(self, loaded_entity_to_idx):
        """Encode every entity name; returns ``{entity name: embedding as list}``."""
        entities = list(loaded_entity_to_idx.keys())
        encoded_values = self.encoder.encode(entities, batch_size=128, convert_to_tensor=False).tolist()
        return dict(zip(entities, encoded_values))

    def get_k_hop_subgraph(self, entity_node):
        """Run ``k_hop_subgraph`` around ``entity_node`` with relabelled nodes.

        Returns the tuple ``(subset, sub_edge_index, mapping, edge_mask)``.
        """
        subset, sub_edge_index, mapping, edge_mask = k_hop_subgraph(
            node_idx=entity_node,
            num_hops=self.k,
            edge_index=self.data.edge_index,
            relabel_nodes=True
        )
        return subset, sub_edge_index, mapping, edge_mask

    def construct_subgraph(self, subset_node_indices, sub_edge_index, edge_mask):
        """Build the subgraph ``Data`` object and the global -> local node id map."""
        # Local id = position of the node inside `subset_node_indices`.
        node_map = {old_idx.item(): new_idx for new_idx, old_idx in enumerate(subset_node_indices)}
        sub_edge_attr = self.data.edge_attr[edge_mask]
        subgraph_data = Data(edge_index=sub_edge_index, edge_attr=sub_edge_attr)
        return subgraph_data, node_map

    def load_data_json(self, filename):
        """Read ``entity_to_idx``, ``edge_index`` and ``relations`` from a JSON file."""
        with open(filename, 'r') as f:
            data = json.load(f)
        return data['entity_to_idx'], data['edge_index'], data['relations']

    def load_data_pt(self, filename):
        """Load a list from a ``torch.save`` file; raises ``ValueError`` for any other payload."""
        # NOTE(review): torch.load unpickles its input — only use on trusted files.
        data = torch.load(filename)
        if isinstance(data, list):
            return data
        raise ValueError("Expected a list of k-hop subgraph node indices.")

    def create_data_object(self, edge_index, relations, entity_to_idx):
        """Build the full graph with every edge duplicated in the reverse direction.

        The reversed copy of an edge carries the same relation id as the original.
        """
        # NOTE(review): iteration order of a set of strings depends on
        # PYTHONHASHSEED, so relation ids can differ between processes. Left
        # unchanged on purpose: switching to a sorted order would silently
        # re-number relations for checkpoints trained with the current scheme.
        unique_relations = list(set(relations))
        relation_mapping = {relation: index for index, relation in enumerate(unique_relations)}

        edge_index = torch.tensor(edge_index).t().contiguous()
        undirected_edge_index = torch.cat([edge_index, edge_index.flip(0)], dim=1)
        edge_attr = torch.tensor([relation_mapping[rel] for rel in relations])
        undirected_edge_attr = torch.cat([edge_attr, edge_attr], dim=0)
        return Data(edge_index=undirected_edge_index, edge_attr=undirected_edge_attr, num_nodes=len(entity_to_idx))

    def load_node2vec_embeddings(self, file_path, embedding_dim=64):
        """Parse a node2vec text file into ``{entity name: list of floats}``.

        The last ``embedding_dim`` whitespace-separated fields of a line are the
        vector; everything before them is the (possibly multi-word) entity name.
        """
        embeddings_dict = {}
        with open(file_path, 'r') as f:
            next(f, None)  # skip the first line; tolerate an empty file
            for line in f:
                parts = line.strip().split()
                entity = " ".join(parts[:-embedding_dim])
                embeddings_dict[entity] = [float(value) for value in parts[-embedding_dim:]]

        return embeddings_dict

    def extract_entity_from_question(self, question):
        """Return the text between the first ``[`` and the next ``]``.

        Raises:
            ValueError: if the question has no ``[`` or no ``]`` after it.
        """
        start = question.find('[') + 1
        # Search for the closing bracket after the opening one so that a stray
        # ']' earlier in the question does not produce an empty entity.
        end = question.find(']', start)
        if start == 0 or end == -1:
            raise ValueError(f"No entity found in the question: {question}")
        return question[start:end]

    def get_labels(self, answers, node_map):
        """Return a long tensor with 1 at the local index of every answer present in the subgraph."""
        labels = torch.zeros(len(node_map), dtype=torch.long)
        for ans in answers:
            ans_idx = self.loaded_entity_to_idx.get(ans)
            if ans_idx is not None and ans_idx in node_map:
                labels[node_map[ans_idx]] = 1
        return labels

    def get_node_embeddings(self, node_map, question_embedding, entity, embedding_dim, random_init=False):
        """Assemble the float feature matrix for the subgraph nodes.

        Rows default to zeros. A node receives a real feature only when its
        entity name has a node2vec entry; the feature source is chosen in this
        order: ``entity_sbert`` -> path embeddings (``from_paths_activate``) ->
        node2vec. With ``random_init`` every row is drawn uniformly from
        [-0.1, 0.1] instead.

        Raises:
            ValueError: if the assembled array is not two-dimensional.
        """
        embeddings = [[0.0] * embedding_dim for _ in range(len(node_map))]
        idx_to_entity = self._get_idx_to_entity()

        result = {}
        if self.from_paths_activate:
            result = self.find_best_embedding(self.G, entity, question_embedding)
            # The question entity itself is represented by the question embedding.
            result[entity] = question_embedding

        for ori, new in node_map.items():
            if random_init:
                embeddings[new] = [random.uniform(-0.1, 0.1) for _ in range(embedding_dim)]
                continue

            name = idx_to_entity.get(ori)
            if name is None or name not in self.node2vec_embeddings:
                continue

            if self.entity_sbert:
                embeddings[new] = self.entity_sbert_embeddings[name]
            elif self.from_paths_activate:
                # Path embeddings only exist for nodes reached by a 1- or 2-hop
                # path; subgraph nodes further away (k > 2) keep the zero
                # vector instead of raising KeyError.
                best = result.get(name)
                if best is not None:
                    embeddings[new] = best.tolist()
            else:
                embeddings[new] = self.node2vec_embeddings[name]

        embeddings = np.array(embeddings)
        if embeddings.ndim != 2:
            raise ValueError(f"Embeddings array must be 2D but got {embeddings.ndim}D")

        return torch.tensor(embeddings, dtype=torch.float)

    def generate_nx_graph(self, path):
        """Build a directed graph from the triple file, adding a reverse edge per triple.

        Parallel relations between the same ordered pair of entities are merged
        into one edge whose ``relation`` attribute joins them with " and ";
        underscores in relation names are replaced by spaces.

        Raises:
            KeyError: if the file contains a relation without a known reverse name.
        """
        df = pd.read_csv(path, sep='|', header=None, names=['entity1', 'relation', 'entity2'])
        df_unique = df.drop_duplicates()
        reverse_relations = {
            'directed_by': 'directed',
            'written_by': 'written',
            'starred_actors': 'starring',
            'has_tags': 'is_tagged_to',
            'has_genre': 'is_genre_of',
            'has_imdb_rating': 'is_imdb_rating_of',
            'has_imdb_votes': 'is_imdb_votes_of',
            'in_language': 'language_of',
            'release_year': 'is_released_year_of'
        }

        unknown_relations = set(df_unique['relation']) - set(reverse_relations)
        if unknown_relations:
            raise KeyError(f"No reverse relation defined for: {sorted(map(str, unknown_relations))}")

        # Column-wise construction of the reversed triples (same rows, same
        # order as a row-by-row loop, without the per-row overhead).
        df_reverse = pd.DataFrame({
            'entity1': df_unique['entity2'].to_numpy(),
            'relation': df_unique['relation'].map(reverse_relations).to_numpy(),
            'entity2': df_unique['entity1'].to_numpy(),
        })
        df_combined = pd.concat([df_unique, df_reverse], ignore_index=True)

        df_final = df_combined.groupby(['entity1', 'entity2'], as_index=False).agg({
            'relation': ' and '.join
        })
        df_final['relation'] = df_final['relation'].str.replace('_', ' ')
        G = nx.from_pandas_edgelist(df_final, source='entity1', target='entity2', edge_attr='relation', create_using=nx.DiGraph())
        return G

    def find_paths(self, G, u, n):
        """Enumerate all paths of exactly ``n`` hops starting at ``u``.

        A path is a list of ``(node, relation_to_next_node)`` tuples; the last
        tuple has ``None`` as relation. A node is never prepended to a tail that
        already contains it.
        """
        if n == 0:
            return [[(u, None)]]

        paths = []
        for neighbor in G.neighbors(u):
            for tail in self.find_paths(G, neighbor, n - 1):
                if any(node == u for node, _ in tail):  # avoid cycles
                    continue
                paths.append([(u, G[u][neighbor]['relation'])] + tail)
        return paths

    def find_best_embedding(self, G, query_entity, q_embedding):
        """For every node reached by a 1- or 2-hop path, keep the embedding of its best path.

        Each path is rendered as a sentence and encoded; per end node the path
        with the highest cosine similarity to ``q_embedding`` wins. Returns
        ``{end node: path embedding}`` (empty when the entity has no paths).
        """
        paths = self.find_paths(G, query_entity, 2) + self.find_paths(G, query_entity, 1)

        sentences = []
        candidates = []
        for tuple_list in paths:
            candidate_entity = tuple_list[-1][0]  # last node of the path
            if candidate_entity == query_entity:  # path loops back to the start
                continue
            candidates.append(candidate_entity)
            sentences.append(' '.join(f"{tup[0]} {tup[1]}" if tup[1] else tup[0] for tup in tuple_list))

        if not sentences:
            # Nothing to encode or compare for an entity without usable paths.
            return {}

        path_embeddings = self.encoder.encode(sentences, batch_size=128, convert_to_tensor=True)
        cosine_scores = util.cos_sim(q_embedding, path_embeddings)[0].tolist()

        best_embeddings = {}
        best_scores = {}
        for idx, candidate in enumerate(candidates):
            cosine_score = cosine_scores[idx]
            if candidate not in best_embeddings or cosine_score > best_scores[candidate]:
                best_scores[candidate] = cosine_score
                best_embeddings[candidate] = path_embeddings[idx]

        return best_embeddings

In [12]:
import torch
from torch_geometric.data import Batch

def collate_fn(batch):
    """
    Merge a list of dataset items into one batch.

    torch_geometric Data objects cannot be stacked like plain tensors, so the
    subgraphs go through Batch.from_data_list while the remaining fields are
    stacked or concatenated by hand.

    Returns a 6-tuple: batched subgraphs, stacked question embeddings, all node
    labels concatenated and reshaped to (N, 1), the per-sample node maps, the
    per-sample label tensors as a list, and the per-sample paths as a list.
    """
    # Transpose the list of 5-tuples into five per-field tuples.
    graphs, q_embeddings, label_tensors, node_maps, sample_paths = tuple(zip(*batch))

    return (
        Batch.from_data_list(graphs),
        torch.stack(q_embeddings),
        torch.cat(label_tensors).unsqueeze(1),  # N = total number of nodes in the batch
        node_maps,
        list(label_tensors),
        list(sample_paths),
    )


# Extraction Function

The function extracts the candidate nodes predicted by the GNN, together with the paths leading to those candidate nodes.

In [23]:
def extract_candidate_nodes_and_paths(dataloader, model, device, threshold_value, train_dataset):
    """Run the GNN over ``dataloader`` and report the first sample's candidates.

    Args:
        dataloader: Yields the 6-tuples produced by ``collate_fn``.
        model: GNN called as ``model(batched_subgraphs, question_embeddings)``.
            Its result may expose ``output`` / ``threshold`` attributes; when it
            does not, the raw result and ``threshold_value`` are used.
        device: Device the batch is moved to before the forward pass.
        threshold_value: Fallback threshold for candidate selection.
        train_dataset: Dataset whose ``loaded_entity_to_idx`` maps entity names
            to global node ids.

    Returns:
        ``(paths, candidate_names)`` for the first sample of the first batch.

    Raises:
        IndexError: if the dataloader yields no batch.
    """
    model.eval()

    all_paths = []  # per batch: list of per-sample path lists
    all_candidate_values = []  # per batch: candidate entity names of the first sample

    reversed_index = {value: key for key, value in train_dataset.loaded_entity_to_idx.items()}

    with torch.no_grad():
        for (
            batched_subgraphs,
            question_embeddings,
            stacked_labels,
            node_maps,
            labels,
            paths
        ) in tqdm(dataloader, desc="Extracting subgraph", leave=True):
            batched_subgraphs = batched_subgraphs.to(device)
            question_embeddings = question_embeddings.to(device)
            full_output = model(batched_subgraphs, question_embeddings)

            output = getattr(full_output, "output", full_output)
            threshold = getattr(full_output, "threshold", threshold_value)
            candidates_mask, _ = threshold_based_candidates(output, threshold=threshold)

            # collate_fn hands over a tuple with one node map per sample.
            first_node_map = node_maps[0] if isinstance(node_maps, tuple) else node_maps
            # Position in this list == subgraph-local node id (insertion order
            # of the node map); built once per batch rather than per candidate.
            local_to_original = list(first_node_map.keys())

            high_similarity_indices = (candidates_mask == 1).nonzero(as_tuple=True)[0]
            # Candidate indices are batch-global. The first sample's nodes come
            # first in the batched graph, so larger indices belong to other
            # samples and must not be looked up in the first sample's map.
            candidate_values_batch = [
                reversed_index[local_to_original[node_idx]]
                for node_idx in high_similarity_indices.tolist()
                if node_idx < len(local_to_original)
            ]
            all_candidate_values.append(candidate_values_batch)
            all_paths.append(paths)

    if not all_paths:
        raise IndexError("dataloader yielded no batches")
    return all_paths[0][0], all_candidate_values[0]

## Demo

Train dataset is loaded

In [15]:
# Path-based node features are switched on; entity-name SBERT features are off.
dataset_kwargs = dict(
    path_to_node_embed=config['node_embed'],
    path_to_idxes=config['idxes'],
    path_to_qa=config['train_qa_data'],
    path_to_kb=config['raw_kb'],
    from_paths_activate=True,
    entity_sbert=False,
    k=config['num_hops'],
)
train_dataset = KGQADataset(**dataset_kwargs)

Batches:   0%|          | 0/338 [00:00<?, ?it/s]

Batches:   0%|          | 0/930 [00:00<?, ?it/s]

The following index is chosen for demo.

In [16]:
# Half-open range [start_ind, end_ind) selecting exactly one demo question.
start_ind = 50004
end_ind = start_ind + 1

Question and ground truth answer

In [97]:
# First (and only) row of the selected demo slice.
demo_row = train_dataset.df.iloc[start_ind:end_ind].iloc[0]
question = demo_row['question']
ground_truth_answers = demo_row['answer']

print(f"Question: {question}")
print(f"Ground Truth Answer: {ground_truth_answers}")

Question: who is listed as director of [Hiromi Nagasaku] acted films
Ground Truth Answer: ['Kiyoshi Kurosawa']


The batch for the chosen index is loaded

In [18]:
num_relations = train_dataset.num_relations

# Restrict the dataset to the demo rows selected above.
demo_indices = list(range(start_ind, end_ind))
sub_train_dataset = Subset(train_dataset, demo_indices)

train_loader = DataLoader(
    dataset=sub_train_dataset,
    batch_size=config['train']['batch_size'],
    shuffle=True,
    collate_fn=collate_fn,
)

Load pretrained GNN for output

In [19]:
model_cfg = config['model']

model_test2 = RGCNModel(
    node_dim=model_cfg['in_channels'],
    question_dim=train_dataset.q_embeddings.size(-1),
    hidden_dim=model_cfg['hidden_channels'],
    # NOTE(review): hard-coded rather than taken from `num_relations`;
    # presumably this is the value the checkpoint was trained with — confirm.
    num_relations=38,
    output_dim=model_cfg['out_channels'],
    num_rgcn=model_cfg['num_layers'],
    reduced_qn_dim=model_cfg['reduced_qn_dim'],
    reduced_node_dim=model_cfg['reduced_node_dim'],
    output_embedding=model_cfg['output_embedding'],
    use_residuals=model_cfg['use_residuals'],
)

# map_location lets a checkpoint saved on a GPU load on a CPU-only host,
# matching the cuda/cpu fallback used when `device` was chosen.
# NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
checkpoint = torch.load(config['model_path_test2'], map_location=device)
model_test2.load_state_dict(checkpoint['model_state_dict'])
model_test2 = model_test2.to(device)

  checkpoint = torch.load(config['model_path_test2'])


Load output specific configurations

In [20]:
train_cfg = config['train']
equal_subgraph_weighting = train_cfg['equal_subgraph_weighting']
hits_at_k = train_cfg['hits_at_k']
# Fallback threshold handed to the extraction step.
threshold_value = 0.50

In [25]:
# Paths and GNN candidate entities for the single demo question.
paths, candidate_nodes = extract_candidate_nodes_and_paths(
    dataloader=train_loader,
    model=model_test2,
    device=device,
    threshold_value=threshold_value,
    train_dataset=train_dataset,
)

Extracting subgraph:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting subgraph: 100%|██████████| 1/1 [00:00<00:00, 12.62it/s]


In [26]:
candidate_nodes

['Comedy',
 '1993',
 '2003',
 'Drew Barrymore',
 'Kiyoshi Kurosawa',
 'Doppelganger',
 'Hiromi Nagasaku']

In [28]:
paths

[[('Hiromi Nagasaku', 'starring'),
  ('Doppelganger', 'release year'),
  ('1993', None)],
 [('Hiromi Nagasaku', 'starring'),
  ('Doppelganger', 'release year'),
  ('2003', None)],
 [('Hiromi Nagasaku', 'starring'),
  ('Doppelganger', 'has genre'),
  ('Comedy', None)],
 [('Hiromi Nagasaku', 'starring'),
  ('Doppelganger', 'starred actors'),
  ('Drew Barrymore', None)],
 [('Hiromi Nagasaku', 'starring'),
  ('Doppelganger', 'in language'),
  ('Japanese', None)],
 [('Hiromi Nagasaku', 'starring'),
  ('Doppelganger', 'directed by and written by'),
  ('Kiyoshi Kurosawa', None)],
 [('Hiromi Nagasaku', 'starring'), ('Doppelganger', None)]]

# RAG

In [31]:
from transformers import AutoTokenizer, AutoModelForCausalLM

Load Llama3.1 as LLM Model

In [32]:
# Tokenizer and weights are read from the same local snapshot directory.
LLM_SNAPSHOT_DIR = "/hpctmp/e0315913/transformers_cache/models--akjindal53244--Llama-3.1-Storm-8B/snapshots/df21b06dcf534b026dd301a44a521d7253c8b94b"

llm_tokenizer = AutoTokenizer.from_pretrained(LLM_SNAPSHOT_DIR)
llm_model = AutoModelForCausalLM.from_pretrained(LLM_SNAPSHOT_DIR)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [95]:
import re

def create_prompt(question, paths, candidate_answers):
    """
    Build the LLM prompt: instruction line, the question, one bullet per path,
    the candidate answers and a trailing "Answer: " cue.
    """
    sections = [
        "You are doing extractive question answering. Strictly use the following pieces of context to choose correct candidate answer to the question from Candidate Answers. Answer directly, without elaboration. Output in comma-separated form.\n",
        f"Question: {question}\nPaths:\n",
    ]
    sections.extend(f"- {path}\n" for path in paths)
    sections.append(f"Candidate Answers: {candidate_answers}\n")
    sections.append("Answer: ")
    return "".join(sections)

def query_llm(question, paths, candidate_answers):
    """
    Ask the LLM to pick the answer among ``candidate_answers``.

    The prompt is built with ``create_prompt`` and printed. Only the tokens
    generated after the prompt are decoded, the first line of that
    continuation is normalised, and ``extract_unique_comma_separated`` reduces
    it to the candidates it mentions. The result is printed and returned as a
    comma-separated string.
    """
    prompt = create_prompt(question, paths, candidate_answers)
    print(f"Prompt: {prompt}")

    llm_model.to("cpu")
    inputs = llm_tokenizer(prompt, return_tensors="pt").to("cpu")

    prompt_length = inputs.input_ids.size(1)
    outputs = llm_model.generate(
        **inputs,
        max_new_tokens=50,  # same budget as the former prompt_length + 50 max_length
        num_return_sequences=1,
        temperature=0.7,
        top_p=0.9,
        top_k=50,
        no_repeat_ngram_size=3,
        pad_token_id=llm_tokenizer.eos_token_id
    )

    # Decode the continuation only. Searching the decoded prompt + completion
    # for "Answer:" would latch onto any earlier occurrence of that text inside
    # the question, the paths or the candidate list.
    generated_ids = outputs[0][prompt_length:]
    completion = llm_tokenizer.decode(generated_ids, skip_special_tokens=True)
    response = completion.strip().split("\n")[0].strip()

    # Keep the first sentence and normalise "a and b" to "a, b".
    response = response.split('.')[0]
    response = response.replace("\n", "").replace(" and ", ", ").strip()
    response = ", ".join([item.strip().capitalize() for item in response.split(",")])
    response = extract_unique_comma_separated(response, candidate_answers)
    print(f"{response}\n")
    return response

def format_paths_for_llm(candidate_paths):
    """
    Render every path as "node relation -> node relation -> ... -> node".

    A hop without a relation (the final node of a path) is rendered as the
    node alone.
    """
    def _render_hop(node, relation):
        if relation:
            return f"{node} {relation}"
        return f"{node}"

    return [
        " -> ".join(_render_hop(node, relation) for node, relation in path)
        for path in candidate_paths
    ]

def extract_unique_comma_separated(input_string, candidate_answers):
    """
    Return the candidates mentioned in ``input_string`` as a ", "-joined string.

    Matching is case-insensitive and done in two tiers:
    1. candidates whose full name occurs in the response as a whole phrase;
    2. only if tier 1 finds nothing, candidates sharing at least one
       whitespace-separated word (as a substring) with the response.

    Tier 2 is the original heuristic. Used on its own it returned every
    candidate that shared a single word with the response (e.g. two different
    people with the same surname). Candidates keep the order of
    ``candidate_answers``.
    """
    # Function-scope import so the block does not rely on another cell.
    import re

    haystack = input_string.lower()

    def _mentions_whole_phrase(candidate):
        phrase = candidate.lower().strip()
        if not phrase:
            return False
        # Look-arounds instead of \b so that names starting or ending with a
        # non-word character are still delimited correctly.
        pattern = r"(?<!\w)" + re.escape(phrase) + r"(?!\w)"
        return re.search(pattern, haystack) is not None

    exact_matches = [cand for cand in candidate_answers if _mentions_whole_phrase(cand)]
    if exact_matches:
        return ", ".join(exact_matches)

    # Fallback for responses that were split up or truncated upstream.
    partial_matches = [
        cand
        for cand in candidate_answers
        if any(word in haystack for word in cand.lower().split())
    ]
    return ", ".join(partial_matches)

## Candidate Paths Extraction

In [46]:
def extract_entity_from_question(question):
    """
    Return the text between the first "[" and the next "]" of the question.

    Raises:
        ValueError: if the question has no "[" or no "]" after it.
    """
    start = question.find("[") + 1
    # Look for the closing bracket after the opening one; a stray "]" earlier
    # in the question would otherwise yield an empty entity.
    end = question.find("]", start)
    if start == 0 or end == -1:
        raise ValueError(f"No entity found in the question: {question}")
    return question[start:end]


def filter_path_with_entity(entity, candidate_paths):
    """Keep the paths whose first node is ``entity`` (input order preserved)."""
    return [path for path in candidate_paths if path[0][0] == entity]


def filter_path_with_candidate_node(candidate_nodes, filtered_paths, question_entity):
    """
    Keep the paths whose last node is one of ``candidate_nodes``.

    Side effect: the first occurrence of ``question_entity`` is removed from
    ``candidate_nodes`` in place, so the caller's list changes as well.
    """
    if question_entity in candidate_nodes:
        candidate_nodes.remove(question_entity)

    def _ends_at_candidate(path):
        for node in candidate_nodes:
            if path[-1][0] == node:
                return True
        return False

    return list(filter(_ends_at_candidate, filtered_paths))

Extract the paths from the subgraph that start at the question entity and end at a candidate node

In [47]:
question_entity = extract_entity_from_question(question)

# Step 1: paths that start at the question entity.
filtered_path = filter_path_with_entity(entity=question_entity, candidate_paths=paths)

# Step 2: of those, paths that end at a GNN candidate node.
final_candidate_paths = filter_path_with_candidate_node(
    candidate_nodes=candidate_nodes,
    filtered_paths=filtered_path,
    question_entity=question_entity,
)

In [48]:
final_candidate_paths

[[('Hiromi Nagasaku', 'starring'),
  ('Doppelganger', 'release year'),
  ('1993', None)],
 [('Hiromi Nagasaku', 'starring'),
  ('Doppelganger', 'release year'),
  ('2003', None)],
 [('Hiromi Nagasaku', 'starring'),
  ('Doppelganger', 'has genre'),
  ('Comedy', None)],
 [('Hiromi Nagasaku', 'starring'),
  ('Doppelganger', 'starred actors'),
  ('Drew Barrymore', None)],
 [('Hiromi Nagasaku', 'starring'),
  ('Doppelganger', 'directed by and written by'),
  ('Kiyoshi Kurosawa', None)],
 [('Hiromi Nagasaku', 'starring'), ('Doppelganger', None)]]

Formatting paths for LLM

In [49]:
# Turn each (node, relation) path into a single " -> "-joined string for the prompt.
formatted_paths = format_paths_for_llm(final_candidate_paths)

In [50]:
formatted_paths

['Hiromi Nagasaku starring -> Doppelganger release year -> 1993',
 'Hiromi Nagasaku starring -> Doppelganger release year -> 2003',
 'Hiromi Nagasaku starring -> Doppelganger has genre -> Comedy',
 'Hiromi Nagasaku starring -> Doppelganger starred actors -> Drew Barrymore',
 'Hiromi Nagasaku starring -> Doppelganger directed by and written by -> Kiyoshi Kurosawa',
 'Hiromi Nagasaku starring -> Doppelganger']

In [96]:
# candidate_nodes no longer contains the question entity at this point if it
# was present before: filter_path_with_candidate_node removed it in place.
response = query_llm(question, formatted_paths, candidate_nodes)

Prompt: You are doing extractive question answering. Strictly use the following pieces of context to choose correct candidate answer to the question from Candidate Answers. Answer directly, without elaboration. Output in comma-separated form.
Question: who is listed as director of [Hiromi Nagasaku] acted films
Paths:
- Hiromi Nagasaku starring -> Doppelganger release year -> 1993
- Hiromi Nagasaku starring -> Doppelganger release year -> 2003
- Hiromi Nagasaku starring -> Doppelganger has genre -> Comedy
- Hiromi Nagasaku starring -> Doppelganger starred actors -> Drew Barrymore
- Hiromi Nagasaku starring -> Doppelganger directed by and written by -> Kiyoshi Kurosawa
- Hiromi Nagasaku starring -> Doppelganger
Candidate Answers: ['Comedy', '1993', '2003', 'Drew Barrymore', 'Kiyoshi Kurosawa', 'Doppelganger']
Answer: 
Kiyoshi Kurosawa



In [100]:
# Side-by-side summary of the question, the reference answer and the pipeline output.
summary_lines = (
    f"Question: {question}",
    f"Ground Truth Answer: {ground_truth_answers}",
    f"GNN+LLM Generated Answer: {response}",
)
for summary_line in summary_lines:
    print(summary_line)

Question: who is listed as director of [Hiromi Nagasaku] acted films
Ground Truth Answer: ['Kiyoshi Kurosawa']
GNN+LLM Generated Answer: Kiyoshi Kurosawa
