In [1]:
import torch

In [11]:
# Toy score tensor (three rows, two columns) used by the following cells
# to try out the argmax / where logic.
out = torch.tensor(
    [
        [0.5, 0.4],
        [0.6, 0.9],
        [0.5, 0.9],
    ]
)


In [14]:
# Row indices of `out` whose arg-max column is non-zero (with two columns:
# the rows where column 1 scores highest). The result is a 1-tuple of index
# tensors, exactly what single-argument torch.where returns.
predicted_answer_nodes = torch.nonzero(out.argmax(dim=1), as_tuple=True)
predicted_answer_nodes

(tensor([1, 2]),)

In [33]:
# Always (re)compute `predicted`, even when nothing was predicted: the next
# cell reads it, and leaving it unset in the "no answers" case would raise a
# NameError on a fresh kernel or silently reuse a stale value.
predicted = predicted_answer_nodes[0].tolist()
message = "answers predicted" if predicted else "NO answers found"


In [34]:
# Membership check: is index 2 among the predicted indices?
print("yes" if 2 in predicted else "No")

yes


In [30]:
# Display the index tensor held in the first element of the tuple as a plain Python list.
predicted_answer_nodes[0].tolist()

[1, 2]

In [1]:
from torch_geometric.data import Data
from ToTorch.DataBuilder import QADataBuilder
from constants import (
    TRIPLES_PATH,
    ENTITIES_LABELS_PATH,
    PROPERTIES_LABELS_PATH,
    GRAPH_EMBEDDINGS_PATH,
    QUESTIONS_ANSWERS_PATH,
    QUESTIONS_EMBEDDINGS_PATH,
    NUM_EPOCHS,
    NODES_MASK_SIZE
)
import torch
import torch.nn.functional as F
from NeuralNet.GNN import MLP, evaluate_qa_model
from loguru import logger
import sys

# Replace loguru's default sink with a stderr sink at DEBUG verbosity.
logger.remove()
logger.add(sys.stderr, level="DEBUG")

## CREATE DATA
logger.info("Creating Data object")

qa_data_builder = QADataBuilder(
    triples_path=TRIPLES_PATH,
    entities_labels_path=ENTITIES_LABELS_PATH,
    properties_labels_path=PROPERTIES_LABELS_PATH,
    embeddings_path=GRAPH_EMBEDDINGS_PATH,
    questions_answers_path=QUESTIONS_ANSWERS_PATH,
    questions_embeddings_path=QUESTIONS_EMBEDDINGS_PATH,
)

x = qa_data_builder.get_x()
question_train_mask, _, question_val_mask = qa_data_builder.get_questions_masks()

# Spread the total epoch budget evenly over the training questions.
num_train_questions = int(sum(question_train_mask))
if num_train_questions == 0:
    raise ValueError("question_train_mask selects no questions; nothing to train on")
# Clamp to at least one epoch: if there are more training questions than
# NUM_EPOCHS the division truncates to 0, the inner loop never runs and
# `loss` would be undefined when it is logged below.
NUM_EPOCHS_PER_QUESTION = max(1, int(NUM_EPOCHS / num_train_questions))

## TRAIN MLP
logger.info("Training MLP")
model = MLP(
    num_node_features=(2 * x.shape[1]), dim_hidden_layer=32, num_classes=2
)  # we multiply x.shape by two so as to account for question embedding
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)
model.train()

# we have an imbalanced set; trying to correct for that with weights.
# The weights do not change between steps, so the tensor is built once.
class_weights = torch.tensor([1 / NODES_MASK_SIZE, 1 - (1 / NODES_MASK_SIZE)])

for q_index, (question, q_embedding) in enumerate(
    qa_data_builder.questions_embeddings_masked(question_train_mask)
):  # call the questions_iterator from the instance
    q_x = qa_data_builder.get_x(
        to_concat=q_embedding
    )  # add question embedding to node features embedding
    q_y = qa_data_builder.get_y(question=question)
    # we only want to consider a subset of the Nodes to make classification easier
    x_mask = qa_data_builder.get_mask_for_nodes_for_question(
        question=question, size=NODES_MASK_SIZE
    )
    data = Data(x=q_x[x_mask], y=q_y[x_mask])
    # NOTE(review): depending on the torch_geometric version, validate() may
    # raise on invalid data by default instead of returning False — confirm
    # whether this branch is reachable.
    if not data.validate():
        logger.error(f"Data object is not valid for question {question}")

    for epoch in range(NUM_EPOCHS_PER_QUESTION):
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out, data.y, weight=class_weights)
        loss.backward()
        optimizer.step()
    logger.debug(
        f"Total Question: {(q_index + 1)}, Total Epochs: {NUM_EPOCHS_PER_QUESTION * (q_index + 1):3d}, Loss: {loss:.4f}"
    )

  from .autonotebook import tqdm as notebook_tqdm


ModuleNotFoundError: No module named 'DataBuilder'

In [53]:
# Score the trained model on the training questions. The question-level mask
# created by the training cell is `question_train_mask`; `train_mask` is not
# defined anywhere in this notebook and only worked from leftover kernel state.
evaluate_qa_model(model=model, qa_data_builder=qa_data_builder, mask=question_train_mask)

[32m2023-08-04 14:27:25.384[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mevaluate_qa_model[0m:[36m34[0m - [34m[1mQuestion: What is the type of Incentive and Commission Management (S/4 CLD)?. Predicted answer = http://www.signavio.com/opal/APQC/PPM/PF/Cost%20to%20perform%20the%20process%20manage%20employee%20orientation%20and%20deployment%20as%20a%20percentage%20of%20the%20total%20cost%20to%20perform%20the%20process%20group%20develop%20and%20counsel%20employees, Actual answer: http://www.signavio.com/opal/SAP/RSA/SCM/SolutionCapability[0m
[32m2023-08-04 14:27:25.440[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mevaluate_qa_model[0m:[36m34[0m - [34m[1mQuestion: What is the type of Incentive and Commission Management?. Predicted answer = http://www.signavio.com/opal/APQC/PPM/PF/10.3.3.2%20Perform%20routine%20asset%20maintenance%20-%20Cross%20Industry, Actual answer: http://www.signavio.com/opal/SAP/RSA/SCM/SolutionCapability[0m
[32m2023-08-04 14:27:25.490[0m

0.0

In [52]:
def _predict_answer(model, data):
    """
    Returns the predicted answer and node index.
    """
    return model(data).max(dim=1)[0].argmax().item()

def _get_nth_true_position(bool_list, n):
    """Return the index of the n-th (1-based) truthy entry of ``bool_list``, or None if there is none."""
    seen = 0
    for position, flag in enumerate(bool_list):
        if flag:
            seen += 1
            if seen == n:
                return position
    logger.error(f"Cannot find {n}th True in mask")
    return None


def evaluate_qa_model(model, qa_data_builder, mask):
    """
    Computes the fraction of questions selected by ``mask`` for which the
    model's predicted node equals the actual answer node.

    For each question the question embedding is concatenated to the node
    features, the nodes are restricted to the per-question node mask, and the
    model's prediction (a row index within that masked subset) is mapped back
    to an index into the full node list before being compared with the
    expected answer node.

    Args:
        model: trained model; switched to eval mode here.
        qa_data_builder: data builder providing question embeddings, node
            features, node masks and answer indices.
        mask: question mask passed to ``questions_embeddings_masked``.

    Returns:
        float: correct predictions divided by the number of questions
        evaluated, or 0.0 when ``mask`` selects no questions.
    """
    model.eval()
    correct_predictions = 0
    total_questions = 0
    for question, q_embedding in qa_data_builder.questions_embeddings_masked(mask):
        total_questions += 1
        # adding the question embedding to the node embeddings
        q_x = qa_data_builder.get_x(to_concat=q_embedding)
        x_mask = qa_data_builder.get_mask_for_nodes_for_question(
            question=question, size=NODES_MASK_SIZE
        )
        data = Data(x=q_x[x_mask])

        # _predict_answer yields a 0-based row within the masked subset; the
        # (row + 1)-th True entry of the mask is the index in the full node list.
        pred_node_idx = _get_nth_true_position(x_mask, _predict_answer(model, data) + 1)
        actual_node_idx = qa_data_builder.get_node_index_for_question_answer(question)

        if pred_node_idx is None:
            # Only a failed mask lookup means "no prediction"; node index 0 is
            # a legitimate node and must not be treated as a sentinel.
            logger.debug("Could not predict any answer")
        elif pred_node_idx == actual_node_idx:
            logger.debug(f"Correctly predicted answer to question {question}.")
            correct_predictions += 1
        else:
            logger.debug(
                f"Question: {question}. Predicted answer = {qa_data_builder.index_to_entity[pred_node_idx]}, Actual answer: {qa_data_builder.index_to_entity[actual_node_idx]}"
            )

    if total_questions == 0:
        # Nothing was evaluated; avoid dividing by zero / using an unbound counter.
        return 0.0
    return correct_predictions / total_questions