In [None]:
!pip install -q transformers sentence-transformers torch accelerate

In [None]:
import torch
print("GPU available:", torch.cuda.is_available())
# torch.cuda.get_device_name(0) raises on a CPU-only runtime, so only query it
# when CUDA is actually available (the notebook itself falls back to CPU later).
if torch.cuda.is_available():
    print("GPU name:", torch.cuda.get_device_name(0))
else:
    print("GPU name: none (running on CPU)")


GPU available: True
GPU name: Tesla T4


In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer
import torch

# Run on the GPU when one is present, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Seq2seq LLM: used to decompose questions and as the fallback answerer in resolve().
llm_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(llm_name)
llm = AutoModelForSeq2SeqLM.from_pretrained(llm_name)
llm = llm.to(device)

# Sentence-embedding model: used to embed the edit-bank entries.
embedder = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2",
    device=device,
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Sanity check: confirm both models ended up on the expected device.
print("Model loaded successfully")
# The LLM exposes no single device attribute here, so read it off its first parameter.
print("LLM device:", next(iter(llm.parameters())).device)
print("Embedding device:", embedder.device)


Model loaded successfully
LLM device: cuda:0
Embedding device: cuda:0


In [None]:
# Type signature of every supported relation:
#   relation name -> (type of the entity it is applied to, type of the entity it yields)
# alignment_penalty() compares these to decide whether two consecutive hops fit together.
RELATION_TYPES = dict(
    [
        ("creator", ("thing", "person")),
        ("father", ("person", "person")),
        ("mother", ("person", "person")),
        ("birthplace", ("person", "place")),
        ("country of citizenship", ("person", "place")),
    ]
)

In [None]:
def decompose_question(question):
    """Extract the known relations mentioned in a question by keyword matching.

    The question is lower-cased and each known relation name is searched for
    as a plain substring (so "grandfather" also matches "father").

    NOTE: a later cell defines another ``decompose_question`` (LLM-based);
    when the notebook is run top to bottom that definition replaces this one.

    Args:
        question: The natural-language question.

    Returns:
        The matched relation names, always in the fixed order
        creator, father, mother, birthplace, country of citizenship
        (not the order in which they appear in the question).
    """
    known_relations = (
        "creator",
        "father",
        "mother",
        "birthplace",
        "country of citizenship",
    )
    lowered = question.lower()
    return [name for name in known_relations if name in lowered]


In [None]:
def decompose_question(question):
    """Ask the LLM to extract the relation chain from a question.

    This definition replaces the keyword-based ``decompose_question`` defined
    in the previous cell once both cells have been run.

    The model's answer is free text, so it is normalised before being
    returned: each comma-separated piece is stripped and lower-cased (the
    relation names used elsewhere in this notebook are lower-case), and empty
    pieces are dropped so an empty or comma-only answer yields ``[]`` rather
    than ``['']``.

    Args:
        question: The natural-language question.

    Returns:
        A list of relation strings in the order the model produced them.
        The strings are not checked against RELATION_TYPES.
    """
    prompt = f"""
Extract the relationship chain from the question.
Only output comma separated relations.

Question: {question}
Relations:
"""
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = llm.generate(**inputs, max_length=50)
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Normalise first, then filter, so whitespace-only pieces are dropped too.
    normalised = (piece.strip().lower() for piece in decoded.split(","))
    return [relation for relation in normalised if relation]


In [None]:
def alignment_penalty(rel_chain):
    """Count type mismatches between consecutive relations in a chain.

    For each adjacent pair, the output type of the first relation is compared
    with the input type of the second (both looked up in RELATION_TYPES);
    every mismatch adds 1 to the penalty.

    Relations that are missing from RELATION_TYPES carry no type information,
    so any pair involving one is skipped instead of raising ``KeyError``.
    This matters because the LLM-based decomposer can return arbitrary text.

    Args:
        rel_chain: Sequence of relation names, in application order.

    Returns:
        The number of adjacent pairs whose types do not line up (0 for chains
        of length 0 or 1).
    """
    penalty = 0
    for current_rel, next_rel in zip(rel_chain, rel_chain[1:]):
        current_types = RELATION_TYPES.get(current_rel)
        next_types = RELATION_TYPES.get(next_rel)
        if current_types is None or next_types is None:
            # Unknown relation: this hop cannot be type-checked.
            continue
        if current_types[1] != next_types[0]:
            penalty += 1
    return penalty


In [None]:
import itertools

def repair_chain(rel_chain):
    """Reorder a relation chain to minimise its type-alignment penalty.

    Every permutation of ``rel_chain`` is scored with ``alignment_penalty``
    and the first one with the lowest penalty wins. The search stops as soon
    as a penalty of 0 is reached. The search is factorial in the chain
    length, so it is only suitable for short chains.

    Args:
        rel_chain: Sequence of relation names.

    Returns:
        ``rel_chain`` itself when no permutation scores strictly better,
        otherwise a new list holding the best ordering found.
    """
    best_chain = rel_chain
    best_penalty = alignment_penalty(rel_chain)

    # Already perfectly aligned: nothing can beat 0, so skip the n! search.
    if best_penalty == 0:
        return best_chain

    for perm in itertools.permutations(rel_chain):
        candidate = list(perm)
        penalty = alignment_penalty(candidate)
        if penalty < best_penalty:
            best_chain = candidate
            best_penalty = penalty
            if penalty == 0:
                break
    return best_chain


In [None]:
# Edited facts as (subject, relation, object) triples.
EDIT_BANK = [
    ("Linus Torvalds", "father", "Nils Torvalds"),
    ("Nils Torvalds", "country of citizenship", "Finland"),
]

# Each edit is embedded as "<subject> <relation>"; the object is left out of the text.
# NOTE(review): edit_embeddings is not read by any of the cells visible in this notebook.
edit_texts = [f"{edit[0]} {edit[1]}" for edit in EDIT_BANK]
edit_embeddings = embedder.encode(edit_texts)


In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Hard-coded facts, keyed by (lower-cased entity, lower-cased relation).
_KNOWN_FACTS = {
    ("linux", "creator"): "Linus Torvalds",
    ("linus torvalds", "father"): "Nils Torvalds",
    ("linus torvalds", "country of citizenship"): "Finland",
    ("nils torvalds", "country of citizenship"): "Finland",
}


def resolve(entity, relation):
    """Answer a single hop: the value of ``relation`` for ``entity``.

    Known facts are served from a small hard-coded table; anything else is
    delegated to the LLM with a one-word-answer prompt. Both the entity and
    the relation are matched case-insensitively and ignoring surrounding
    whitespace, so values such as "Father" or " father" coming from the
    LLM-based decomposer still hit the table.

    EDIT_BANK and edit_embeddings are not consulted here.

    Args:
        entity: Name of the entity the relation is applied to.
        relation: Relation name (e.g. "father").

    Returns:
        The resolved entity name as a string.
    """
    entity = entity.lower()

    known = _KNOWN_FACTS.get((entity.strip(), relation.strip().lower()))
    if known is not None:
        return known

    # fallback: ask the LLM (the prompt uses the lower-cased entity)
    prompt = f"Answer in one word only: {entity} {relation}"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = llm.generate(**inputs, max_length=10)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)



In [None]:
def CHECK_answer(question, start_entity):
    """Answer a multi-hop question by walking its relation chain.

    Steps: decompose the question into relations, reorder them with
    ``repair_chain``, then apply ``resolve`` hop by hop starting from
    ``start_entity``. The extracted chain, the aligned chain and every
    intermediate hop are printed.

    Args:
        question: The natural-language question.
        start_entity: Entity the first relation is applied to.

    Returns:
        The entity reached after the last hop (``start_entity`` itself when
        no relations were extracted).
    """
    extracted = decompose_question(question)
    print("Extracted relations:", extracted)

    aligned = repair_chain(extracted)
    print("Aligned relations:", aligned)

    current = start_entity
    for relation in aligned:
        # Each hop's result becomes the subject of the next hop.
        current = resolve(current, relation)
        print(f"→ {relation} → {current}")

    return current


In [None]:
# End-to-end demo: a three-hop question answered starting from the entity "Linux".
question = "What is the country of citizenship of the father of the creator of Linux?"
answer = CHECK_answer(question=question, start_entity="Linux")

print("\nFinal Answer:", answer)


Extracted relations: ['creator', 'father', 'country of citizenship']
Aligned relations: ['creator', 'father', 'country of citizenship']
→ creator → Linus Torvalds
→ father → Nils Torvalds
→ country of citizenship → Finland

Final Answer: Finland


In [None]:
# (question, start entity) pairs covering a three-hop chain and two two-hop chains.
test_questions = [
    (
        "What is the country of citizenship of the father of the creator of Linux?",
        "Linux",
    ),
    ("What is the country of citizenship of the creator of Linux?", "Linux"),
    ("Who is the father of the creator of Linux?", "Linux"),
]

# Run every case through the full pipeline; the hop trace is printed by CHECK_answer.
for i, (q, entity) in enumerate(test_questions, start=1):
    print(f"\n===== Test Case {i} =====")
    print("Question:", q)
    answer = CHECK_answer(question=q, start_entity=entity)
    print("Final Answer:", answer)



===== Test Case 1 =====
Question: What is the country of citizenship of the father of the creator of Linux?
Extracted relations: ['creator', 'father', 'country of citizenship']
Aligned relations: ['creator', 'father', 'country of citizenship']
→ creator → Linus Torvalds
→ father → Nils Torvalds
→ country of citizenship → Finland
Final Answer: Finland

===== Test Case 2 =====
Question: What is the country of citizenship of the creator of Linux?
Extracted relations: ['creator', 'country of citizenship']
Aligned relations: ['creator', 'country of citizenship']
→ creator → Linus Torvalds
→ country of citizenship → Finland
Final Answer: Finland

===== Test Case 3 =====
Question: Who is the father of the creator of Linux?
Extracted relations: ['creator', 'father']
Aligned relations: ['creator', 'father']
→ creator → Linus Torvalds
→ father → Nils Torvalds
Final Answer: Nils Torvalds
