In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from transformers import pipeline

In [2]:
tokenizer = AutoTokenizer.from_pretrained("akjindal53244/Llama-3.1-Storm-8B")
model = AutoModelForCausalLM.from_pretrained("akjindal53244/Llama-3.1-Storm-8B")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/910 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/22.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.19G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (n

Functions for LLM (Answer extraction)

In [72]:
def create_prompt(question, paths):
    prompt = f"Question: {question}\nPaths:\n"
    for path in paths:
        prompt += f"- {path}\n"
    prompt += "Answer:"
    return prompt

In [73]:
def format_paths_for_llm(candidate_paths):
    formatted_paths = []
    for path in candidate_paths:
        path_sentence = " -> ".join(
            f"{node} {relation}" if relation else f"{node}"
            for node, relation in path
        )
        formatted_paths.append(path_sentence)
    return formatted_paths

In [74]:
def query_llm(question, paths):
    prompt = create_prompt(question, paths)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_length=300, num_return_sequences=1)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response


In [75]:
def extract_answers(response, expected_answers):
    response_tokens = response.split()
    found_answers = [answer for answer in expected_answers if answer in response_tokens]
    return found_answers

Functions for LLM (Qtype prediction)

In [8]:
entity_types = ['actor', 'director', 'genre', 'language', 'movie', 'writer', 'year']

In [76]:
def create_qtype_prompt(question):
    prompt = (
        f"Given the question: '{question}', predict the exact sequence of entity types to follow in a knowledge graph (KG) "
        "to find the answer. This sequence should trace the path needed to reach the answer, one entity at a time, by 'hopping' through connected entities.\n\n"
        "Follow these steps to construct the sequence:\n"
        "1. Identify the starting entity type based on the information given in the question.\n"
        "2. Determine each 'hop' or transition to the next entity type, thinking through how each hop brings you closer to the answer.\n"
        "3. Continue hopping through connected entity types until you reach the final entity type that answers the question.\n\n"
        "Use only these entity types: actor, director, genre, language, movie, writer, and year.\n\n"
        "Format your response as '<type_0>_to_<type1>_to_<type2>_to_<type3>' where each type shows the entity needed in the sequence.\n\n"
        "Example:\n"
        "Question: 'The films that share actors with the film [Dil Chahta Hai] were released in which years?'\n"
        "Predicted sequence of entity hops: movie_to_actor_to_movie_to_year\n\n"
        "Now, predict the entity type sequence for the given question by identifying each hop required to reach the answer."
    )
    return prompt


In [77]:
def predict_qtype(question):
    prompt = create_qtype_prompt(question)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_length=500, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    predicted_qtype = extract_qtype_from_response(response)

    return predicted_qtype

In [78]:
import re

def extract_qtype_from_response(response):
    entity_types = {"actor", "director", "genre", "language", "movie", "writer", "year"}

    match = re.search(r"final answer is:\s*([a-zA-Z]+(?:_to_[a-zA-Z]+)+)", response, re.IGNORECASE)

    if match:
        qtype_sequence = match.group(1)
        qtype_elements = qtype_sequence.split("_to_")
        if all(element in entity_types for element in qtype_elements):
            return qtype_sequence
    return ""

Testing

In [79]:
question = "What are the languages spoken in the films starred by [Assault on a Queen] actors?"
expected_answers = ["German", "Spanish", "Italian"]

candidate_paths = [
    [("Assault on a Queen", "starred_actors"), ("Frank Sinatra", "starred_in"), ("Von Ryan's Express", "in_language"), ("German", None)],
    [("Assault on a Queen", "starred_actors"), ("Virna Lisi", "starred_in"), ("Casanova 70", "in_language"), ("Italian", None)],
    [("Assault on a Queen", "starred_actors"), ("Frank Sinatra", "starred_in"), ("Von Ryan's Express", "has_genre"), ("Adventure", None)],
    [("Assault on a Queen", "starred_actors"), ("Frank Sinatra", "starred_in"), ("Von Ryan's Express", "release_year"), ("1965", None)],
    [("Assault on a Queen", "starred_actors"), ("Virna Lisi", "starred_in"), ("Casanova 70", "has_genre"), ("Comedy", None)],
    [("Assault on a Queen", "directed_by"), ("Jack Donohue", "written_by"), ("Rod Serling", "has_genre"), ("Drama", None)],
]

formatted_paths = format_paths_for_llm(candidate_paths)

In [80]:
print("Question:", question)
predicted_qtype = predict_qtype(question)
print("Predicted Qtype:", predicted_qtype)

Question: What are the languages spoken in the films starred by [Assault on a Queen] actors?
Predicted Qtype: movie_to_actor_to_movie_to_language


In [82]:
response = query_llm(question, formatted_paths)
found_answers = extract_answers(response, expected_answers)

print("Found Answers:", found_answers)
print("Correct" if found_answers else "Wrong")


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Found Answers: ['German', 'Italian']
Correct
