In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

# Base folder (editable through the form field below) that holds the dataset
# files and the saved checkpoints.
FOLDER_NAME = "data" # @param {type: "string"}
data_path = f"{FOLDER_NAME}/"  # Folder path with a trailing separator
checkpoint_path = data_path  # Checkpoints share the data folder

# Restore the fine-tuned question-answering model and its tokenizer from the
# checkpoint directory inside the data folder.
model_path = f"{data_path}test-squad-trained"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForQuestionAnswering.from_pretrained(model_path)

def answer_question(question: str, context: str, max_answer_tokens: int = 30) -> str:
    """Extract the answer to ``question`` from ``context`` with the QA model.

    Uses the module-level ``tokenizer`` and ``model``. The answer is the token
    span ``(start, end)`` that maximizes ``start_logit + end_logit`` subject to
    ``start <= end`` and a length of at most ``max_answer_tokens`` tokens.
    Scoring the pair jointly avoids the silent empty result produced when the
    independently chosen end position falls before the start position.

    Args:
        question: The question to answer.
        context: The passage that is searched for the answer.
        max_answer_tokens: Upper bound on the answer length in tokens.

    Returns:
        The decoded answer text with special tokens removed. May be empty if
        the best span covers only special tokens.

    Raises:
        ValueError: If ``max_answer_tokens`` is smaller than 1.
    """
    if max_answer_tokens < 1:
        raise ValueError("max_answer_tokens must be at least 1")

    # Tokenize the question/context pair and place it on the model's device.
    inputs = tokenizer(question, context, return_tensors="pt", truncation=True)
    inputs = inputs.to(model.device)

    # Forward everything the tokenizer produced (e.g. token_type_ids when
    # present), not only the input IDs and attention mask.
    with torch.no_grad():
        outputs = model(**inputs)

    # Batch size is 1, so take the single row of logits.
    start_logits = outputs.start_logits[0]
    end_logits = outputs.end_logits[0]
    seq_len = start_logits.size(0)

    # span_scores[i, j] is the score of the span starting at i and ending at j.
    span_scores = start_logits.unsqueeze(1) + end_logits.unsqueeze(0)

    # Keep only spans with start <= end and at most max_answer_tokens tokens.
    positions = torch.arange(seq_len, device=span_scores.device)
    span_offsets = positions.unsqueeze(0) - positions.unsqueeze(1)  # end - start
    is_valid = (span_offsets >= 0) & (span_offsets < max_answer_tokens)
    span_scores = span_scores.masked_fill(~is_valid, float("-inf"))

    # argmax works on the flattened matrix; recover the (row, column) pair.
    best_flat = int(torch.argmax(span_scores))
    start_idx, end_idx = divmod(best_flat, seq_len)

    # Convert the selected token IDs back to text.
    answer_ids = inputs["input_ids"][0, start_idx:end_idx + 1].tolist()
    answer = tokenizer.decode(answer_ids, skip_special_tokens=True)

    return answer

# Example usage: run the extractive QA helper on a short passage and print
# the question together with the extracted answer.
question = "What is the capital of France?"
context = "France, in Western Europe, encompasses medieval cities, alpine villages and Mediterranean beaches. Paris, its capital, is famed for its fashion houses, classical art museums including the Louvre and monuments like the Eiffel Tower."

# answer_question reads the module-level `tokenizer` and `model` at call time.
answer = answer_question(question, context)
print(f"Question: {question}")
print(f"Answer: {answer}")


Question: What is the capital of France?
Answer: paris


In [4]:
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Load the T5 model and tokenizer used by generate_answer.
# NOTE(review): this rebinds the module-level names `tokenizer` and `model`
# that the earlier question-answering cell also assigns; after this cell runs,
# answer_question would pick up the T5 objects instead of the QA checkpoint.
# NOTE(review): the notebook installs `sentencepiece` in a later cell;
# presumably T5Tokenizer needs it, so that install should run first - confirm.
model_name = "t5-base"  # You can use "t5-small", "t5-large", or a fine-tuned version
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

def generate_answer(question: str, context: str, max_input_length: int = 512) -> str:
    """Generate an answer to ``question`` from ``context`` with the T5 model.

    Uses the module-level ``tokenizer`` and ``model``. The prompt is built as
    ``"question: <question> context: <context>"`` and decoded with beam search.

    Args:
        question: The question to answer.
        context: The passage the answer should be drawn from.
        max_input_length: Maximum number of input tokens; longer prompts are
            truncated. Without an explicit limit the tokenizer ignores
            ``truncation=True`` and only emits a warning.

    Returns:
        The generated answer text with special tokens removed.
    """
    # Prepare the input text in the format expected by T5.
    input_text = f"question: {question} context: {context}"
    encoded = tokenizer(
        input_text,
        return_tensors="pt",
        truncation=True,
        max_length=max_input_length,
    )
    encoded = encoded.to(model.device)

    # Generate the answer; the attention mask is passed explicitly so that
    # generation does not have to infer it from the input IDs.
    with torch.no_grad():
        outputs = model.generate(
            input_ids=encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            max_length=150,
            num_beams=5,
            early_stopping=True,
        )

    # Decode the highest-scoring generated sequence.
    answer = tokenizer.decode(outputs[0].tolist(), skip_special_tokens=True)
    return answer

# Example usage: ask the generative T5 helper a paraphrased question about
# the same passage and print the question together with the generated answer.
question = " Can you tell me the capital of France ?"
context = "France, in Western Europe, encompasses medieval cities, alpine villages and Mediterranean beaches. Paris, its capital, is famed for its fashion houses, classical art museums including the Louvre and monuments like the Eiffel Tower."

# generate_answer reads the module-level `tokenizer` and `model` at call time.
answer = generate_answer(question, context)
print(f"Question: {question}")
print(f"Answer: {answer}")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Question:  Can you tell me the capital of France ?
Answer: Paris


In [4]:
%pip install sentencepiece

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting sentencepiece
  Downloading sentencepiece-0.2.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (7.7 kB)
Downloading sentencepiece-0.2.0-cp310-cp310-macosx_11_0_arm64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.2.0
Note: you may need to restart the kernel to use updated packages.
