In [3]:
import torch
from transformers import AutoTokenizer, AutoModel
import faiss
import numpy as np

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

def get_embeddings(texts):
    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        embeddings = model(**inputs).last_hidden_state.mean(dim=1)
    return embeddings

documents = [
    "Paris is the capital of France.",
    "Berlin is the capital of Germany.",
    "Tokyo is the capital of Japan."
]

document_embeddings = get_embeddings(documents).cpu().numpy()

index = faiss.IndexFlatL2(document_embeddings.shape[1])
index.add(document_embeddings)

from transformers import GPT2LMHeadModel, GPT2Tokenizer

gen_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gen_model = GPT2LMHeadModel.from_pretrained("gpt2")

def generate_response(query, context):
    input_text = f"Query: {query}\nContext: {context}\nAnswer:"
    inputs = gen_tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
    outputs = gen_model.generate(inputs['input_ids'], max_length=100, num_beams=5, no_repeat_ngram_size=2)
    generated_text = gen_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text

query = "What is the capital of France?"

query_embedding = get_embeddings([query]).cpu().numpy()
D, I = index.search(query_embedding, k=1)

retrieved_docs = [documents[i] for i in I[0]]

context = " ".join(retrieved_docs)
response = generate_response(query, context)

print("Generated Response:", response)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Generated Response: Query: What is the capital of France?
Context: Paris is the capital of France.
Answer: The capital is Paris, which means "the capital" in French. It is also the name of the city of Paris. The French word for "capital" is "Paris".
Question: Why do you think that the French language is so difficult to understand? Why is it so hard to learn French? Answer: Because French is a language that is hard for English speakers to grasp.
