In [1]:
# from transformers import AutoModelWithHeads, AutoTokenizer
import faiss
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run on Apple's MPS backend when PyTorch reports it as available, else on CPU.
device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")

# Sentence-embedding model used to encode queries for retrieval.
# NOTE(review): this path and the two below are absolute, machine-specific
# locations; the notebook will not run elsewhere until they are made configurable.
model = SentenceTransformer("/Users/likhit/Desktop/Projects/RAG/1fineeeeminilm_proj512_only_dense")

# FAISS index plus the CSV mapping whose "chunk" column is looked up by the
# row positions that the index search returns (see retrieve_top_k_contexts).
index = faiss.read_index(
    "/Users/likhit/Desktop/Projects/RAG/model/retriever/testmodel/chunks_test_faiss_store/context_index.faiss"
)
context_df = pd.read_csv(
    "/Users/likhit/Desktop/Projects/RAG/model/retriever/testmodel/chunks_test_faiss_store/context_mapping.csv"
)

# Switch to evaluation mode, then move the model to the selected device.
# The `.to(...)` call stays last so the cell still displays the model summary.
model.eval()
model.to(device)

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

In [3]:
def retrieve_top_k_contexts(question, k=5):
    """Return the stored text chunks that best match ``question``.

    The question is embedded with the module-level ``model``, searched against
    the module-level FAISS ``index``, and every returned row position is looked
    up in the ``chunk`` column of ``context_df``.

    Args:
        question: Query text to embed.
        k: Maximum number of chunks to return.

    Returns:
        A list of at most ``k`` chunk values, in the order the index returned
        them. The list is shorter than ``k`` when the index yields fewer hits,
        and empty when ``k`` is not positive.
    """
    if k <= 0:
        return []

    # FAISS expects a C-contiguous float32 matrix shaped (n_queries, dim).
    query_vec = model.encode([question], convert_to_numpy=True)
    query_vec = np.ascontiguousarray(np.atleast_2d(query_vec), dtype=np.float32)

    _, indices = index.search(query_vec, k)

    # FAISS pads the result with -1 when it finds fewer than k neighbours.
    # Those must be skipped: `iloc[-1]` would silently return the last row.
    return [context_df.iloc[int(i)]["chunk"] for i in indices[0] if i >= 0]

In [4]:
def build_prompt(question, contexts):
    """Assemble the generator prompt from an instruction, contexts and question.

    Args:
        question: The question text, inserted after ``Question: ``.
        contexts: Iterable of context strings, joined with single newlines.

    Returns:
        One string: the fixed instruction line, the newline-joined contexts,
        a blank line, ``Question: <question>``, and a trailing ``Answer:``.
    """
    instruction = "Answer the following question using complete sentences based only on the given context.\n"
    context_block = "\n".join(contexts)
    return f"{instruction}{context_block}\n\nQuestion: {question}\nAnswer:"

In [5]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Tokenizer and seq2seq generator are both loaded from the same
# "google/flan-t5-base" checkpoint; the generator is then moved to `device`.
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
model2 = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
model2 = model2.to(device)

def generate_answer(prompt):
    """Generate an answer string for ``prompt`` with the module-level ``model2``.

    Args:
        prompt: Full prompt text passed to ``tokenizer``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    # Tokenize with truncation at 512 tokens, then move the tensors to `device`.
    # NOTE(review): build_prompt puts the question after the contexts, so a long
    # prompt presumably loses the question to truncation -- confirm the
    # tokenizer's truncation side and prompt lengths.
    encoded = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
    encoded = encoded.to(device)

    generated = model2.generate(**encoded, max_length=512)

    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [6]:
# End-to-end check of the pipeline: retrieve contexts for one question,
# build the prompt from them, and generate an answer.
question = "Who is the music director of the Quebec Symphony Orchestra?"
contexts = retrieve_top_k_contexts(question, k=5)
prompt = build_prompt(question, contexts)
answer = generate_answer(prompt)

print("Question:", question)
print("\nPrompt:\n", prompt)
# Was "\Answer:", an invalid escape sequence that printed a literal backslash;
# a leading newline matches the "\nPrompt:\n" line above.
print("\nAnswer:", answer)

Question: Who is the music director of the Quebec Symphony Orchestra?

Prompt:
 Answer the following question using complete sentences based only on the given context.
Daphnis and Chloé About Fabiel Gabel Français des Jeunes (French Youth Orchestra). Following. G. His rapidly-expanding U.S. presence has seen him leading the Cleveland Orchestra, Houston Symphony Orchestra, Detroit Symphony Orchestra, San Diego Symphony Orchestra and more. Fabré Hamelin, Beatrice
into "Fidelio.")" Beethoven; likewise, a delicate chorale for winds; and the entwining of solo voices, chorus and instrumental forces, all commandingly marshaled by Tilson Thomas, making for a vast and expressive emotional matrix. The San Francisco Symphony Chorus, directed by Ragnar Bohlin, gripped
HOUSTON (Jan. 23, 2018) – Fabien Gabel, music director of the Quebec Symphony Orchestra, returns to Houston to lead the Houston Symphony in Ravel’s Daphnis and Chloé on Feb. 2 and 3 at 8 p.m. and Feb. 4 at 2:30 p.m. in Jones Hall. Re