In [4]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.notebook import tqdm

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline, Pipeline

from langchain.vectorstores import FAISS
from langchain_community.vectorstores.utils import DistanceStrategy

from sentence_transformers import SentenceTransformer

from config import Config
from knowledge_base import load_knowledge_vdbase, get_embedding_model

In [2]:
# Build the project settings object (Config comes from the local `config` module).
# The cells below pass it to load_knowledge_vdbase() and get_embedding_model().
config = Config()

In [3]:
# Location handed to the project loader. It is a relative path, so it is
# resolved against the kernel's current working directory.
# NOTE(review): presumably a persisted vector index on disk - confirm in knowledge_base.py.
vdbase_dir = 'knowledge_vector_base/'

# Load the vector store once; later cells query it via similarity_search().
KNOWLEDGE_VECTOR_DATABASE = load_knowledge_vdbase(vdbase_dir, config)



In [5]:
# Obtain the embedding model from the project helper, driven by the same config object.
# NOTE(review): presumably the same model the vector database was built with - confirm
# in knowledge_base.py, otherwise query vectors and index vectors are not comparable.
embedding_model = get_embedding_model(config)



In [6]:
# Example question used to exercise the retriever.
user_query = "How to build RAG-system?"
# Embed the question with the model loaded above.
# NOTE(review): in the cells visible here `query_emb` is not consumed - the search
# below is given the raw text in `user_query`. Confirm whether a later cell needs it.
query_emb = embedding_model.embed_query(user_query)

In [7]:
# Retrieve the k=5 best matches for the raw query text from the vector database.
# NOTE(review): the store is presumably a LangChain vector store that embeds the
# query internally; the result is indexed like a list and each item exposes
# `.page_content` in the next cell.
topk_docs = KNOWLEDGE_VECTOR_DATABASE.similarity_search(query=user_query, k=5)

In [8]:
# Show the text of the first retrieved document under a visual separator.
top_doc_banner = "\n==================================Top document=================================="
print(top_doc_banner)

# The first element of the search results is displayed as the top hit.
best_match = topk_docs[0]
print(best_match.page_content)


End-to-End finetuning of RAG (including DPR retriever) for Question Answering.

This finetuning script is actively maintained by [Shamane Siri](https://github.com/shamanez). Feel free to ask questions on the [Forum](https://discuss.huggingface.co/) or post an issue on [GitHub](https://github.com/huggingface/transformers/issues/new/choose) and tag @shamanez.

Others that helped out: Patrick von Platen (@patrickvonplaten), Quentin Lhoest (@lhoestq), and Rivindu Weerasekera (@rivinduw)

The original RAG implementation is able to train the question encoder and generator end-to-end.
This extension enables complete end-to-end training of RAG including the context encoder in the retriever component.
Please read the [accompanying blog post](https://shamanesiri.medium.com/how-to-finetune-the-entire-rag-architecture-including-dpr-retriever-4b4385322552) for details on this implementation.

The original RAG code has also been modified to work with the latest versions of pytorch lightning (versio