In [None]:
import getpass
import os

import pandas as pd
from langchain.chains import RetrievalQA
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import FlashrankRerank
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Use the OpenAI key already present in the environment; prompt for it (input
# is not echoed) only when it is missing or empty. This avoids overwriting a
# valid key with a placeholder and keeps the secret out of the notebook source.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# Load the source text file and split it into overlapping chunks
# (chunk_size=500, chunk_overlap=100).
documents = TextLoader("./data/TorQ+Conf.txt").load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=500,
)
texts = text_splitter.split_documents(documents)

# Record each chunk's position in the split sequence under metadata["id"].
for chunk_id, chunk in enumerate(texts):
    chunk.metadata["id"] = chunk_id

# Embed the chunks and index them in FAISS; the base retriever is configured
# with search_kwargs k=20.
embedding = OpenAIEmbeddings(model="text-embedding-ada-002")
vector_index = FAISS.from_documents(texts, embedding)
retriever = vector_index.as_retriever(search_kwargs={"k": 20})

# Chat model created with temperature=0.
llm = ChatOpenAI(temperature=0)

# Wrap the base retriever so its results go through the FlashRank compressor.
compressor = FlashrankRerank()
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

In [None]:
# Question to answer from the indexed document.
query = "How would I query my quote table for Google and IBM from last week until now?"

# Documents returned by the compression retriever for the query, kept for
# inspection. `invoke` replaces the deprecated `get_relevant_documents` and
# matches the `qabot.invoke` call below.
compressed_docs = compression_retriever.invoke(query)

# The QA chain is handed the retriever itself rather than `compressed_docs`,
# so it performs its own retrieval when invoked.
qabot = RetrievalQA.from_chain_type(llm=llm, retriever=compression_retriever)
print(f'{query}\n')
print(qabot.invoke({"query": query})['result'])