# In [None]:
import os

from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.retrievers import BM25Retriever
from langchain.retrievers.ensemble import EnsembleRetriever
from langchain.text_splitter import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma, DocArrayInMemorySearch
from langchain_experimental.text_splitter import SemanticChunker
# NOTE: this later import rebinds ChatOpenAI, so the langchain_openai class is
# the one actually used below (same effective binding as the original order).
from langchain_openai import ChatOpenAI
from langchain_text_splitters import TokenTextSplitter

# Resolve the OpenAI key once instead of repeating the literal at every call
# site. The OPENAI_API_KEY environment variable takes precedence so a real
# secret never has to be written into the notebook; the original placeholder
# is kept as the fallback, so nothing changes when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", 'MY OPEN_API_KEY')

# Load the PDF document
loader = PyPDFLoader("/Users/pradhikshasuresh/Documents/Python/Space.pdf")
documents = loader.load()

# Create embeddings. A single instance is shared by the semantic splitter and
# by every vector store below (the original built two identical objects).
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Initialise one splitter per chunking strategy under comparison
text_splitter_character = CharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
text_splitter_recursive = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
text_splitter_token = TokenTextSplitter(chunk_size=1000, chunk_overlap=150)
text_splitter_semantic = SemanticChunker(embeddings)

# Split the loaded document with each strategy
docs_character = text_splitter_character.split_documents(documents)
docs_recursive = text_splitter_recursive.split_documents(documents)
docs_token = text_splitter_token.split_documents(documents)
docs_semantic = text_splitter_semantic.split_documents(documents)

# Create one Chroma vector store per chunking strategy.
# Each store gets its own collection_name: when the name is omitted, every
# Chroma.from_documents call falls back to the same default collection, and
# in-process Chroma clients share their state, so the four "separate" stores
# would all index (and retrieve from) one merged pool of chunks.
# NOTE(review): re-running this cell in the same kernel may append the chunks
# to these collections again -- verify, and restart the kernel if so.
db_character = Chroma.from_documents(
    docs_character, embeddings, collection_name="space_character"
)
db_recursive = Chroma.from_documents(
    docs_recursive, embeddings, collection_name="space_recursive"
)
db_token = Chroma.from_documents(
    docs_token, embeddings, collection_name="space_token"
)
db_semantic = Chroma.from_documents(
    docs_semantic, embeddings, collection_name="space_semantic"
)

# Initialize the LLM model used by every QA chain
llm_model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, openai_api_key=OPENAI_API_KEY)

def _similarity_retriever(store):
    """Return a similarity-search retriever over *store* that requests k=5."""
    return store.as_retriever(search_type="similarity", search_kwargs={"k": 5})


def _qa_chain(retriever, **chain_kwargs):
    """Build a ConversationalRetrievalChain on ``llm_model`` around *retriever*.

    Any extra keyword arguments are forwarded to ``from_llm`` unchanged.
    """
    return ConversationalRetrievalChain.from_llm(
        llm=llm_model,
        retriever=retriever,
        **chain_kwargs,
    )


# Combinations 1-4 - similarity retriever, one per chunking strategy
# (1: character, 2: recursive, 3: token, 4: semantic)
retriever_similarity1 = _similarity_retriever(db_character)
retriever_similarity2 = _similarity_retriever(db_recursive)
retriever_similarity3 = _similarity_retriever(db_token)
retriever_similarity4 = _similarity_retriever(db_semantic)

qa_1 = _qa_chain(retriever_similarity1)
qa_2 = _qa_chain(retriever_similarity2)
qa_3 = _qa_chain(retriever_similarity3)
qa_4 = _qa_chain(retriever_similarity4)

# Combinations 5-8 - BM25 retriever, one per chunking strategy
# (5: character, 6: recursive, 7: token, 8: semantic)
# NOTE(review): BM25Retriever is left at its library-default result count,
# whereas the similarity retrievers above ask for k=5 -- confirm that this
# asymmetry is intended for the comparison.
retriever_bm251 = BM25Retriever.from_documents(docs_character)
retriever_bm252 = BM25Retriever.from_documents(docs_recursive)
retriever_bm253 = BM25Retriever.from_documents(docs_token)
retriever_bm254 = BM25Retriever.from_documents(docs_semantic)

qa_5 = _qa_chain(retriever_bm251)
qa_6 = _qa_chain(retriever_bm252)
qa_7 = _qa_chain(retriever_bm253)
qa_8 = _qa_chain(retriever_bm254)

# Combinations 9-12 - ensemble of the similarity and BM25 retrievers built on
# the same chunks (9: character, 10: recursive, 11: token, 12: semantic)
ensemble_retriever1 = EnsembleRetriever(
    retrievers=[retriever_similarity1, retriever_bm251]
)
ensemble_retriever2 = EnsembleRetriever(
    retrievers=[retriever_similarity2, retriever_bm252]
)
ensemble_retriever3 = EnsembleRetriever(
    retrievers=[retriever_similarity3, retriever_bm253]
)
ensemble_retriever4 = EnsembleRetriever(
    retrievers=[retriever_similarity4, retriever_bm254]
)

# The ensemble chains pass chain_type="stuff" explicitly; the others do not.
qa_9 = _qa_chain(ensemble_retriever1, chain_type="stuff")
qa_10 = _qa_chain(ensemble_retriever2, chain_type="stuff")
qa_11 = _qa_chain(ensemble_retriever3, chain_type="stuff")
qa_12 = _qa_chain(ensemble_retriever4, chain_type="stuff")

# Every call is made with this same, never-updated (empty) history, so each
# question is answered without conversational context.
chat_history = []

# Evaluation questions
question1 = "Explain the concept of vacuum"
question2 = "Briefly explain what are charged particles and their 3 primary sources"
question3 = "What is freefall?"
questions = [question1, question2, question3]

# Pair every QA chain with its display label "Combination <n>" (1-based)
_chains = [
    qa_1, qa_2, qa_3, qa_4,
    qa_5, qa_6, qa_7, qa_8,
    qa_9, qa_10, qa_11, qa_12,
]
combinations = [
    (chain, f"Combination {number}")
    for number, chain in enumerate(_chains, start=1)
]

# Maps "Results for Combination <n>" -> list of chain outputs, one per
# question, in the order of `questions`.
results = {}

# Ask every question of every combination. The list is registered in
# `results` before the calls are made, so answers collected so far survive
# if a later call raises.
for chain, label in combinations:
    key = f"Results for {label}"
    answers = []
    results[key] = answers
    for query in questions:
        payload = {"question": query, "chat_history": chat_history}
        answers.append(chain.invoke(payload))

# Print each combination's heading followed by its numbered answers; a blank
# line follows every answer and every combination.
for heading, answer_list in results.items():
    print(heading)
    for number, answer in enumerate(answer_list, start=1):
        print(f"Question {number}: {answer}")
        print()
    print()
