In [None]:
#must have python 3.10 installed
!pip install pandas langchain-openai langchain-core langchain-community langchain pypdf chromadb rank-bm25 cohere

In [None]:
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema.document import Document
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.retrievers import BM25Retriever
from langchain.retrievers.merger_retriever import MergerRetriever
from langchain.retrievers.document_compressors import CohereRerank
from langchain.retrievers import ContextualCompressionRetriever
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.chat import MessagesPlaceholder

In [None]:
import os
from getpass import getpass


def _require_secret(env_var):
    """
    Return the secret held in the environment variable ``env_var``.

    If the variable is unset or empty, the user is prompted for the value
    with hidden input, so the secret never has to be written into the notebook.

    :param env_var: Name of the environment variable to read.
    :return: The non-empty secret string.
    :raises RuntimeError: If no value is available from either source.
    """
    secret = os.environ.get(env_var) or getpass(f"{env_var}: ")
    if not secret:
        raise RuntimeError(f"{env_var} is required but was not provided.")
    return secret


# SECURITY: API keys must never be hard-coded in the notebook.
# NOTE(review): the keys previously committed in this cell should be treated as
# leaked and revoked/rotated with the providers.
my_openai_api_key = _require_secret("OPENAI_API_KEY")
# The Cohere reranker is constructed later without an explicit key, so the key
# is made available through the process environment.
os.environ["COHERE_API_KEY"] = _require_secret("COHERE_API_KEY")

# Data Loading and Vectorization

In [None]:
# Read the plain-text source, keeping every line except those that consist of
# a single newline character. Kept lines retain their trailing newline.
file_path = r"temp.txt"
data = []
with open(file_path, 'r') as file:
    data.extend(line for line in file if line != '\n')


In [None]:
from langchain_community.document_loaders.pdf import PyPDFLoader

# Rulebooks and homebrew PDFs to index (paths are relative to the notebook)
file_paths = [
    "5e/core/D&D 5e - DM Guide.pdf",
    "5e/core/D&D 5E - Monster Manual.pdf",
    "5e/core/D&D 5E - Player's Handbook.pdf",
    "5e/expansions/D&D 5E - Mordenkainen's Tome of Foes.pdf",
    "5e/expansions/D&D 5e - Tasha’s Cauldron of Everything.pdf",
    "5e/expansions/D&D 5E - Volo's Guide to Monsters.pdf",
    "5e/expansions/D&D 5E - Xanathar's Guide to Everything.pdf",
    "homebrew/Heiraxia.pdf",
]

# Collect the text of every document the loader returns, in file order
page_contents = []
for pdf_path in file_paths:
    loaded_pages = PyPDFLoader(pdf_path).load()
    page_contents.extend(page.page_content for page in loaded_pages)


In [None]:
# Text splitter configured for 300-character chunks with a 100-character overlap,
# splitting on periods and newlines.
# NOTE(review): text_splitter is not referenced anywhere else in the visible
# notebook; documents are handed to the vector store unsplit. Confirm whether
# splitting was intended.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=100,
    separators=[".", "\n"],
)

# Build Document objects (with "title" metadata) from text lines and PDF pages
def prepare_documents_with_metadata(data, page_contents):
    """
    Wrap text lines and PDF page texts in Document objects.

    The lines in ``data`` are scanned in order. A line containing ":" is
    treated as a title: it is remembered but not emitted as a document. Every
    other line becomes a Document whose "title" metadata is the most recently
    seen title line ("" if none has been seen yet). Afterwards each entry of
    ``page_contents`` becomes a Document with the title "PDF Page".

    :param data: Iterable of text lines.
    :param page_contents: Iterable of page text strings.
    :return: A list of Document objects: text-line documents first, then pages.
    """
    current_title = ""
    line_docs = []
    for entry in data:
        if ":" not in entry:
            # Body line: attach whichever title is currently in effect
            line_docs.append(
                Document(page_content=entry, metadata={"title": current_title})
            )
        else:
            # Title line: applies to the body lines that follow
            current_title = entry

    page_docs = [
        Document(page_content=page_text, metadata={"title": "PDF Page"})
        for page_text in page_contents
    ]
    return line_docs + page_docs


# Build the combined corpus from the text lines and the PDF pages
docs = prepare_documents_with_metadata(data=data, page_contents=page_contents)

# Show the first document as a quick sanity check
print(docs[0])

In [None]:
# Spot-check the metadata of the document at index 10
print(docs[10].metadata)

## Defining Semantic & Lexical Retrievers

In [None]:
# OpenAI embedding model used to create a vector embedding for each document
embeddings = OpenAIEmbeddings(openai_api_key=my_openai_api_key)
# Store the documents along with their vector embeddings in a Chroma database
db = Chroma.from_documents(docs, embeddings)
# Semantic retriever returning the top-7 most semantically relevant documents.
# The result count has to be passed through search_kwargs: a bare k=7 keyword
# on as_retriever() is not a search parameter, so the retriever would keep
# using the vector store's default result count.
semantic_retriever = db.as_retriever(search_kwargs={"k": 7})
# Lexical retriever using the BM25 algorithm, returning the top-7 most
# lexically similar documents
bm25_retriever = BM25Retriever.from_documents(docs, k=7)
# Merge both retrievers into a single retriever. With 7 results from each
# source this yields up to 14 candidates.
# NOTE(review): the merger is not expected to de-duplicate; confirm if needed.
merged_retriever = MergerRetriever(retrievers=[semantic_retriever, bm25_retriever])

## Reranker and Final Retriever

In [None]:
# Cohere Rerank is the compression step; top_n=5 limits it to five results
compressor = CohereRerank(top_n=5)
# Final retriever: documents are first fetched by the merged base retriever and
# then passed through the reranking compressor
compression_retriever = ContextualCompressionRetriever(
    base_retriever=merged_retriever,
    base_compressor=compressor,
)

# LLM & Chain Setup

In [None]:
# Chat model (gpt-3.5-turbo, temperature 0.0) shared by the chains defined below
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0.0,
    openai_api_key=my_openai_api_key,
)

## Defining First Chain
### This chain's job is to take a question and a chat history and create a version of the question that is contextualized with the chat history

In [None]:
# System instruction: rewrite the latest question so it stands on its own,
# without answering it. (Adjacent literals concatenate into a single string.)
contextualize_system_prompt = (
    "Given a chat history and the latest user question which might reference "
    "context in the chat history, formulate a standalone question which can be "
    "understood without the chat history. Do NOT answer the question, just "
    "reformulate it if needed and otherwise return it as is."
)

# Prompt layout: system instruction, then prior messages, then the new question
contextualize_messages = [
    ("system", contextualize_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
]
contextualize_prompt = ChatPromptTemplate.from_messages(contextualize_messages)

# prompt -> model -> plain string
contextualize_chain = contextualize_prompt | llm | StrOutputParser()

## Let's Test the Contextualization Chain

## Creating the RAG Template

In [None]:
# Prompt for the answering step. {context} receives the retrieved documents and
# {question} the user's inquiry. The instruction text is sent to the model, so
# its wording matters: "anwser" and "Site" were corrected to "answer" and "Cite".
rag_template = """
{context}
You are a search engine for a Dungeons and Dragons 5th edition Dungeon Master who is accessing rules, stats, and details
from the rulebooks as well as homebrew content. 
Use any information from the context to answer the question. Cite where you find the knowledge. 
If you can't find information from the documents, use your own knowledge to answer the question.
Question: {question}
"""
rag_prompt = ChatPromptTemplate.from_template(rag_template)

## Creating Main Chain
### This chain needs to be able to dynamically determine if the question needs to be contextualized (which is not the case when there is no chat history)

In [None]:
# itemgetter is built into Python and allows you to create a function that
# returns the value of a key
from operator import itemgetter
# Create the chain
from langchain_core.output_parsers import StrOutputParser

# Sub-chain that pulls the "question" value out of the input mapping and feeds
# it to the reranking retriever
retrieve_context = itemgetter("question") | compression_retriever

# Main RAG chain: pass the input through, add the retrieved "context" next to
# the original keys, fill the prompt, call the model, and return plain text
chain = (
    RunnablePassthrough()
    | RunnablePassthrough().assign(context=retrieve_context)
    | rag_prompt
    | llm
    | StrOutputParser()
)

In [None]:
import tkinter as tk
from tkinter import messagebox
from tkinter import scrolledtext

# Main application window
window = tk.Tk()
window.title("Tower of Inquiry")

# Colour scheme: near-white "paper" background with black text; the accent
# colour (used for the submit button background) is also black
paper_background = '#f8f8f8'
text_color = 'black'
accent_color = '#000000'

# Typography
font_family = 'Baskerville'
base_font_size = 22
font_style = 'italic'

# Apply the paper background to the window itself
window.configure(bg=paper_background)

# Scrollable transcript area; it grows with the window
chat_history = scrolledtext.ScrolledText(
    window,
    width=70,
    height=10,
    bg=paper_background,
    fg=text_color,
    font=(font_family, base_font_size, font_style),
)
chat_history.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

# Prompt label above the input field
question_label = tk.Label(
    window,
    text="Enter your inquiry:",
    bg=paper_background,
    fg=text_color,
    font=(font_family, base_font_size),
)
question_label.pack(pady=(10, 10))

# Single-line input; insertbackground sets the text cursor colour
question_entry = tk.Entry(
    window,
    width=100,
    bg=paper_background,
    fg=text_color,
    insertbackground=text_color,
)
question_entry.pack(pady=(10, 10))

def get_question(event=None):
    """
    Submit the text in the entry field to the RAG chain and show the answer.

    Used both as the button command (called with no arguments) and as the
    <Return> key handler (called with the Tk event).

    An empty or whitespace-only inquiry produces an error dialog and nothing is
    sent. If the chain call fails, the failure is shown in the transcript and
    in an error dialog, and the inquiry is left in the entry field so it can be
    retried.

    :param event: Tk event when triggered by a key binding; unused.
    :return: None
    """
    question = question_entry.get()
    if not question.strip():
        messagebox.showerror("Error", "Please enter an inquiry.")
        return

    chat_history.insert(tk.END, f"\nUser: {question}\n\n")
    chat_history.see(tk.END)
    # chain.invoke() blocks the Tk event loop until it returns, so repaint now
    # to make the user's question visible while waiting.
    chat_history.update_idletasks()

    try:
        response = chain.invoke({"question": question})
    except Exception as exc:
        # Tkinter only reports callback exceptions on stderr, so without this
        # the window would show no sign that the request failed.
        chat_history.insert(tk.END, f"Lorekeeper: [request failed: {exc}]\n")
        chat_history.see(tk.END)
        messagebox.showerror("Error", f"The inquiry could not be answered:\n{exc}")
        return

    chat_history.insert(tk.END, f"Lorekeeper: {response}\n")
    chat_history.see(tk.END)
    question_entry.delete(0, tk.END)

# Submit button. The background is the (black) accent colour, so the label uses
# the light paper colour; black text on the black accent would be unreadable on
# platforms that honour button colours.
submit_button = tk.Button(
    window,
    text="Inquire",
    command=get_question,
    background=accent_color,
    foreground=paper_background,
    font=(font_family, base_font_size),
    bd=0,
    padx=10,
    pady=5,
)
submit_button.pack()

# Pressing Enter anywhere in the window also submits the inquiry
window.bind('<Return>', get_question)

# Start the Tk event loop; this call blocks until the window is closed
window.mainloop()
