In [1]:
from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
import os
import json

from langchain import hub
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema.messages import AIMessage, HumanMessage
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.vectorstores import Chroma

In [2]:
# Read the OpenAI API key from the local secret.json file (kept out of the
# notebook itself) and expose it through the OPENAI_API_KEY environment variable.
with open('secret.json', 'r') as secrets_file:
    secrets_payload = json.load(secrets_file)

key = secrets_payload['key']
os.environ["OPENAI_API_KEY"] = key

In [14]:
# Components of the document directory: <base_docs>/<subject>/<sub>/
base_docs = "documents"
subject = "aipi530"
sub = "material"

# Assemble the directory path from its parts, keeping the trailing slash.
doc_path = "/".join((base_docs, subject, sub)) + "/"

In [15]:
# Build a directory loader rooted at doc_path, then load its documents.
loader = PyPDFDirectoryLoader(path=doc_path)
docs = loader.load()

In [16]:
# Split the loaded documents into chunks of size 1000 with an overlap of 200
# between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

In [17]:
# Embed the chunks with OpenAI embeddings and index them in a Chroma store,
# then expose that store through the retriever interface used by the chain.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)
retriever = vectorstore.as_retriever()

In [25]:
# Chat model used by the chains in this notebook: temperature 0, with each
# completion capped at 1500 tokens.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
    max_tokens=1500,
)

# System-message text used when building chat prompts.
# Every line ends with a backslash continuation, which joins it to the next
# line with nothing in between, so the space before each backslash is what
# keeps the words apart (without it the text reads "questionusing",
# "history.Note", "questionin").
qa_system_prompt = """
Given a chat history and the latest user question \
which might reference the chat history, answer the question \
using any required context from the provided chat history. \
Note, if there is no chat history, that is most probably the first question \
in the conversation, so you can just answer it."""

# Instructions for the question-condensing step. This step must only rewrite
# the latest question into a standalone one; its output is used as the
# retrieval query, so it must not answer the question itself.
condense_q_system_prompt = """Given a chat history and the latest user question \
which might reference the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""

# Prompt layout: system instructions, then the prior messages supplied under
# "chat_history", then the latest user question.
condense_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", condense_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

# prompt -> chat model -> plain string (the reformulated question).
condense_q_chain = condense_q_prompt | llm | StrOutputParser()

def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined in order, separated by a blank line;
        an empty string when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

def condense_question(input: dict):
    """Pick what should produce the retrieval query for this turn.

    Args:
        input: dict with a "question" entry and, optionally, "chat_history".

    Returns:
        ``input["question"]`` unchanged when "chat_history" is missing or
        empty; otherwise the ``condense_q_chain`` object itself (not its
        output). NOTE(review): presumably LangChain then invokes the returned
        runnable with the same input; confirm against the RunnableLambda docs.
    """
    has_history = bool(input.get("chat_history"))
    if not has_history:
        # No earlier turns to resolve references against: use the question as is.
        return input["question"]
    return condense_q_chain

# Final answering prompt. The system message carries the instructions plus a
# {context} slot; without that slot the passages fetched by the retriever are
# computed by the chain but never shown to the model.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt + "\n\nRetrieved context:\n{context}"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

# Expects an input dict with "question" and "chat_history".
# `assign` adds a "context" entry to that dict, built as
#   condense_question -> retriever -> format_docs
# and the enriched dict is then rendered by qa_prompt and sent to the model.
# No output parser is attached, so the chain yields the model's message as is.
rag_chain = (
    RunnablePassthrough.assign(context=condense_question | retriever | format_docs)
    | qa_prompt
    | llm
)

TypeError: ChatPromptTemplate.from_messages() got an unexpected keyword argument 'max_tokens_limit'

In [22]:
import re
def ext_ques(string):
    """Pull the question text out of a ``Question: ... ```-delimited string.

    Looks for ``Question:`` followed by whitespace, then captures everything
    (newlines included) up to the whitespace that precedes the next run of
    three backticks.

    Args:
        string: text to search.

    Returns:
        The captured question with surrounding whitespace stripped, or
        ``string`` unchanged when the pattern is not found.
    """
    found = re.search(r'Question:\s+(.*?)\s+```', string, flags=re.DOTALL)
    if found is None:
        # Nothing matched the expected layout: hand the input back untouched.
        return string
    return found.group(1).strip()