In [11]:
import pandas as pd
from langchain_community.document_loaders import DataFrameLoader
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
import os
import bs4
from PyPDF2 import PdfReader
from langchain_chroma import Chroma
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain_community.document_loaders import WebBaseLoader
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain


import warnings

# Suppress every warning for the rest of the session so cell outputs stay readable.
warnings.simplefilter("ignore")

# Load environment variables via python-dotenv, then read the OpenAI key from the environment.
load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY")
# Module-level chat model; create_conversational_chain below uses this instance.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

# Load Data

In [6]:
def load_data(
        uploaded_files,
    ):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        uploaded_files: Path of the PDF to read. Despite the plural name,
            exactly one file is opened per call.

    Returns:
        str: The text of all pages concatenated in page order, with no
        separator inserted between pages.
    """
    with open(uploaded_files, 'rb') as f:
        pdf_reader = PdfReader(f)
        # Pages must be read while the file handle is still open.
        # `or ""` keeps the join from failing if a page yields no text.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
    return "".join(page_texts)

def embbedings_and_store(docs, chunk_size=5000, chunk_overlap=1000, persist_directory="chroma_db"):
    """Split raw text into chunks, embed them into Chroma, and return a retriever.

    Args:
        docs: The full document text as a single string (for example the
            value returned by ``load_data``).
        chunk_size: Maximum chunk length, measured with ``len`` (characters).
        chunk_overlap: Number of characters shared between adjacent chunks.
        persist_directory: Directory handed to Chroma for on-disk persistence.

    Returns:
        The retriever produced by ``vectorstore.as_retriever()`` with its
        default search settings.

    Note:
        NOTE(review): every call adds the chunks to whatever already lives in
        ``persist_directory``; re-running against the same directory looks
        like it will accumulate duplicate chunks -- confirm and clear the
        directory between runs if so.
    """
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

    # Split on single spaces so chunks break between words rather than mid-word.
    text_splitter = CharacterTextSplitter(
        separator=" ",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    text_chunks = text_splitter.split_text(docs)

    vectorstore = Chroma.from_texts(
        text_chunks,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    return vectorstore.as_retriever()

In [7]:
# Extract the PDF text, then chunk, embed and index it.
docs = load_data(uploaded_files='gobook.pdf')
# Note: despite its name, `vector` holds the retriever returned by embbedings_and_store.
vector = embbedings_and_store(docs=docs)

In [8]:
# Display the retriever to confirm the vector store was built.
vector

VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x7fd2cd57f220>)

# Create Chain

## Model 1: history-aware retrieval chain with per-session message history

In [None]:
def create_conversational_chain(retriever):
    """Assemble a history-aware retrieval chain with per-session chat history.

    The returned chain rewrites the latest user message into a standalone
    question using the chat history, retrieves documents for it through
    ``retriever``, and answers from the retrieved context. It relies on the
    module-level ``llm`` for both the rewriting and the answering step.

    Args:
        retriever: Retriever wrapped by ``create_history_aware_retriever``.

    Returns:
        A ``RunnableWithMessageHistory`` that reads the user message from the
        ``"input"`` key, injects history under ``"chat_history"``, and records
        the ``"answer"`` output. Histories are kept in a dict private to this
        chain instance, keyed by session id.
    """

    def _build_chat_prompt(system_text):
        # Both prompts share one layout: system text, prior turns, latest user message.
        return ChatPromptTemplate.from_messages(
            [
                ("system", system_text),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )

    # Step 1: turn a context-dependent follow-up into a standalone question.
    rewrite_instructions = (
        "Given a chat history and the latest user question "
        "which might reference context in the chat history, "
        "formulate a standalone question which can be understood "
        "without the chat history. Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is."
    )
    question_rewriting_retriever = create_history_aware_retriever(
        llm, retriever, _build_chat_prompt(rewrite_instructions)
    )

    # Step 2: answer from the retrieved documents, which fill {context}.
    answer_instructions = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know. Use three sentences maximum and keep the "
        "answer concise."
        "\n\n"
        "{context}"
    )
    answering_chain = create_stuff_documents_chain(
        llm, _build_chat_prompt(answer_instructions)
    )
    retrieval_chain = create_retrieval_chain(
        question_rewriting_retriever, answering_chain
    )

    # Session id -> message history; lives as long as the returned chain does.
    histories_by_session = {}

    def _history_for(session_id: str) -> BaseChatMessageHistory:
        try:
            return histories_by_session[session_id]
        except KeyError:
            # First message of this session: start an empty history.
            fresh_history = ChatMessageHistory()
            histories_by_session[session_id] = fresh_history
            return fresh_history

    return RunnableWithMessageHistory(
        retrieval_chain,
        _history_for,
        input_messages_key="input",
        history_messages_key="chat_history",
        output_messages_key="answer",
    )

## Model 2: ConversationalRetrievalChain with buffer memory

In [9]:
def start_conversation(vector_embeddings):
    """Create a ConversationalRetrievalChain backed by an in-memory chat buffer.

    Args:
        vector_embeddings: Object passed to the chain as its ``retriever``.

    Returns:
        The chain built by ``ConversationalRetrievalChain.from_llm``.
    """
    # A separate default-configured model; this does not reuse the module-level `llm`.
    chat_model = ChatOpenAI()
    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=vector_embeddings,
        # The buffer stores the running conversation under the 'chat_history' key.
        memory=ConversationBufferMemory(
            memory_key='chat_history',
            return_messages=True
        ),
    )

In [12]:
# Build the "model 2" chain on top of the retriever created earlier, then display it.
conv = start_conversation(vector)
conv

ConversationalRetrievalChain(memory=ConversationBufferMemory(return_messages=True, memory_key='chat_history'), combine_docs_chain=StuffDocumentsChain(llm_chain=LLMChain(prompt=ChatPromptTemplate(input_variables=['context', 'question'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template="Use the following pieces of context to answer the user's question. \nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n{context}")), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='{question}'))]), llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x7fd2cd92a6e0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x7fd2cdaf45e0>, openai_api_key=SecretStr('**********'), openai_proxy='')), document_variable_name='context'), question_generator=LLMChain(prompt=PromptTemplate(input_variables=['chat_history', 

# Chatting

In [None]:
def conversation_chat(query, chain, history=None, session_id="abc123"):
    """Send one user message through a history-aware chain and return the answer.

    Args:
        query: The user's message; passed to the chain under the ``"input"`` key.
        chain: Object exposing ``invoke(inputs, config=...)`` that returns a
            mapping containing an ``"answer"`` entry.
        history: Unused. Kept so existing callers that pass it keep working;
            this function forwards only the session id to the chain.
        session_id: Identifier forwarded as ``configurable.session_id`` so the
            chain can select which conversation history to use. Defaults to
            ``"abc123"``, the value that was previously hard-coded.

    Returns:
        The value stored under ``"answer"`` in the chain's result.
    """
    response = chain.invoke(
        {"input": query},
        config={"configurable": {"session_id": session_id}},
    )
    return response["answer"]

In [None]:
def run_chat(query_input, file, history = None):
    """Answer a single question about a PDF, building the pipeline from scratch.

    Args:
        query_input: The user's question.
        file: Path of the PDF to load and index.
        history: Forwarded to ``conversation_chat``.

    Returns:
        The answer returned by ``conversation_chat``.

    Note:
        Every call re-extracts and re-embeds the document and creates a new
        chain, so no chat history carries over from one call to the next.
    """
    # embbedings_and_store expects the document text, not a path: extract it
    # first, otherwise the path string itself is what gets split and embedded.
    docs = load_data(file)
    retriever = embbedings_and_store(docs)
    chain = create_conversational_chain(retriever)
    result = conversation_chat(
        query_input,
        chain=chain,
        history=history
    )

    return result

# Test

In [14]:
# Ask the "model 2" chain a question; the reply text is under the "answer" key.
result = conv.invoke(
    {
        "question":"tell me just a brief, while class based programming not supported on go, what can we do while we want to make a class ?"
    }
)["answer"]

result

'In Go, which does not support traditional class-based programming, you can achieve similar functionality using structs and methods. By defining a struct to represent data and attaching methods to that struct, you can create a concept similar to classes in other languages. This approach allows you to encapsulate data and behavior together, providing a way to organize and interact with your code effectively.'

In [15]:
# Follow-up question: "that" is only meaningful given the previous turn, so this
# exercises the chat history kept by the chain's memory.
result = conv.invoke(
    {
        "question":"can you show me how create that ?"
    }
)["answer"]

result

'In Go, you can create a class-like structure using structs and methods. Here\'s an example to demonstrate how you can achieve this:\n\n```go\npackage main\n\nimport (\n\t"fmt"\n)\n\n// Define a struct to represent a person\ntype Person struct {\n\tName string\n\tAge  int\n}\n\n// Define a method for the Person struct to introduce the person\nfunc (p Person) Introduce() {\n\tfmt.Printf("Hi, my name is %s and I am %d years old.\\n", p.Name, p.Age)\n}\n\nfunc main() {\n\t// Create an instance of the Person struct\n\tp := Person{Name: "Alice", Age: 30}\n\n\t// Call the Introduce method on the Person instance\n\tp.Introduce()\n}\n```\n\nIn this example:\n- We define a struct `Person` with fields `Name` and `Age`.\n- We define a method `Introduce` for the `Person` struct that prints out an introduction.\n- We create an instance `p` of the `Person` struct with a name "Alice" and age 30.\n- We call the `Introduce` method on the `p` instance to introduce the person.\n\nThis demonstrates how yo