In [21]:
%%writefile requirements.txt
# Runtime dependencies for the Chainlit document-chat apps (app.py / app1.py).
# NOTE(review): versions are unpinned — consider pinning known-good releases.
langchain
chromadb
tiktoken
chainlit
openai
# Provides `from dotenv import load_dotenv`, which both apps import.
python-dotenv
# pysqlite was dropped: it targets Python 2 only and fails to build on Python 3,
# where the standard-library sqlite3 module is used instead.

Overwriting requirements.txt


In [22]:
# NOTE(review): this install fails on Python 3 (see the recorded error output
# below) — pysqlite only supports Python 2; the standard-library sqlite3 module
# is its replacement, so this cell can be removed.
!pip install pysqlite

Collecting pysqlite
  Downloading pysqlite-2.8.3.tar.gz (80 kB)
     ---------------------------------------- 0.0/80.6 kB ? eta -:--:--
     ----- ---------------------------------- 10.2/80.6 kB ? eta -:--:--
     -------------------------------------- 80.6/80.6 kB 901.6 kB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'error'


  error: subprocess-exited-with-error
  
  × python setup.py egg_info did not run successfully.
  │ exit code: 1
  ╰─> [1 lines of output]
      pysqlite is not supported on Python 3. When using Python 3, use the sqlite3 module from the standard library.
      [end of output]
  
  note: This error originates from a subprocess, and is likely not a problem with pip.
error: metadata-generation-failed

× Encountered error while generating package metadata.
╰─> See above for output.

note: This is an issue with the package mentioned above, not pip.
hint: See above for details.


In [3]:
%%writefile .env


Writing .env


In [17]:
%%writefile app1.py 
import os
from typing import List

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import (
    ConversationalRetrievalChain,
)
from langchain.chat_models import ChatOpenAI

from langchain.docstore.document import Document
from langchain.memory import ChatMessageHistory, ConversationBufferMemory

import chainlit as cl

print("all_ok")

from dotenv import load_dotenv

load_dotenv() 

# Look the key up in the process environment; the result is None when the
# variable is not defined anywhere.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Re-export the key only when one was found: assigning None into os.environ
# raises "TypeError: str expected, not NoneType" and would abort the import
# before the app can start and report the missing key in a meaningful way.
if OPENAI_API_KEY is not None:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY


# Module-level splitter reused by every chat session to chunk uploaded text.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)


@cl.on_chat_start
async def on_chat_start():
    """Prepare a retrieval chat session over a text file uploaded by the user.

    Prompts for a plain-text upload, splits its content with the module-level
    ``text_splitter``, indexes the chunks in a Chroma vector store using
    OpenAI embeddings, builds a ``ConversationalRetrievalChain`` with
    conversation memory, and stores that chain under the ``"chain"`` key of
    the Chainlit user session.
    """
    files = None

    # Wait for the user to upload a file. The prompt is re-sent for as long as
    # no file comes back (presumably None is returned on timeout — confirm
    # against the Chainlit docs). `is None` is the identity test PEP 8 asks for.
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload a text file to begin!",
            accept=["text/plain"],
            max_size_mb=20,
            timeout=180,
        ).send()

    # Only the first uploaded file is processed.
    file = files[0]

    msg = cl.Message(content=f"Processing `{file.name}`...", disable_feedback=True)
    await msg.send()

    with open(file.path, "r", encoding="utf-8") as f:
        text = f.read()

    # Split the text into chunks
    texts = text_splitter.split_text(text)

    # Create a metadata entry for each chunk, keyed by its position
    metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]

    # Create a Chroma vector store; the synchronous constructor is wrapped with
    # cl.make_async so it can be awaited from this coroutine.
    embeddings = OpenAIEmbeddings()
    docsearch = await cl.make_async(Chroma.from_texts)(
        texts, embeddings, metadatas=metadatas
    )

    message_history = ChatMessageHistory()

    # output_key="answer" tells the memory which chain output to record, since
    # the chain is also asked to return source documents.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

    chain = ConversationalRetrievalChain.from_llm(
        ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )

    # Let the user know that the system is ready
    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
    await msg.update()

    # Make the chain available to the on_message handler of this session.
    cl.user_session.set("chain", chain)
    
    
@cl.on_message
async def main(message: cl.Message):
    """Answer a user message with the session's chain and attach its sources.

    Reads the chain stored under ``"chain"`` in the user session, runs it on
    the message content, and sends back the answer. Each returned source
    document is attached as a ``cl.Text`` element named ``source_<index>``;
    the answer text lists those names, or states that no sources were found.
    """
    chain = cl.user_session.get("chain")
    cb = cl.AsyncLangchainCallbackHandler()

    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["answer"]
    source_documents = res["source_documents"]

    # One text element per source document; the element name is what the
    # answer text refers to.
    text_elements: List[cl.Text] = [
        cl.Text(content=source_doc.page_content, name=f"source_{source_idx}")
        for source_idx, source_doc in enumerate(source_documents or [])
    ]

    # The "no sources" message used to live inside the `if source_documents:`
    # branch, where it could never run; it is now emitted whenever the chain
    # returned nothing to cite.
    if text_elements:
        source_names = [text_el.name for text_el in text_elements]
        answer += f"\nSources: {', '.join(source_names)}"
    else:
        answer += "\nNo sources found"

    await cl.Message(content=answer, elements=text_elements).send()

Writing app1.py


In [11]:
%%writefile app.py
import os

import chainlit as cl
from dotenv import load_dotenv
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import (
    ChatMessageHistory,
    ConversationBufferMemory,
    ConversationBufferWindowMemory,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
# Load variables from a local .env file before the key is looked up.
load_dotenv()

# None when the variable is not defined in the environment.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Module-level splitter reused by every chat session to chunk uploaded text.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)

@cl.on_chat_start
async def on_chat_start():
    """Prepare a retrieval chat session over a text file uploaded by the user.

    Prompts for a plain-text upload, splits its content with the module-level
    ``text_splitter``, indexes the chunks in a Chroma vector store using
    OpenAI embeddings, builds a ``ConversationalRetrievalChain`` with
    conversation memory, and stores that chain under the ``"chain"`` key of
    the Chainlit user session.
    """
    files = None
    # Re-send the upload prompt for as long as no file comes back (presumably
    # None is returned on timeout — confirm against the Chainlit docs).
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload a text to begin",
            accept=["text/plain"],
            max_size_mb=20,
            timeout=180,
        ).send()

    # Only the first uploaded file is processed.
    file = files[0]
    msg = cl.Message(content=f"Processing {file.name}..")
    await msg.send()

    with open(file.path, "r", encoding="utf-8") as f:
        text = f.read()
    texts = text_splitter.split_text(text)

    # The synchronous Chroma constructor is wrapped with cl.make_async so it
    # can be awaited from this coroutine.
    embeddings = OpenAIEmbeddings()
    docsearch = await cl.make_async(Chroma.from_texts)(texts, embeddings)

    message_history = ChatMessageHistory()
    # The memory object must be bound to a name: it is handed to the chain
    # below. output_key="answer" tells the memory which chain output to
    # record, since the chain also returns source documents.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )
    chain = ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(model_name="gpt-3.5-turbo", streaming=True),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )

    msg.content = f"processing {file.name} done. You can ask questions"
    await msg.update()

    # Make the chain available to the on_message handler of this session.
    cl.user_session.set("chain", chain)

@cl.on_message
async def main(message: cl.Message):
    """Answer a user message with the session's chain and attach its sources.

    Reads the chain stored under ``"chain"`` in the user session, runs it on
    the message content, and sends back the answer. Each returned source
    document is attached as a ``cl.Text`` element named ``source_<index>``;
    the answer text lists those names, or states that no sources were found.
    """
    chain = cl.user_session.get("chain")
    cb = cl.AsyncLangchainCallbackHandler()
    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["answer"]
    source_documents = res["source_documents"]

    # One text element per source document; the element name is what the
    # answer text refers to.
    text_elements = [
        cl.Text(content=source_doc.page_content, name=f"source_{source_idx}")
        for source_idx, source_doc in enumerate(source_documents or [])
    ]

    # Names come from the cl.Text elements created above (the only objects
    # given a `name` here), not from the raw source documents. The "no
    # sources" message is emitted whenever the chain returned nothing to cite.
    if text_elements:
        source_names = [text_el.name for text_el in text_elements]
        answer += f"\nSources: {', '.join(source_names)}"
    else:
        answer += "\nNo sources found"

    await cl.Message(content=answer, elements=text_elements).send()



Overwriting app.py
