Install requirements

In [33]:
%%script echo skipping
pip install -r ../requirements.txt

skipping


### Load environment variables

In [34]:
from dotenv import load_dotenv
import os

load_dotenv()
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
WORK_DIR = os.environ["WORK_DIR"]

In [35]:
%%script echo skipping
##for google colab (optional)
# This cell is optional and can be skipped
from google.colab import userdata
API_KEY = userdata.get('OPENAI_API_KEY')
WORK_DIR = userdata.get('WORK_DIR')

skipping


In [36]:
print(WORK_DIR)

/workspaces/cyoda-ai/mappings/old-to-new/cs


### Handle unsupported version of sqlite3 (optional)

In [37]:
pip install pysqlite3-binary

Note: you may need to restart the kernel to use updated packages.


In [38]:
import sys

__import__("pysqlite3")
sys.modules["sqlite3"] = sys.modules["pysqlite3"]

### Initialize ChatOpenAI

In [39]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import GitLoader, WebBaseLoader, DirectoryLoader, TextLoader
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.schema import HumanMessage

In [40]:
llm = ChatOpenAI(
    temperature=0,
    max_tokens=8000,
    model="gpt-3.5-turbo-16k",
    openai_api_key=OPENAI_API_KEY,
)

### Load instructions and entities from the official cyoda repository

In [41]:
pip install -qU esprima esprima tree_sitter tree_sitter_languages

Note: you may need to restart the kernel to use updated packages.


In [42]:
import warnings

warnings.filterwarnings("ignore")
from pprint import pprint

from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser
from langchain_text_splitters import Language
from langchain_text_splitters import (
    Language,
    RecursiveCharacterTextSplitter,
)

In [43]:
loader = GenericLoader.from_filesystem(
    WORK_DIR,
    glob="*",
    suffixes=[".py"],
    parser=LanguageParser(language=Language.PYTHON, parser_threshold=1000),
)
docs = loader.load()

In [44]:
%%script echo skipping
cs_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON, chunk_size=1000, chunk_overlap=300
)

splits = cs_splitter.split_documents(docs)

len(splits)

skipping


In [45]:
len(docs)

13

In [46]:
%%script echo skipping
loader = DirectoryLoader(f"{WORK_DIR}",
                          loader_cls=TextLoader)

docs = loader.load()
print(f"Number of documents loaded: {len(docs)}")

skipping


### Split documents and create vectorstore

In [47]:
%%script echo skipping
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

skipping


In [48]:
vectorstore = Chroma.from_documents(documents=docs, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

In [49]:
count = vectorstore._collection.count()
print(count)

26


### Define prompts for contextualizing question and answering question

In [51]:
contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

In [52]:
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

### Answer question

In [53]:
qa_system_prompt = """You are a code assistant. You should analyze and write code in python. \
You should do your best to answer the question. \
Use the following pieces of retrieved context to answer the question. \

{context}"""
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

### Create retrieval chain

In [54]:
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

### Initialize chat history and relevant functions

In [55]:
chat_history = {}

In [56]:
# Function to add a message to the chat history
def add_to_chat_history(id, question, message):
    if id in chat_history:
        chat_history[id].extend([HumanMessage(content=question), message])
    else:
        chat_history[id] = [HumanMessage(content=question), message]

In [57]:
# Function to clear chat history
def clear_chat_history(id):
    if id in chat_history:
        del chat_history[id]

In [58]:
def ask_question(id, question):
    ai_msg = rag_chain.invoke(
        {"input": question, "chat_history": chat_history.get(id, [])}
    )
    add_to_chat_history(id, question, ai_msg["answer"])
    return ai_msg["answer"]

### Start a chat session

In [59]:
import uuid

# Generate a unique ID for the chat session
id = uuid.uuid1()

In [60]:
%%script echo skipping
question = "write a service that does the mapping"
result = ask_question(id, question)
print(result)

skipping


In [None]:
print(chat_history)

In [None]:
#clear chat history if necessary
clear_chat_history(id)