In [1]:
!pip install langchain openai GitPython chromadb unstructured markdown sentence_transformers


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
from dotenv import load_dotenv
# Read environment variables from a local .env file; the recorded run returned True.
# NOTE(review): presumably this supplies the OpenAI API key used by the
# OpenAI-backed objects created further down — confirm against the .env file.
load_dotenv()

True

In [3]:
from langchain.document_loaders import GitLoader


def _is_gdscript_file(file_path):
    """Return True for paths ending in ".gd", the only files to be loaded."""
    return file_path.endswith(".gd")


# Load every ".gd" file from the master branch of the Pronto repository,
# using ./pronto as the local repository path.
code_loader = GitLoader(
    clone_url="https://github.com/hpi-swa-lab/godot-pronto",
    repo_path="./pronto",
    branch="master",
    file_filter=_is_gdscript_file,
)
code_documents = code_loader.load()

In [4]:
from langchain.document_loaders import UnstructuredMarkdownLoader

# The README is read from the same local directory the Git loader uses
# as its repository path ("./pronto").
README_PATH = "pronto/README.md"

readme_loader = UnstructuredMarkdownLoader(README_PATH)
readme_document = readme_loader.load()

In [5]:
# Separators are listed from most to least preferred split point.

# Preferred boundaries: class definitions, top-level functions, and
# functions indented by one tab.
_definition_separators = ["\nclass ", "\nfunc ", "\n\tfunc "]

# Fallback boundaries: blank lines, single newlines, spaces, and finally
# the empty string.
_generic_separators = ["\n\n", "\n", " ", ""]

separators = _definition_separators + _generic_separators

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, Language

In [7]:
# Split the loaded code documents with the custom separator list,
# using a chunk size of 512 and an overlap of 200 between chunks.
code_splitter = RecursiveCharacterTextSplitter(
    separators=separators,
    chunk_size=512,
    chunk_overlap=200,
)
code_splits = code_splitter.split_documents(code_documents)

In [8]:
# Split the README with the splitter's built-in Markdown configuration,
# using much smaller chunks (100, overlap 20) than for the code.
readme_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.MARKDOWN,
    chunk_size=100,
    chunk_overlap=20,
)
readme_splits = readme_splitter.split_documents(readme_document)

In [9]:
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.embeddings import OpenAIEmbeddings

In [10]:
# Embedding model passed to both Chroma vector stores built below.
# NOTE(review): SentenceTransformerEmbeddings is imported above but unused in the
# visible cells — presumably an alternative that was tried; confirm before removing.
embeddings = OpenAIEmbeddings()

In [11]:
from langchain.vectorstores import Chroma

In [12]:
# Index the code chunks in their own named collection. Without an explicit
# collection_name the store falls back to LangChain's default collection, which
# another in-memory Chroma store created in the same process can end up sharing,
# so README chunks could leak into code retrieval.
code_db = Chroma.from_documents(
    documents=code_splits,
    embedding=embeddings,
    collection_name="pronto_code",
)
code_retriever = code_db.as_retriever(
    search_type="mmr",  # Also test "similarity"
    search_kwargs={"k": 4},
)

In [13]:
# Index the README chunks in their own named collection. Without an explicit
# collection_name the store falls back to LangChain's default collection, which
# another in-memory Chroma store created in the same process can end up sharing,
# so code chunks could leak into README retrieval.
readme_db = Chroma.from_documents(
    documents=readme_splits,
    embedding=embeddings,
    collection_name="pronto_readme",
)
readme_retriever = readme_db.as_retriever(
    search_type="mmr",  # Also test "similarity"
    search_kwargs={"k": 8},
)

In [14]:
from langchain.retrievers import EnsembleRetriever

# Combine both retrievers; the weights are given in the same order as the
# retrievers, so code gets 0.2 and the README gets 0.8.
retriever = EnsembleRetriever(
    retrievers=[code_retriever, readme_retriever],
    weights=[0.2, 0.8],
)

In [15]:
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryMemory

# One chat model (temperature 0) is shared by the summary memory and the
# question-answering chain.
llm = ChatOpenAI(temperature=0)

# Conversation memory stored under the "chat_history" key and returned as
# message objects.
memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",
    return_messages=True,
)

# Conversational QA chain over the ensemble retriever, backed by the memory above.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory,
)

In [20]:
# Ask the chain a single question and print the raw result; per the recorded
# output it is a dict that includes the 'question' and 'chat_history' keys.
# NOTE(review): this cell's execution count (20) does not follow the previous
# cell (15), and the recorded chat_history already summarizes earlier exchanges
# that are not in the visible cells — a fresh Restart & Run All will likely
# print a different history.
question = "How do I use the wiring system?"
result = qa(question)
print(result)

{'question': 'How do I use the wiring system?', 'chat_history': [SystemMessage(content="The human asks the AI to provide 20 questions that a user would ask a chatbot built to help with building games using the Pronto framework. The AI provides a comprehensive list of questions, covering various aspects of the framework, its features, limitations, and usage. The questions cover topics such as accessing the Pronto framework, troubleshooting issues, using external assets, creating prototypes, customizing behaviors, managing game state, collaborating with other developers, optimizing performance, integrating external libraries, organizing code, exporting to different platforms, testing within the framework, and known issues or bugs. The AI also mentions that the accuracy and availability of answers may vary depending on the specific chatbot's knowledge and capabilities. The human asks how to connect two behaviors, and the AI explains that it can be done using the framework's wiring system.