## Prepare environment

In [None]:
import os
from dotenv import load_dotenv
# Load settings from a local .env file (if any) before reading the environment.
load_dotenv()
# Git checkout to index. The CODE_REPO_PATH environment variable (or a .env
# entry) overrides the machine-specific default below, so the notebook can run
# on another machine without editing this cell.
path_code_repo_1 = os.environ.get(
    'CODE_REPO_PATH',
    '/Users/stephanegarti/home/code/repos/ai-repo-reader/data/ios-1000kB/open-in-place',
)

## Load data

In [None]:
!pip install GitPython

In [None]:
from langchain.document_loaders import GitLoader # https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/git.html

In [None]:
# Restrict loading to `.swift`, `.h` and `.m` files on the "main" branch.
# str.endswith accepts a tuple, so a single call checks all three suffixes.
loader = GitLoader(
    repo_path=path_code_repo_1,
    branch="main",
    file_filter=lambda file_path: file_path.endswith((".swift", ".h", ".m")),
)

In [None]:
# Run the loader; the result is kept in `data` and later passed to split_docs.
data = loader.load()

In [None]:
# Cell output: how many items the loader returned.
len(data)

## Split text

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split documents into chunks with a RecursiveCharacterTextSplitter.

    Args:
        documents: Documents to split; handed to ``split_documents`` as-is.
        chunk_size: Forwarded as the splitter's ``chunk_size`` (default 1000).
        chunk_overlap: Forwarded as the splitter's ``chunk_overlap``
            (default 20).

    Returns:
        The result of ``split_documents`` for ``documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)

# Chunk the loaded documents and report how many chunks were produced.
texts = split_docs(data)
print(len(texts))
# Preview the first chunk only when one exists: indexing texts[0] on an empty
# result (e.g. no file matched the loader's filter) would raise IndexError.
if texts:
    print(texts[0].page_content)

## Init ChromaDB

In [None]:
import os
from langchain.embeddings import OpenAIEmbeddings

In [None]:
# The key is read with os.environ[...], so a missing OPENAI_API_KEY raises
# KeyError in this cell.
embeddings = OpenAIEmbeddings(
    openai_api_key=os.environ['OPENAI_API_KEY'],
)

In [None]:
!pip install chromadb

In [None]:
from langchain.vectorstores import Chroma

# Build the Chroma vector store from the split chunks, using `embeddings`
# to embed them.
vectordb = Chroma.from_documents(documents=texts, embedding=embeddings)

## Create the Chain

In [None]:
from langchain.llms import OpenAI
from langchain.chains import VectorDBQA

# VectorDBQA is deprecated in LangChain; RetrievalQA is its replacement and
# takes a retriever instead of the vector store itself. The resulting chain is
# still invoked with qa.run(query).
from langchain.chains import RetrievalQA

qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=vectordb.as_retriever(),
)

In [None]:
# Natural-language question to ask about the indexed repository.
query = "What happens in the appdelegate?"
# Run the QA chain on the question; as the last expression in the cell, its
# return value is what the notebook displays.
qa.run(query)