## Prepare environment

In [None]:
import os
from dotenv import load_dotenv
# Load settings via python-dotenv. A later cell reads OPENAI_API_KEY from
# os.environ, so it is presumably expected to come from a local .env file.
load_dotenv()
# Placeholder: replace with the path of the repository to index. It is passed
# to GitLoader as repo_path in the "Load data" section.
path_code_repo_1 = '<insert here>'

## Load data

In [None]:
!pip install GitPython

In [None]:
from langchain.document_loaders import GitLoader # https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/git.html

In [None]:
# Load files from the "main" branch of the repository at path_code_repo_1.
loader = GitLoader(
    repo_path=path_code_repo_1,
    branch="main",
    # Keep only files whose name ends in .swift, .h or .m.
    file_filter=lambda file_path: file_path.endswith((".swift", ".h", ".m")),
)

In [None]:
# Run the loader; `data` is what gets split into chunks in the next section.
data = loader.load()

In [None]:
# Number of items the loader returned (shown as the cell output).
len(data)

## Split text

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split documents into chunks with RecursiveCharacterTextSplitter.

    Args:
        documents: Documents handed to the splitter's ``split_documents``.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The result of ``split_documents`` for ``documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)

# Split the loaded documents using the default chunk settings of split_docs.
texts = split_docs(data)
print(len(texts))
# Peek at the first chunk as a sanity check. Skip the peek when nothing was
# produced (e.g. the file filter matched no files) so the cell reports 0
# instead of raising IndexError.
if texts:
    print(texts[0].page_content)

## Init ChromaDB

In [None]:
import os
from langchain.embeddings import OpenAIEmbeddings

In [None]:
# Embedding client used to build the vector store. The key is read straight
# from the environment, so a missing OPENAI_API_KEY raises KeyError here.
embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])

In [None]:
!pip install chromadb

In [None]:
from langchain.vectorstores import Chroma

# Embed every chunk and index it in a Chroma vector store. No
# persist_directory is passed here.
vectordb = Chroma.from_documents(documents=texts, embedding=embeddings)

## Create the Chain

In [None]:
#from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.chains import VectorDBQA

# VectorDBQA is deprecated in LangChain; RetrievalQA is its replacement and
# takes a retriever instead of the vector store itself.
from langchain.chains import RetrievalQA

# Chat model that writes the answers.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.7,
    max_tokens=1000,
)

# "stuff" chain: the retrieved chunks are placed into a single prompt
# together with the question.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectordb.as_retriever(),
)

In [None]:
# Ask the chain about moving DependenciesController to CocoaPods. The class
# name is spelled the same way as in the other queries so retrieval matches
# the same identifier.
query = "Can you refactor DependenciesController to use Cocoapods?"
qa.run(query)

In [None]:
# Ask the chain whether DependenciesController can drop Carthage; the answer
# is shown as the cell output.
query = "Can you refactor DependenciesController to remove Carthage?"
qa.run(query)

In [None]:
# Same Carthage-removal request, but asking for code and for the reply to be
# formatted as markdown.
query = "Show code to refactor DependenciesController to remove Carthage. Output format: markdown"
qa.run(query)