## Prepare environment

In [None]:
%pip install python-dotenv

In [None]:
import os
from pathlib import Path
# Project locations, all derived from the current working directory.
path_dir_script = Path.cwd()
path_dir_root = path_dir_script.parent
# Local checkout that is later passed to GitLoader as repo_path.
path_code_repo_1 = path_dir_root / 'data/tuist'

In [None]:
from dotenv import load_dotenv, find_dotenv
# The .env file is expected next to the notebook's parent directory contents
# (i.e. directly inside path_dir_root).
# NOTE(review): presumably this is where OPENAI_API_KEY, read later from
# os.environ, comes from -- confirm.
path_file_dotenv = path_dir_root / '.env'
load_dotenv(path_file_dotenv)

## Load data

In [None]:
%pip install GitPython

In [None]:
from langchain.document_loaders import GitLoader # https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/git.html

In [None]:
# Load only .swift, .h and .m files from the "main" branch of the checkout.
loader = GitLoader(
    repo_path=path_code_repo_1,
    branch="main",
    # str.endswith accepts a tuple, so one call covers every suffix.
    file_filter=lambda file_path: file_path.endswith((".swift", ".h", ".m")),
)

In [None]:
# Run the configured loader; `data` is sized in the next cell and later
# passed to split_docs().
data = loader.load()

In [None]:
# Sanity check: how many items the loader returned.
len(data)

## Split text

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split documents into chunks with a RecursiveCharacterTextSplitter.

    Args:
        documents: Documents to split; handed unchanged to ``split_documents``.
        chunk_size: Forwarded as the splitter's ``chunk_size``.
        chunk_overlap: Forwarded as the splitter's ``chunk_overlap``.

    Returns:
        Whatever ``split_documents`` returns for ``documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)

# Split the loaded documents with the default chunk settings.
texts = split_docs(data)
print(len(texts))

# Fail with an explicit message instead of a bare IndexError on texts[0]
# when nothing was loaded or split.
if not texts:
    raise ValueError(
        "split_docs(data) produced no chunks; "
        "check the repository path, branch and file filter"
    )

# Preview the first chunk.
print(texts[0].page_content)

## Init ChromaDB

In [None]:
import os
from langchain.embeddings import OpenAIEmbeddings

In [None]:
# The key is read from the environment rather than hard-coded; a missing
# OPENAI_API_KEY raises KeyError here.
embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])

In [None]:
!pip install chromadb

In [38]:
import os

def check_folder_exists(folder_path) -> bool:
    """Return True when ``folder_path`` names an existing directory.

    Args:
        folder_path: Path to check.

    Returns:
        True if the path exists and is a directory; False if it is missing
        or is something other than a directory (e.g. a regular file).
    """
    # os.path.isdir is already False for paths that do not exist, so a
    # separate os.path.exists call is unnecessary.
    return os.path.isdir(folder_path)

In [51]:
from langchain.vectorstores import Chroma

import shutil

# Set to True to discard the persisted store and embed `texts` again.
force_reembed = False
persist_directory = "chroma_db/"

store_exists = check_folder_exists(persist_directory)

if force_reembed and store_exists:
    # Remove the stale store before re-embedding.
    # NOTE(review): Chroma.from_documents appears to add to whatever is
    # already persisted in persist_directory, which would duplicate every
    # chunk on a forced re-embed -- confirm for the installed version.
    shutil.rmtree(persist_directory)
    store_exists = False

if not store_exists:
    # No usable store on disk: embed all chunks and persist the result.
    vectordb = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory)
    vectordb.persist()
else:
    # Reuse the store persisted by an earlier run.
    vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)


Using embedded DuckDB with persistence: data will be stored in: chroma_db/


## Create the Chain

In [33]:
#from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.chains import VectorDBQA

# Chat model used to generate the answers.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.7, max_tokens=1000)

# Question-answering chain ("stuff" chain type) backed by the Chroma store.
qa = VectorDBQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    vectorstore=vectordb,
)

In [34]:
# First question for the chain; the return value of qa.run() is the cell's
# displayed output.
query = "List all tuist features that you can call from the terminal, with examples."
qa.run(query)

"Tuist is a command line tool that helps developers manage their Xcode projects and workspaces. Here are some examples of the most commonly used commands:\n\n1. `tuist init` - Initializes a new Tuist project in the current directory.\n\n2. `tuist generate` - Generates an Xcode project from a Tuist project manifest.\n\n3. `tuist build` - Builds the current project.\n\n4. `tuist test` - Runs tests in the current project.\n\n5. `tuist lint` - Lints the current project.\n\n6. `tuist graph` - Generates a visual graph of the current project's dependencies.\n\n7. `tuist edit` - Opens the generated Xcode project in Xcode.\n\n8. `tuist env` - Displays information about the current environment.\n\n9. `tuist dependencies fetch` - Fetches the dependencies specified in the project's manifest.\n\n10. `tuist dependencies update` - Updates the dependencies specified in the project's manifest.\n\n11. `tuist version` - Displays the version of Tuist that is currently installed.\n\nNote that these are jus

In [None]:
# Refactoring request phrased as a question; the following cell asks for
# code explicitly.
query = "Can you refactor DependenciesController to remove Carthage?"
qa.run(query)

In [None]:
# Same refactoring request, this time asking for code and stating the
# desired output format in the prompt itself.
query = "Show code to refactor DependenciesController to remove Carthage. Output format: markdown"
qa.run(query)