## Prepare environment

In [None]:
from dotenv import load_dotenv
# Load environment variables via python-dotenv so that later cells can read
# the API keys through os.environ / os.getenv.
load_dotenv()

## Load data

In [None]:
!pip install GitPython

In [None]:
from langchain.document_loaders import GitLoader # https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/git.html

In [None]:
# Read the "master" branch of the local example checkout, keeping only files
# whose names end in .swift, .h or .m.
loader = GitLoader(
    repo_path="./data/ios-example-lt64kB/TuningFork",
    branch="master",
    file_filter=lambda file_path: file_path.endswith((".swift", ".h", ".m")),
)

In [None]:
# Run the loader; the result is counted and split into chunks in later cells.
data = loader.load()

In [None]:
# Number of items the loader returned (displayed as the cell output).
len(data)

## Split text

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split documents into chunks with a RecursiveCharacterTextSplitter.

    Args:
        documents: Documents to split, as accepted by
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The result of ``split_documents`` for ``documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)

# Chunk the loaded files using split_docs' default chunk size and overlap,
# then report how many chunks were produced.
texts = split_docs(data)
print(len(texts))

## Get embeddings

In [None]:
import os
from langchain.embeddings import OpenAIEmbeddings

In [None]:
# os.environ[...] (unlike os.getenv) raises KeyError immediately when
# OPENAI_API_KEY is not set, instead of passing None along.
embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])

In [None]:
# Quick look at the embeddings object.
# NOTE(review): depending on the langchain version, this repr may include the
# API key in plain text — confirm, and clear this cell's output before sharing
# or committing the notebook.
print(embeddings)

## Create index

In [None]:
import pinecone

In [None]:
# Connect to Pinecone. The correctly spelled PINECONE_* environment variables
# are preferred; the legacy misspelled PINECODE_* names are still accepted so
# that existing .env files keep working.
pinecone.init(
    api_key=os.getenv('PINECONE_API_KEY') or os.getenv('PINECODE_API_KEY'),
    environment=(
        os.getenv('PINECONE_ENVIRONMENT') or os.getenv('PINECODE_ENVIRONMENT')
    ),
)

In [None]:
# Check existing indexes before the delete/create cells below modify them.
print(pinecone.list_indexes())

In [None]:
# Name of the Pinecone index that the following cells delete, create and fill.
index_name = "ai-repo-reader-1"

In [None]:
# Clean up previous index before creating new one.
# Only delete when the index actually exists: on a first run (or after a
# manual cleanup) there is nothing to remove and an unconditional
# delete_index call would fail.
if index_name in pinecone.list_indexes():
    pinecone.delete_index(index_name)

In [None]:
# The index dimension has to equal the length of the embedding vectors stored in it.
# NOTE(review): 1536 presumably matches the default OpenAI embedding model — confirm
# if the embeddings model is ever changed.
pinecone.create_index(index_name, dimension=1536)

In [None]:
from langchain.vectorstores import Pinecone

# Index the chunks as full documents rather than bare strings so every vector
# keeps the metadata attached to its chunk (presumably the source file path
# set by the loader — verify). Passing only page_content would discard it,
# although the queries in this notebook request metadata.
docsearch = Pinecone.from_documents(texts, embeddings, index_name=index_name)

In [None]:
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

# Completion model used to answer questions; temperature=0 asks for the least
# random output.
llm = OpenAI(temperature=0, openai_api_key=os.environ['OPENAI_API_KEY'])
# NOTE(review): the "stuff" chain type presumably puts all input documents
# into a single prompt — confirm against the langchain docs.
chain = load_qa_chain(llm, chain_type="stuff")

In [None]:
# Fetch the chunks most similar to the question from the vector store, then
# have the QA chain answer the question from those chunks.
query = "What does the Tuner class do?"
docs = docsearch.similarity_search(query, include_metadata=True)
chain.run(input_documents=docs, question=query)

In [None]:
# Inspect the third chunk (index 2) retrieved for the previous query.
print(docs[2].page_content)

In [None]:
# Same retrieve-then-answer flow for a "how to" question about the code base.
query = "How to use the device's microphone to interpret frequency?"
docs = docsearch.similarity_search(query, include_metadata=True)
chain.run(input_documents=docs, question=query)

In [None]:
# Same retrieve-then-answer flow, asking about the Tuner class's delegate.
query = "What does the delegate of the Tuner class do?"
docs = docsearch.similarity_search(query, include_metadata=True)
chain.run(input_documents=docs, question=query)

In [None]:
# Inspect the first chunk retrieved for the previous query.
print(docs[0].page_content)

In [None]:
# Same retrieve-then-answer flow for a "find usages" style question.
# NOTE(review): only the chunks returned by similarity_search are handed to the
# chain, so the answer may not cover *all* usages — confirm the result count.
query = "Show me all code extracts where AKFrequencyTracker is used"
docs = docsearch.similarity_search(query, include_metadata=True)
chain.run(input_documents=docs, question=query)

In [None]:
# Inspect the first chunk retrieved for the previous query.
print(docs[0].page_content)