In [None]:
from git import Repo
from langchain.text_splitter import Language
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loader.parsers import LanguageParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langhchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain

%pwd

In [None]:
# Create the checkout directory. exist_ok=True keeps the cell re-runnable
# (a plain shell `mkdir` errors when the directory already exists) and avoids
# depending on a particular shell.
os.makedirs("test_repo", exist_ok=True)

In [None]:
repo_path = "test_repo/"
repo_url = "https://github.com/Eltaf-azizi/auto-code-analyzer.git"

# Repo.clone_from needs the destination path as well as the URL; without it
# the call fails and nothing is placed in repo_path for the loader to read.
# Re-use an existing checkout so the cell can be re-run without cloning into a
# non-empty directory.
if os.path.isdir(os.path.join(repo_path, ".git")):
    repo = Repo(repo_path)
else:
    repo = Repo.clone_from(repo_url, to_path=repo_path)

In [None]:
# Loader over repo_path: the glob matches every path, the suffix filter keeps
# only ".py" files, and each file is handed to a Python LanguageParser.
loader = GenericLoader.from_filesystem(
    repo_path,
    glob="**/*",
    suffixes=[".py"],
    parser=LanguageParser(language=Language.PYTHON),
)

In [None]:
# Run the loader and keep the resulting documents for the following cells.
documents = loader.load()

In [None]:
# Preview only the first few documents; displaying the whole list floods the
# cell output with the full text of every loaded file.
documents[:3]

In [None]:
# Number of documents the loader returned.
len(documents)

In [None]:
# Inspect the first loaded document.
documents[0]

In [None]:
# Splitter configured for Python source: chunk_size=500 with chunk_overlap=20
# between consecutive chunks.
documents_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON,
    chunk_size=500,
    chunk_overlap=20,
)

In [None]:
# Split the loaded documents into chunks with documents_splitter.
texts = documents_splitter.split_documents(documents)

In [None]:
# Preview only the first few chunks; displaying the whole list floods the cell
# output with every chunk of the repository.
texts[:3]

In [None]:
# Number of chunks produced by the split.
len(texts)

In [None]:
from dotenv import load_dotenv

# Load variables from a .env file (if one is found) into the process
# environment, then read the OpenAI key; the result is None when it is unset.
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

In [None]:
# os.environ only accepts strings, so assigning a missing key (None) would
# raise an opaque TypeError. Fail early with an actionable message instead.
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or export it "
        "in the environment before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [None]:
# Embedding client used to build the vector store.
# NOTE(review): disallowed_special=() presumably stops tokenization from
# rejecting special-token strings that may appear in source code — confirm
# against the OpenAIEmbeddings documentation.
embeddings = OpenAIEmbeddings(disallowed_special=())

In [None]:
# Embed the chunks and build a Chroma store from them; ".db" is passed as the
# store's persist directory.
vectordb = Chroma.from_documents(texts, embedding=embeddings, persist_directory=".db")

In [None]:
# Ask the store to write itself to its persist directory.
# NOTE(review): newer Chroma releases may persist automatically and deprecate
# this call — confirm for the installed version.
vectordb.persist()

In [None]:
# Chat model with the library's default settings. To pin a specific model pass
# model_name=... (NOTE(review): the OpenAI id is spelled "gpt-3.5-turbo" —
# confirm the model you intend to use).
llm = ChatOpenAI()

In [None]:
# Conversation memory backed by the same llm; it is exposed under the
# "chat_history" key and configured with return_messages=True.
memory = ConversationSummaryMemory(llm=llm, memory_key = "chat_history", return_messages=True)

In [None]:
# search_kwargs must be a dict of retriever options; the previous {"k"} was a
# set literal. "k" is the number of chunks returned per query by the MMR search.
retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 8})

# Attach the conversation memory so it supplies "chat_history"; without it the
# chain expects both "question" and "chat_history" and cannot be called with a
# bare question string.
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

In [None]:
# Question sent to the QA chain.
# NOTE(review): it names a function expected to exist in the indexed
# repository — confirm it is present in the cloned project.
question = "What is download_hugging_face_embeddings function?"

In [None]:
# Run the chain on the question and print only the generated answer text.
result = qa(question)
answer_text = result['answer']
print(answer_text)