In [None]:
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser
from langchain_text_splitters import RecursiveCharacterTextSplitter, Language
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

In [4]:
import getpass
import os

from git import Repo

In [5]:
# Directory (relative to the notebook) that holds the local checkout.
REPO_PATH: str = "./repo"
# Remote repository that gets cloned into REPO_PATH.
REPO_URL: str = "https://github.com/langchain-ai/langchain.git"
# Forwarded to LanguageParser as `parser_threshold` when the sources are loaded.
PARSER_THRESHOLD: int = 500

In [None]:
# Reuse an existing checkout so the notebook survives "Restart & Run All":
# `git clone` refuses to write into a non-empty directory, so an unconditional
# clone fails on every run after the first one.
if os.path.isdir(os.path.join(REPO_PATH, ".git")):
    # A previous run already cloned the repository; just open it.
    REPO = Repo(REPO_PATH)
else:
    REPO = Repo.clone_from(REPO_URL, to_path=REPO_PATH)

In [7]:
# Parser applied to every matched file; PARSER_THRESHOLD is forwarded unchanged.
PYTHON_PARSER = LanguageParser(
    language=Language.PYTHON,
    parser_threshold=PARSER_THRESHOLD,
)

# Loader rooted at the langchain_core package inside the local checkout.
LOADER = GenericLoader.from_filesystem(
    f"{REPO_PATH}/libs/core/langchain_core/",
    parser=PYTHON_PARSER,
    glob="**/*",  # match everything, recursing into sub-directories
    suffixes=[".py"],  # keep Python sources only
    exclude=["**/non-utf-8-encoding.py"],  # skip files named non-utf-8-encoding.py
)

# Read and parse all matched files into documents.
DOCUMENTS = LOADER.load()

In [None]:
# Python-aware recursive splitter configured with chunk_size=1000 and chunk_overlap=200.
SPLITTER = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON,
    chunk_overlap=200,
    chunk_size=1000,
)

# Cut the loaded documents into chunks for embedding.
SPLIT_DOCUMENTS = SPLITTER.split_documents(DOCUMENTS)

3085

In [9]:
# Never store a real key in the notebook: keep a key that is already exported in
# the environment, and only prompt for one (input is not echoed) when none is set.
# The previous unconditional assignment replaced any valid key with a placeholder.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

In [None]:
# Embedding model used to index the chunks.
# NOTE(review): disallowed_special=() presumably keeps source text that contains
# tokenizer special-token strings from being rejected — confirm against the docs.
EMBEDDINGS = OpenAIEmbeddings(disallowed_special=())

# Embed every chunk and index it in a Chroma vector store.
VECTOR_STORE = Chroma.from_documents(
    embedding=EMBEDDINGS,
    documents=SPLIT_DOCUMENTS,
)

# Retriever over the store (a retriever, despite the variable's name).
RETRIEVAL_CHAIN = VECTOR_STORE.as_retriever(
    search_kwargs={"k": 8},
    search_type="mmr",  # Maximal Marginal Relevance
)

In [None]:
# Chat model that writes the review; constructed with max_tokens=200.
LLM = ChatOpenAI(
    max_tokens=200,
    model="gpt-3.5-turbo",
)

In [None]:
# The system message must expose a `{context}` placeholder:
# create_stuff_documents_chain formats the retrieved documents into that variable
# and raises a ValueError when the prompt does not accept it. The stray "\s"
# escape that used to sit in this string has been removed as well.
PROMPT = ChatPromptTemplate.from_messages([
    (
        "system",
        "You are a code review assistant. Provide concise and relevant code reviews "
        "based on the provided code snippets.\n\n{context}",
    ),
    ("user", "{input}"),
])

# Combines the retrieved documents with PROMPT and sends the result to the LLM.
DOCUMENT_CHAIN = create_stuff_documents_chain(
    llm=LLM,
    prompt=PROMPT,
)

# Retrieval followed by answer generation; invoked with {"input": ...}.
QA_CHAIN = create_retrieval_chain(
    RETRIEVAL_CHAIN,
    DOCUMENT_CHAIN,
)


In [None]:
# Payload for the chain: the question is supplied under the "input" key.
REVIEW_REQUEST = {
    "input": "Can you provide a code review for the LangChain core library? What are some potential improvements or issues in the codebase?"
}

# Run the retrieval + answer chain on the question.
RESPONSE = QA_CHAIN.invoke(REVIEW_REQUEST)

In [None]:
# Pull the generated text out of the chain result and show it.
ANSWER = RESPONSE["answer"]
print(ANSWER)