In [None]:
%pip install langchain llama-cpp-python

In [None]:
from langchain.embeddings import LlamaCppEmbeddings
from werkzeug.utils import secure_filename
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.llms import LlamaCpp
from langchain.vectorstores import Chroma

import time

## Create the models, including embeddings

In [None]:
# The LLM and the embedding model are both loaded from the same local GGUF file
# with the same thread count, so those settings are named once here.
MODEL_PATH = "./models/llama-2-7b.Q4_K_M.gguf"
N_THREADS = 4
llama_kwargs = dict(model_path=MODEL_PATH, n_threads=N_THREADS)

print("Creating model...")

# `model` is passed as the llm of the QA chain; `embeddings` is passed to the
# vector store when the document chunks are indexed.
model = LlamaCpp(**llama_kwargs)
embeddings = LlamaCppEmbeddings(**llama_kwargs)

print("Model created.")

## Create QA chain

In [None]:
print("Creating QA chain...")

# Approach taken from
# https://python.langchain.com/docs/use_cases/question_answering/how_to/vector_db_qa

# Chunking parameters handed to the splitter.
CHUNK_SIZE = 400
CHUNK_OVERLAP = 50
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Load the source text file, then split it into the chunks that get embedded later.
loader = TextLoader("./data/codereviewer-short.txt")
documents = loader.load()
texts = text_splitter.split_documents(documents)

print(f"Found {len(texts)} chunks")

## Create embeddings and QA chain

In [20]:
# Embed the chunks and index them in a FAISS vector store.
# Chroma.from_documents(texts, embeddings) was tried as an alternative; the
# recorded output shows it failing with "Chroma requires sqlite3 >= 3.35.0".
docSearch = FAISS.from_documents(texts, embeddings)

# Wire the LlamaCpp LLM and the FAISS-backed retriever into a RetrievalQA chain.
retriever = docSearch.as_retriever()
qaChain = RetrievalQA.from_chain_type(
    llm=model,
    chain_type="stuff",
    retriever=retriever,
)

print("QA chain created.")

RuntimeError: Your system has an unsupported version of sqlite3. Chroma requires sqlite3 >= 3.35.0.
Please visit https://docs.trychroma.com/troubleshooting#sqlite to learn how to upgrade.

## Run with prompt

In [7]:
# Ask the QA chain one question, report how long the call took, then show the answer.
print("Getting response...")
start_time = time.time()

# `prompt` is reused by the retrieval-inspection cell further down.
prompt = "What is the goal of a code review?"
response = qaChain.run(prompt)

# start_time used to be captured but never read; use it to report the
# wall-clock duration of the chain call.
elapsed_seconds = time.time() - start_time

print("Response created.")
print(f"Elapsed: {elapsed_seconds:.1f} s")

print(response)

Getting response...



llama_print_timings:        load time =  1365.27 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =  1813.25 ms /    10 tokens (  181.33 ms per token,     5.51 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =  1817.55 ms


Response created.
 A code review is an important part of the development process and helps ensure that the highest quality code possible is produced by our team. Code reviews allow us to collectively learn from each other’s past experiences, catch issues before they become problems in production, and foster collaboration within our teams. 
The more you know about the work being reviewed, the better your feedback will be. 
Here are some things to help you understand a code change better:
If the PR is for feature development, read the associated task and/or spec.
If it's a bug fix, read the ticket.
If it's an enhancement or refactor, read the pull request title and description. 
Understand the purpose of the PR before you begin reviewing it.
When in doubt, ask questions!

General guidance:
Review PR’s thoroughly.
Ask for clarification if unclear as to what is being reviewed/expected.




llama_print_timings:        load time =  1562.77 ms
llama_print_timings:      sample time =   140.84 ms /   200 runs   (    0.70 ms per token,  1420.05 tokens per second)
llama_print_timings: prompt eval time = 63653.90 ms /   312 tokens (  204.02 ms per token,     4.90 tokens per second)
llama_print_timings:        eval time = 50304.94 ms /   199 runs   (  252.79 ms per token,     3.96 tokens per second)
llama_print_timings:       total time = 114761.32 ms


In [15]:
# Query the retriever directly with the same prompt and list the chunks it returns.
docs = docSearch.as_retriever().get_relevant_documents(prompt)

print("Documents referenced:")
for doc in docs:
    # A "###" line precedes each chunk's text so the chunks are easy to tell apart.
    print("###", doc.page_content, sep="\n")

Documents referenced:
###
The correctness of the business logic embodied in the code.
The correctness of any new or changed tests.
The "readability" and maintainability of the overall design decisions reflected in the code.
The checklist of common errors that the team maintains for each programming language.
Code reviews should use the below guidance and checklists to ensure positive and effective code reviews.
###
Always remember that a goal of a code review is to verify that the goals of the corresponding task have been achieved. If you have concerns about the related, adjacent code that isn't in the scope of the PR, address those as separate tasks (e.g., bugs, technical debt). Don't block the current PR due to issues that are out of scope.
###
Be considerate
Be positive – encouraging, appreciation for good practices.
###
General guidance
Understand the code you are reviewing
Read every line changed.
If we have a stakeholder review, it’s not necessary to run the PR unless it aids you


llama_print_timings:        load time =  1365.27 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =  1797.61 ms /    10 tokens (  179.76 ms per token,     5.56 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =  1800.91 ms
