In [None]:
from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer


# Embedding function handed to the Chroma store below; loaded on the GPU.
model_name = "BAAI/llm-embedder"
model_kwargs = dict(device="cuda")
embeddings = HuggingFaceEmbeddings(
    model_kwargs=model_kwargs,
    model_name=model_name,
)

# Open the Chroma collection persisted in the local "llm-embedder" directory.
vectorstore = Chroma(
    embedding_function=embeddings,
    persist_directory="llm-embedder",
)

# Second model, used later to re-score retrieved passages against the query.
# NOTE(review): "bge-reranker-base" looks like a cross-encoder checkpoint, yet it is
# loaded here as a SentenceTransformer (bi-encoder style `encode`) — confirm this is intended.
model = SentenceTransformer("BAAI/bge-reranker-base")


In [None]:
# Sanity check: print the text of the 10 nearest chunks for one sample question.
sample_hits = vectorstore.similarity_search("What is title of the course number 11711?", k=10)
for hit in sample_hits:
    print(hit.page_content)

In [None]:
from langchain_core.retrievers import BaseRetriever, RetrieverLike, RetrieverOutputLike
from langchain_core.language_models import BaseLLM
from langchain_core.embeddings import Embeddings
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from typing import List



class CustomRetriever(BaseRetriever):
    """Two-stage retriever: fetch candidates, then re-rank them by embedding similarity.

    Candidates come from the wrapped ``vectorstore`` retriever. Each candidate's
    ``page_content`` and the query are encoded with ``model``; candidates are
    ordered by the dot product of the normalised embeddings and the ``top_n``
    most similar documents are returned, best first.
    """

    # Upstream retriever that supplies the candidate documents.
    vectorstore: RetrieverLike

    # Model whose `encode` output is used to score candidates against the query.
    model: SentenceTransformer

    # Number of candidates requested from the upstream retriever.
    fetch_k: int = 10

    # Number of re-ranked documents handed back to the caller.
    top_n: int = 4

    def _get_relevant_documents(self, query: str, *, run_manager: CallbackManagerForRetrieverRun) -> List[Document]:
        """Return the ``top_n`` candidates most similar to ``query``, most similar first.

        Args:
            query: The user question to retrieve context for.
            run_manager: Callback manager supplied by LangChain (not used here).

        Returns:
            Up to ``top_n`` documents sorted by descending similarity; an empty
            list when the upstream retriever finds nothing.
        """
        # NOTE(review): whether `k` is honoured depends on the upstream retriever
        # accepting extra keyword arguments — confirm, or configure its own
        # search kwargs where it is constructed.
        docs = self.vectorstore.get_relevant_documents(query, k=self.fetch_k)
        if not docs:
            # Nothing to rank; avoid encoding an empty batch.
            return []

        passages = [doc.page_content for doc in docs]
        passage_vecs = self.model.encode(passages, normalize_embeddings=True)
        query_vecs = self.model.encode([query], normalize_embeddings=True)

        # One query row, so the product has a single column of per-document scores.
        similarity = passage_vecs @ query_vecs.T

        # Highest score first: the previous ascending sort followed by a head
        # slice returned the *least* similar candidates.
        ranked = sorted(
            zip(similarity[:, 0], docs),
            key=lambda pair: pair[0],
            reverse=True,
        )
        return [doc for _, doc in ranked[: self.top_n]]

In [None]:
# Ask Chroma for 10 candidates explicitly so the re-ranker has a pool larger than
# the 4 documents it returns; a bare as_retriever() uses the retriever's default k.
custom = CustomRetriever(
    vectorstore=vectorstore.as_retriever(search_kwargs={"k": 10}),
    model=model,
)

In [None]:
from langchain_community.llms import LlamaCpp

# !CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir --verbose

# llama.cpp offload / batching knobs.
# NOTE(review): -1 presumably means "offload every layer to the GPU" — confirm
# against the llama-cpp-python documentation.
n_gpu_layers = -1
n_batch = 512  # Should be between 1 and n_ctx.

# Keyword arguments for the local GGUF checkpoint.
# Make sure the model path is correct for your system!
_llama_kwargs = {
    "model_path": "llama-2-7b-chat.Q6_K.gguf",
    "n_ctx": 2048,
    "n_batch": n_batch,
    "n_gpu_layers": n_gpu_layers,
    # MUST set to True, otherwise you will run into problem after a couple of calls
    "f16_kv": True,
    "verbose": True,
}
llm = LlamaCpp(**_llama_kwargs)


In [None]:
from langchain import hub
from langchain_core.runnables import RunnablePassthrough, RunnablePick
from langchain_core.prompts.chat import HumanMessagePromptTemplate, PromptTemplate

# Start from the hub's RAG prompt, then swap its messages for a single human
# message that asks for a one-sentence, concise answer.
rag_prompt = hub.pull("rlm/rag-prompt")
prompt = HumanMessagePromptTemplate(
    prompt=PromptTemplate(
        input_variables=["context", "question"],
        template=(
            "You are an assistant for question-answering tasks. "
            "Use the following pieces of retrieved context to answer the question. "
            "If you don't know the answer, just say that you don't know. "
            "Use one sentence maximum and keep the answer CONCISE. "
            "Keep the answer CONCISE.\n"
            "Question: {question} \n"
            "Context: {context} \n"
            "Answer:"
        ),
    )
)
rag_prompt.messages = [prompt]

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

# Chain
def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line."""
    texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(texts)


# Wire the pipeline: the question is sent both to the re-ranking retriever
# (whose documents are flattened to text) and straight through to the prompt.
retriever = custom
chain_inputs = {
    "question": RunnablePassthrough(),
    "context": retriever | format_docs,
}
qa_chain = chain_inputs | rag_prompt | llm | StrOutputParser()



In [None]:
# Load the evaluation questions, one per line, with surrounding whitespace removed.
# Blank lines are kept (as empty strings) so question indices match the file's lines.
# The context manager closes the file even if reading fails.
with open("FINALQUESTIONS.txt", "r", encoding="utf-8") as f:
    questions = [line.strip() for line in f]

In [None]:
from tqdm import tqdm


answer_file = "SubmissionData/system_outputs/graham_reranking.txt"

# Raw model responses, in question order.
answers = []

# Mode "w" truncates output left over from a previous run. The file stays open for
# the whole loop and is flushed after every answer, so partial results are on disk
# if a later question fails; the context manager guarantees the handle is closed.
with open(answer_file, "w", encoding="utf-8") as f:
    for question in tqdm(questions):
        response = qa_chain.invoke(question)

        # Exactly one output line per question. Whitespace runs (including
        # newlines) collapse to a single space so words on adjacent lines of the
        # response are not fused together.
        f.write(" ".join(response.split()) + "\n")
        f.flush()
        answers.append(response)
