In [None]:
%%bash
# llamafile setup
#
# Starts a local llamafile server in the background and records its PID in
# '.llamafile_pid' so that a later cell can stop it.

# Path of the llamafile to serve. Defaults to the TinyLlama build; export
# LLAMAFILE in the kernel environment (e.g. `%env LLAMAFILE=./other.llamafile`)
# before running this cell to serve a different model, for example
# ./Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile.
LLAMAFILE="${LLAMAFILE:-./TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile}"

# Step 1: Download a llamafile (uncomment the one you want). The download may take several minutes.
# wget https://huggingface.co/jartine/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile
# wget https://huggingface.co/Mozilla/Meta-Llama-3.1-8B-Instruct-llamafile/resolve/main/Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile

# Step 2: Make the llamafile executable. Note: if you're on Windows, just append '.exe' to the filename.
# chmod +x "${LLAMAFILE}"

# Fail loudly if the binary is not ready. Without this check the background
# launch below would fail silently into the log file and a dead PID would be
# recorded in '.llamafile_pid'.
if [ ! -x "${LLAMAFILE}" ]; then
    echo "error: '${LLAMAFILE}' is missing or not executable; complete steps 1 and 2 first." >&2
    exit 1
fi

# Step 3: Start llamafile server in background. All the server logs will be written to 'tinyllama.log'.
# Alternatively, you can just open a separate terminal outside this notebook and run:
#   ./TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile --server --nobrowser --embedding
"${LLAMAFILE}" --server --nobrowser --embedding > tinyllama.log 2>&1 &
pid=$!
echo "${pid}" > .llamafile_pid  # write the process pid to a file so we can terminate the server later

In [None]:
import marqo
import cProfile
from langchain_community.vectorstores import Marqo

from langchain_community.embeddings import LlamafileEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.llms.llamafile import Llamafile
from utils.text import get_pdf_text, get_text_chunks

In [None]:
# Embedding client for llamafile, constructed with default settings.
# NOTE(review): presumably targets the llamafile server launched in the setup cell — confirm host/port.
# NOTE(review): `embedder` is not referenced by any other cell visible here; verify whether the
# vector store is meant to use it or whether this cell can be dropped.
embedder = LlamafileEmbeddings()

In [None]:
# LLM client with default settings; passed as `llm` to the RetrievalQA chain further down.
# NOTE(review): presumably talks to the local llamafile server from the setup cell — confirm.
llm = Llamafile()

In [None]:
# Extract the text of resume.pdf, then split it into the chunks that get indexed.
# NOTE(review): the chunks are handed to Marqo.from_documents below, so they are presumably
# Document objects rather than plain strings — verify against utils.text.
pdf_text = get_pdf_text("resume.pdf")
docs = get_text_chunks(pdf_text)

In [None]:
# Index the chunks in Marqo under a fixed index name and keep the resulting vector store.
index_name = "langchain-demo-2"
vectorstore = Marqo.from_documents(
    docs,
    index_name=index_name,
)

In [None]:
# Check retrieval on its own before the LLM is involved: the bare last expression
# makes the notebook display whatever the vector store returns for the query.
query = "What professions did Gerson have?"
vectorstore.similarity_search(query)

In [None]:
# Retrieval-augmented QA chain: the vector store supplies the context documents and
# the "stuff" chain type is used to combine them for the LLM.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)

In [None]:
# Ask the question through the full retrieval + LLM chain and print the answer.
# `Chain.run` is deprecated in favour of `invoke`; RetrievalQA takes its input under
# the "query" key and returns the answer text under "result".
query = "What professions did Gerson have?"
response = qa.invoke({"query": query})["result"]
print(response)

In [None]:
%%bash
# cleanup: kill the llamafile server process
#
# Safe to re-run: a missing PID file or an already-stopped server is reported
# instead of making the cell fail.
if [ -f .llamafile_pid ]; then
    # kill fails when the process is already gone; report it and still remove the stale file
    kill "$(cat .llamafile_pid)" 2>/dev/null || echo "llamafile server was not running"
    rm -f .llamafile_pid
else
    echo "no .llamafile_pid file found; nothing to clean up"
fi