In [1]:
%%bash
# llamafile setup
#
# Starts a llamafile server in the background for the rest of the notebook and
# records its pid in '.llamafile_pid' so the cleanup cell can stop it later.

# The llamafile that is actually launched in step 3. The commented-out commands
# below refer to the larger Meta-Llama-3.1-8B model; whichever file you download,
# point LLAMAFILE at it so that the three steps stay in sync.
LLAMAFILE=./TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile

# Step 1: Download a llamafile. The download may take several minutes.
# wget https://huggingface.co/Mozilla/Meta-Llama-3.1-8B-Instruct-llamafile/resolve/main/Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile

# Step 2: Make the llamafile executable. Note: if you're on Windows, just append '.exe' to the filename.
# chmod +x Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile

# Step 3: Start the llamafile server in the background. All the server logs are written to 'tinyllama.log'.
# Alternatively, you can open a separate terminal outside this notebook and run:
#   ./Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile --server --nobrowser --embedding
# ./Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile --server --nobrowser --embedding > tinyllama.log 2>&1 &

# Fail loudly here instead of letting later cells hit a server that never started:
# because the server's output is redirected to the log, a missing file would
# otherwise go unnoticed in the notebook.
if [ ! -x "${LLAMAFILE}" ]; then
    echo "error: ${LLAMAFILE} is missing or not executable; see steps 1 and 2 above" >&2
    exit 1
fi

"${LLAMAFILE}" --server --nobrowser --embedding > tinyllama.log 2>&1 &
pid=$!

# Give the process a moment, then make sure it did not exit immediately
# before recording its pid.
sleep 1
if ! kill -0 "${pid}" 2>/dev/null; then
    echo "error: llamafile server exited right after start; see tinyllama.log" >&2
    exit 1
fi

echo "${pid}" > .llamafile_pid  # write the process pid to a file so we can terminate the server later

In [2]:
import json
import time
from pathlib import Path
from time import perf_counter

import weaviate
from langchain.chains import RetrievalQA
from langchain_community.embeddings import LlamafileEmbeddings
from langchain_community.llms.llamafile import Llamafile
from langchain_weaviate.vectorstores import WeaviateVectorStore

from utils.text import get_pdf_text, get_text_chunks



In [3]:
# Label of the vector store under test; used as the top-level key of the timing
# results and as the sub-directory name when the results are saved.
experiment = "weaviate"

# Per-stage statistic names for this experiment.
# NOTE(review): not read by any cell shown in this notebook.
statistics = [f"{stage}_{experiment}" for stage in ("save", "search", "execute")]

# Collects the measured durations as {experiment: {stage: seconds}}.
dict_json = dict()

In [4]:
# Connect to a Weaviate instance running on this machine, using port 8081.
# NOTE(review): presumably moved off Weaviate's usual port so it does not clash
# with the llamafile server started in the setup cell — confirm.
# NOTE(review): no cell shown here closes this connection; consider calling
# client.close() once the experiment is finished.
client = weaviate.connect_to_local(port=8081)

In [5]:
# Embedding client built with the library defaults; it is handed to the vector
# store below to embed the document chunks.
# NOTE(review): presumably talks to the llamafile server started in the setup
# cell (launched with --embedding) — confirm the default URL matches.
embedder = LlamafileEmbeddings()

In [6]:
# LLM client built with the library defaults; used by the RetrievalQA chain below.
# NOTE(review): presumably served by the same llamafile server as the
# embeddings — confirm.
llm = Llamafile()

In [7]:
# Read the resume PDF, then split what was read into chunks for indexing.
# Both helpers come from the project's utils.text module.
pdf_content = get_pdf_text("resume.pdf")
docs = get_text_chunks(pdf_content)

In [8]:
# Measure how long it takes to build the Weaviate vector store from the chunks
# (using the llamafile embedder) and record it as the "save" timing.
# NOTE(review): re-running this cell presumably inserts the chunks again — confirm.
save_started = perf_counter()
vectorstore = WeaviateVectorStore.from_documents(docs, embedder, client=client)
save_elapsed = perf_counter() - save_started

experiment_timings = dict_json.setdefault(experiment, {})
experiment_timings["save"] = save_elapsed

In [9]:
# Run a plain similarity search against the vector store (no LLM involved),
# record its duration as the "search" timing, and show the retrieved documents.
query = "What professions did Gerson have?"

search_started = perf_counter()
results = vectorstore.similarity_search(query)
search_elapsed = perf_counter() - search_started

dict_json.setdefault(experiment, {}).update(search=search_elapsed)
print(results)

[Document(metadata={'page': 0.0, 'source': 'resume.pdf'}, page_content='DataOrchestrationandDataOpsEngineeringTemplate(03/2023-Present)-Developedanopen-sourcerepositoryandprojectprovidingacomprehensiveframeworkandasuiteoftoolsforDataOrchestrationandDataOps.-Designedtosimplifytheend-to-endmanagementofdataworkflows,theprojectincludestoolscapableofperformingdataextraction,transformation,andloading(ETL),datavalidation,andmonitoring.-Aimedatstreamliningdataoperationsandenhancingdatareliability,thetemplatefacilitatesseamlessorchestrationofdatapipelines,ensuringefficientdataflowandtimelyprocessing.-Committedtofosteringacollaborativeandthrivingdatacommunity,theprojectembracesopen-sourceprinciples,enablingdataengineerstoleveragethetemplatefordiversedata-drivenprojectswithease.'), Document(metadata={'page': 0.0, 'source': 'resume.pdf'}, page_content="0 3 / 2 0 1 0-1 2 / 2 0 1 5,8 0 . 5 %Master's,AppliedInformaticsUniversidadeFederalRuraldePernambuc o\n0 3 / 2 0 1 8-0 2 / 2 0 2 0,H i g h e s tD i

In [10]:
# Build the question-answering chain: the vector store acts as the retriever and
# the llamafile LLM produces the answer, using the "stuff" chain type.
retriever = vectorstore.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

In [11]:
# Time the full question-answering call (retrieval plus generation) and record
# it as the "execute" timing.
query = "What professions did Gerson have?"
time_start = perf_counter()
# `qa.run(...)` is deprecated and emits a deprecation warning; `invoke` is the
# supported entry point. RetrievalQA takes the question under the "query" key
# and returns the answer text under the "result" key.
response = qa.invoke({"query": query})["result"]
time_duration = perf_counter() - time_start
dict_json.setdefault(experiment, {})["execute"] = time_duration
print(response)

  warn_deprecated(


 Gerson is a Data Scientist and Software Developer with skills in Python/Machine Learning. They have experience working on microservices development using various technologies such as Docker, Kubernetes, HelmChart, RESTAPI, gRPC, SQL, OracleDB, CI/CD, among others. They are proficient in Python, Machine Learning, and Data Science, with expertise in software engineering, data science, and AI.</s>


In [12]:
%%bash
# cleanup: kill the llamafile server process whose pid was recorded by the setup cell.
# Safe to re-run: a missing pid file or an already-stopped server does not fail the cell.
pid_file=.llamafile_pid

if [ -f "${pid_file}" ]; then
    pid=$(cat "${pid_file}")
    # The server may already have exited; report that instead of raising.
    kill "${pid}" 2>/dev/null || echo "llamafile server (pid ${pid}) was not running"
    rm -f "${pid_file}"
else
    echo "no ${pid_file} found; nothing to clean up"
fi

In [13]:
# Persist the collected timings as results/<experiment>/<timestamp>.json.
timestr = time.strftime("%Y%m%d-%H%M%S")

# Create the output directory on first use; without this, open() raises
# FileNotFoundError on a fresh checkout.
results_dir = Path("results") / experiment
results_dir.mkdir(parents=True, exist_ok=True)

# ensure_ascii=False may emit non-ASCII characters, so write the file as UTF-8
# explicitly rather than relying on the platform default encoding.
with open(results_dir / f"{timestr}.json", "w", encoding="utf-8") as f:
    json.dump(dict_json, f, ensure_ascii=False)