In [17]:
%%bash
# llamafile setup: start a local llamafile server in the background for the cells below.
#
# NOTE(review): steps 1-2 reference the Meta-Llama-3.1-8B-Instruct llamafile, but the
# command that actually runs in step 3 launches ./TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile,
# which must already be present in the working directory and be executable.

# Step 1: Download a llamafile. The download may take several minutes.
# wget https://huggingface.co/Mozilla/Meta-Llama-3.1-8B-Instruct-llamafile/resolve/main/Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile

# Step 2: Make the llamafile executable. Note: if you're on Windows, just append '.exe' to the filename.
# chmod +x Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile

# Step 3: Start the llamafile server in the background. Both stdout and stderr of the server are written to 'tinyllama.log'.
# Alternatively, you can open a separate terminal outside this notebook and run:
#   ./Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile --server --nobrowser --embedding
# ./Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile --server --nobrowser --embedding > tinyllama.log 2>&1 &
./TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile --server --nobrowser --embedding > tinyllama.log 2>&1 &
# $! is the pid of the background job launched on the previous line.
pid=$!
echo "${pid}" > .llamafile_pid  # write the process pid to a file so the cleanup cell can terminate the server later

In [2]:
import cProfile
import json
import time
from pathlib import Path
from time import perf_counter

import marqo
from langchain.chains import RetrievalQA
from langchain_community.embeddings import LlamafileEmbeddings
from langchain_community.llms.llamafile import Llamafile
from langchain_community.vectorstores import Marqo

from utils.text import get_pdf_text, get_text_chunks

In [3]:
# Experiment label: used as the top-level key of the timing dict and as the
# sub-directory name of the results file.
experiment = "marqo"
# Names of the three timed stages, each suffixed with the experiment label.
statistics = [f"{stage}_{experiment}" for stage in ("save", "search", "execute")]
# Accumulator for the measured durations; the timed cells fill it in.
dict_json = dict()

In [4]:
# Embedding client created with the LlamafileEmbeddings defaults.
# NOTE(review): presumably targets the llamafile server started by the setup cell — confirm.
# `embedder` is not referenced by any other cell visible in this notebook.
embedder = LlamafileEmbeddings()

In [5]:
# LLM client created with the Llamafile defaults; passed as `llm` to the RetrievalQA chain below.
# NOTE(review): presumably targets the llamafile server started by the setup cell — confirm.
llm = Llamafile()

In [6]:
# Read resume.pdf and split its text into chunks using the project helpers from utils.text.
# NOTE(review): the chunks look like LangChain Document objects (they are handed to
# Marqo.from_documents below) — confirm against utils.text.
docs = get_text_chunks(get_pdf_text("resume.pdf"))

In [13]:
# Index the document chunks into Marqo and record how long the call takes.
index_name = "langchain-demo"
time_start = perf_counter()
# Only the chunks and the target index name are passed; no embedding object is supplied.
# NOTE(review): the recorded output reports that the index already existed, so this
# timing may not reflect building a fresh index — confirm.
vectorstore = Marqo.from_documents(docs, index_name=index_name)
time_duration = perf_counter() - time_start
# Stored as dict_json[experiment]["save"]; setdefault creates the inner dict on first use.
dict_json.setdefault(experiment, {})["save"] = time_duration

Index langchain-demo exists.


In [14]:
# Query the vector store directly (retrieval only, no LLM) and time the similarity search.
query = "What professions did Gerson have?"
time_start = perf_counter()
results = vectorstore.similarity_search(query)
time_duration = perf_counter() - time_start
# Stored as dict_json[experiment]["search"].
dict_json.setdefault(experiment, {})["search"] = time_duration
# Prints the full list of matching Document objects.
print(results)

[Document(metadata={'source': 'resume.pdf', 'page': 0}, page_content='GersonSantos\nDataScientist|SoftwareDeveloperProficientinPython/MachineLearningbasedmicroservicesdevelopmen tandanabilitytotranslatebusinessrequirementsintotechnicalsolutions.Ihaveapassionforconsistentlearningandinnovating.\nWORKEXPERIENCEDataScientistatCESARCentrodeEstudoseSistemasAvançadosdoRecife\n0 2 / 2 0 2 0-P r e s e n t,R e m o t e\nA c h i e v e m e n t s / T a s k s,R e c i f e ,P e r n a m b u c o ,B r a z i l\n-WorkinginanR&DteamfocusedresearchinthefieldofSoftwareEngineering ,DataScienceandAIdevelopingsolutionstosolveavarietyofproblemsforaglobalclient;\n-ResponsiblefordevelopingmicroservicesinPythoninvolvingpipelines,datastreaming(Spark),messagingsystems(Kafka,RabbitMQ )withtechnologiessuchasDocker,Kubernetes,HelmChart,RESTAPI,gRPC,MongoDB,SQL,OracleDB,CI/CD\n-Planned,trained,evaluated,deployed,andmaintainedMachinelearning /Deeplearningmodelsusingtools/frameworkssuchasPyTorch,Scikit-Learn,Feast,ApacheAirf

In [15]:
# Build a RetrievalQA chain of type "stuff" that answers with `llm`, using the Marqo
# vector store (wrapped as a retriever) to fetch context.
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vectorstore.as_retriever())

In [18]:
# Ask the RetrievalQA chain the question end-to-end (retrieval + generation) and time it.
query = "What professions did Gerson have?"
time_start = perf_counter()
# Chain.run is deprecated in LangChain; invoke takes the chain's input dict and returns
# an output dict. RetrievalQA reads the question from "query" and puts the answer text
# under "result", so `response` is still a plain string.
response = qa.invoke({"query": query})["result"]
time_duration = perf_counter() - time_start
# Stored as dict_json[experiment]["execute"].
dict_json.setdefault(experiment, {})["execute"] = time_duration
print(response)

 GersonSantos worked as a Data Scientist in a technology company and Software Developer.</s>


In [19]:
%%bash
# Cleanup: stop the llamafile server whose pid was saved in .llamafile_pid by the
# setup cell, then remove the pid file.
# Guarded so the cell can be re-run safely: a missing pid file or an already-exited
# process is reported instead of making the cell fail.
pid_file=".llamafile_pid"
if [ -f "${pid_file}" ]; then
    pid="$(cat "${pid_file}")"
    # kill -0 only checks that the process exists and can be signalled.
    if [ -n "${pid}" ] && kill -0 "${pid}" 2>/dev/null; then
        kill "${pid}"
    else
        echo "Process '${pid}' is not running; nothing to kill."
    fi
    rm -f "${pid_file}"
else
    echo "No ${pid_file} found; nothing to clean up."
fi

In [20]:
# Persist the collected timings to results/<experiment>/<timestamp>.json.
timestr = time.strftime("%Y%m%d-%H%M%S")
results_dir = Path("results") / experiment
# open(..., "w") does not create parent directories, so make sure the target exists.
results_dir.mkdir(parents=True, exist_ok=True)
# ensure_ascii=False may emit non-ASCII characters, so pin the file encoding
# instead of relying on the platform default.
with open(results_dir / f"{timestr}.json", "w", encoding="utf-8") as f:
    json.dump(dict_json, f, ensure_ascii=False)