In [1]:
%%bash
# llamafile setup
#
# Starts a llamafile server in the background for the rest of the notebook.
# The binary named in LLAMAFILE must already exist in the working directory
# and be executable.

# The llamafile this cell launches. Change this one value to serve a different
# model (for example the Meta-Llama 3.1 build from the optional steps below).
LLAMAFILE="TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile"

# Optional step 1: Download a llamafile. The download may take several minutes.
# NOTE: the example below fetches Meta-Llama 3.1 8B Instruct, which is NOT the
# file launched by default; update LLAMAFILE if you decide to use it.
# wget https://huggingface.co/Mozilla/Meta-Llama-3.1-8B-Instruct-llamafile/resolve/main/Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile

# Optional step 2: Make the llamafile executable. Note: if you're on Windows, just append '.exe' to the filename.
# chmod +x Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile

# Fail loudly when the binary is missing. Without this check the background job
# dies immediately, the error is only visible in the log file, and a stale pid
# is still recorded for the cleanup cell.
if [ ! -x "./${LLAMAFILE}" ]; then
    echo "error: ./${LLAMAFILE} not found or not executable; download it and run chmod +x first" >&2
    exit 1
fi

# Step 3: Start the llamafile server in the background. All the server logs will be written to 'tinyllama.log'.
# Alternatively, you can just open a separate terminal outside this notebook and run:
#   ./<llamafile name> --server --nobrowser --embedding
"./${LLAMAFILE}" --server --nobrowser --embedding > tinyllama.log 2>&1 &
pid=$!
echo "${pid}" > .llamafile_pid  # write the process pid to a file so we can terminate the server later

In [2]:
import json
import time
from pathlib import Path
from time import perf_counter

from langchain.chains import RetrievalQA
from langchain_community.embeddings import LlamafileEmbeddings
from langchain_community.llms.llamafile import Llamafile
from langchain_community.vectorstores.redis import Redis

from utils.text import get_pdf_text, get_text_chunks

In [3]:
# Experiment configuration: the backend under test, the names of the timing
# statistics derived from it, and the dict that accumulates measured timings.
experiment = "redis"
statistics = [f"{stage}_{experiment}" for stage in ("save", "search", "execute")]
dict_json = dict()

In [4]:
# Embedding client, constructed with its default settings (no arguments passed).
# NOTE(review): presumably talks to the local llamafile server started with
# --embedding in the setup cell — confirm the default endpoint matches.
embedder = LlamafileEmbeddings()

In [5]:
# LLM client, constructed with its default settings (no arguments passed).
# NOTE(review): presumably served by the same local llamafile server as the
# embedder — confirm the default endpoint matches.
llm = Llamafile()

In [6]:
# Extract the text of the resume PDF, then split it into chunks for indexing.
pdf_text = get_pdf_text("resume.pdf")
docs = get_text_chunks(pdf_text)

In [7]:
# Measure how long it takes to embed the chunks and store them in Redis,
# then record the elapsed seconds under this experiment's "save" entry.
time_start = perf_counter()
vectorstore = Redis.from_documents(
    documents=docs,  # chunks produced from the PDF
    embedding=embedder,  # embedding client used to vectorise each chunk
    redis_url="redis://localhost:6379",
)
time_duration = perf_counter() - time_start
dict_json.setdefault(experiment, {}).update({"save": time_duration})

In [8]:
# Time a single similarity lookup against the vector store, record the elapsed
# seconds under this experiment's "search" entry, and print the hits.
query = "What professions did Gerson have?"
time_start = perf_counter()
results = vectorstore.similarity_search(query=query)
time_duration = perf_counter() - time_start
dict_json.setdefault(experiment, {}).update({"search": time_duration})
print(results)

[Document(metadata={'id': 'doc:ca1cfd0578e24ab9878a935a1b99952e:f54d5630b63a4679886d0f9ebf4c44cd', 'source': 'resume.pdf', 'page': '0'}, page_content='DataOrchestrationandDataOpsEngineeringTemplate(03/2023-Present)-Developedanopen-sourcerepositoryandprojectprovidingacomprehensiveframeworkandasuiteoftoolsforDataOrchestrationandDataOps.-Designedtosimplifytheend-to-endmanagementofdataworkflows,theprojectincludestoolscapableofperformingdataextraction,transformation,andloading(ETL),datavalidation,andmonitoring.-Aimedatstreamliningdataoperationsandenhancingdatareliability,thetemplatefacilitatesseamlessorchestrationofdatapipelines,ensuringefficientdataflowandtimelyprocessing.-Committedtofosteringacollaborativeandthrivingdatacommunity,theprojectembracesopen-sourceprinciples,enablingdataengineerstoleveragethetemplatefordiversedata-drivenprojectswithease.'), Document(metadata={'id': 'doc:ca1cfd0578e24ab9878a935a1b99952e:933868f8e63f45af9c4e724c66042094', 'source': 'resume.pdf', 'page': '0'}, pag

In [9]:
# Retrieval-augmented QA chain: the "stuff" chain type, with documents supplied
# by the vector store's retriever and answers generated by the llm.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)

In [10]:
# Time one end-to-end question through the retrieval QA chain, record the
# elapsed seconds under this experiment's "execute" entry, and print the answer.
query = "What professions did Gerson have?"
time_start = perf_counter()
# `Chain.run` is deprecated in LangChain; `invoke` is the supported entry point.
# RetrievalQA takes its input under "query" and returns the answer text under
# "result", which is the same string `run` used to return.
response = qa.invoke({"query": query})["result"]
time_duration = perf_counter() - time_start
dict_json.setdefault(experiment, {})["execute"] = time_duration
print(response)

  warn_deprecated(
score_threshold is deprecated. Use distance_threshold instead.score_threshold should only be used in similarity_search_with_relevance_scores.score_threshold will be removed in a future release.


 Gerson is a Data Scientist and Software Developer with proficiency in Python/Machine Learning-based microservices development, AI, data science, and R&D. He has experience working on an R&D team for solving various problems for a global client with a focus on software engineering, data science, and artificial intelligence (AI) in various technologies such as Docker, Kubernetes, Helm Chart, RESTAPI, gRPC, MongoDB, SQL, Oracle DB, CI/CD. He has also planned, trained, evaluated, deployed, and maintained Machine Learning / Deep Learning models using tools like PyTorch, Scikit-Learn, Feast, Apache Airflow, Mlflow, Pandas, NumPy.</s>


In [11]:
%%bash
# cleanup: kill the llamafile server process recorded by the setup cell.
# Guarded so the cell can be re-run (or run when setup never happened) without
# raising: a missing pid file or an already-dead process is reported, not fatal.
if [ -f .llamafile_pid ]; then
    kill "$(cat .llamafile_pid)" 2>/dev/null || echo "llamafile server was not running"
    rm -f .llamafile_pid
else
    echo "no .llamafile_pid file found; nothing to clean up"
fi

In [12]:
# Persist the collected timings as results/<experiment>/<timestamp>.json.
timestr = time.strftime("%Y%m%d-%H%M%S")
results_dir = Path("results") / experiment
# Create the output directory on first use; open() alone raises
# FileNotFoundError when results/<experiment>/ does not exist yet.
results_dir.mkdir(parents=True, exist_ok=True)
# ensure_ascii=False may emit non-ASCII characters, so pin the file encoding
# instead of relying on the platform default.
with open(results_dir / f"{timestr}.json", "w", encoding="utf-8") as f:
    json.dump(dict_json, f, ensure_ascii=False)