In [1]:
%%bash
# llamafile setup
#
# Starts a llamafile server in the background (embedding endpoint enabled) and
# records its PID in '.llamafile_pid' so the cleanup cell can terminate it later.
#
# NOTE: the commented-out commands in Steps 1-3 refer to the
# Meta-Llama-3.1-8B-Instruct llamafile, while the command that actually runs
# launches the TinyLlama llamafile. Whichever model you launch must already be
# downloaded into this directory and marked executable.

# Step 1: Download a llamafile. The download may take several minutes.
# wget https://huggingface.co/Mozilla/Meta-Llama-3.1-8B-Instruct-llamafile/resolve/main/Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile

# Step 2: Make the llamafile executable. Note: if you're on Windows, just append '.exe' to the filename.
# chmod +x Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile

# Step 3: Start llamafile server in background. All the server logs will be written to 'tinyllama.log'.
# Alternatively, you can just open a separate terminal outside this notebook and run:
#   ./Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile --server --nobrowser --embedding
# ./Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile --server --nobrowser --embedding > tinyllama.log 2>&1 &
llamafile="./TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile"

# Fail loudly before launching: otherwise the "not found" / "permission denied"
# error would only land in tinyllama.log, the cell would still succeed, and the
# PID of an already-dead process would be written to .llamafile_pid.
if [ ! -x "${llamafile}" ]; then
    echo "Cannot start server: ${llamafile} is missing or not executable (see Steps 1 and 2)." >&2
    exit 1
fi

"${llamafile}" --server --nobrowser --embedding > tinyllama.log 2>&1 &
pid=$!
echo "${pid}" > .llamafile_pid  # write the process pid to a file so we can terminate the server later

In [2]:
import json
import time
from time import perf_counter
import lancedb
import os
import boto3
import botocore
from langchain_community.vectorstores import LanceDB

from langchain_community.embeddings import LlamafileEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.llms.llamafile import Llamafile
from utils.text import get_pdf_text, get_text_chunks

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Label for this benchmark run. Later cells use it as the top-level key in
# `dict_json` and as the sub-directory name under `results/`.
experiment = "lance"

# Metric names of the form "<stage>_<experiment>", one per measured stage.
# NOTE(review): nothing in the visible cells reads this list — confirm it is still needed.
statistics = [f"{stage}_{experiment}" for stage in ("save", "search", "execute")]

# Accumulator for the timings gathered by the benchmark cells; dumped to JSON at the end.
dict_json = dict()

In [4]:
# Embedding client for llamafile, created with the constructor's default settings.
# It is later passed as `embedding=` when the LanceDB vector store is built.
# NOTE(review): presumably this talks to the llamafile server launched in the setup
# cell (started with --embedding) — confirm the default endpoint matches that server.
embedder = LlamafileEmbeddings()

In [5]:
# LLM client for llamafile, created with the constructor's default settings.
# It is later passed as `llm=` when the RetrievalQA chain is assembled.
# NOTE(review): presumably this targets the same local llamafile server as the
# embedder — confirm the default endpoint.
llm = Llamafile()

In [6]:
# Load "resume.pdf" with the project's `utils.text` helpers, then split it into
# chunks. The result is what gets indexed into the vector store later on.
resume_pdf_content = get_pdf_text("resume.pdf")
docs = get_text_chunks(resume_pdf_content)

In [7]:
# Object-store settings for a MinIO instance on localhost, exported as environment
# variables for the rest of the notebook.
# NOTE(review): the credentials are hardcoded; they look like MinIO's stock
# local-development defaults — confirm, and never use this pattern for real secrets
# (read them from the environment or a secrets manager instead).
os.environ["AWS_ACCESS_KEY_ID"] = "minioadmin"
os.environ["AWS_SECRET_ACCESS_KEY"] = "minioadmin"
os.environ["AWS_ENDPOINT"] = "http://localhost:9000"
os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
os.environ["ALLOW_HTTP"] = "true"  # the endpoint above is plain http
minio_bucket_name = "lance"

# Create a boto3 S3 client with path-style access.
# The endpoint is passed explicitly: boto3 does not pick up AWS_ENDPOINT by itself
# (its own setting is AWS_ENDPOINT_URL), so without `endpoint_url` this client
# would not target the MinIO endpoint configured above.
session = boto3.Session()
s3_client = session.client(
    "s3",
    endpoint_url=os.environ["AWS_ENDPOINT"],
    config=botocore.config.Config(s3={'addressing_style': 'path'}),
)

# URI of the LanceDB location inside the bucket.
# Note: `s3_client` is NOT handed to LanceDB in this notebook — the connection is
# opened from this URI alone. The client is only available for direct bucket
# operations (e.g. inspecting or creating the bucket).
db_uri = f"s3://{minio_bucket_name}/tables"

In [8]:
# Open a LanceDB connection at the `s3://<bucket>/tables` URI built in the previous cell.
# NOTE(review): presumably the AWS_* / ALLOW_HTTP environment variables set earlier
# are what point this at the local endpoint — confirm against the LanceDB
# object-storage configuration docs.
db = lancedb.connect(db_uri)

In [9]:
# Benchmark: build the LanceDB vector store from the chunks using the llamafile
# embedder, and record the elapsed wall-clock seconds under this experiment's
# "save" entry.
time_start = perf_counter()
vectorstore = LanceDB.from_documents(
    documents=docs,
    embedding=embedder,
    connection=db,
)
time_duration = perf_counter() - time_start
dict_json.setdefault(experiment, {}).update({"save": time_duration})

[2024-08-01T19:37:49Z WARN  lance_table::io::commit] Using unsafe commit handler. Concurrent writes may result in data loss. Consider providing a commit handler that prevents conflicting writes.


In [10]:
# Benchmark: run a similarity search against the vector store (the QA chain is
# not involved here) and record the elapsed wall-clock seconds under this
# experiment's "search" entry. The printed hits serve as a sanity check.
query = "What professions did Gerson have?"
time_start = perf_counter()
results = vectorstore.similarity_search(query)
time_duration = perf_counter() - time_start
dict_json.setdefault(experiment, {}).update({"search": time_duration})
print(results)

[Document(metadata={'page': 0, 'source': 'resume.pdf'}, page_content='DataOrchestrationandDataOpsEngineeringTemplate(03/2023-Present)-Developedanopen-sourcerepositoryandprojectprovidingacomprehensiveframeworkandasuiteoftoolsforDataOrchestrationandDataOps.-Designedtosimplifytheend-to-endmanagementofdataworkflows,theprojectincludestoolscapableofperformingdataextraction,transformation,andloading(ETL),datavalidation,andmonitoring.-Aimedatstreamliningdataoperationsandenhancingdatareliability,thetemplatefacilitatesseamlessorchestrationofdatapipelines,ensuringefficientdataflowandtimelyprocessing.-Committedtofosteringacollaborativeandthrivingdatacommunity,theprojectembracesopen-sourceprinciples,enablingdataengineerstoleveragethetemplatefordiversedata-drivenprojectswithease.'), Document(metadata={'page': 0, 'source': 'resume.pdf'}, page_content="0 3 / 2 0 1 0-1 2 / 2 0 1 5,8 0 . 5 %Master's,AppliedInformaticsUniversidadeFederalRuraldePernambuc o\n0 3 / 2 0 1 8-0 2 / 2 0 2 0,H i g h e s tD i s t

In [11]:
# Assemble the question-answering chain: the llamafile LLM answers using context
# fetched through a retriever view of the vector store ("stuff" chain type).
retriever = vectorstore.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

In [12]:
# Benchmark: run the full RetrievalQA chain for the question and record the
# elapsed wall-clock seconds under this experiment's "execute" entry.
query = "What professions did Gerson have?"
time_start = perf_counter()
# `qa.run(...)` is deprecated in LangChain (it emitted a deprecation warning here);
# `invoke` is the supported call. RetrievalQA takes its input under "query" and
# returns the answer text under "result", so `response` is still the answer string.
response = qa.invoke({"query": query})["result"]
time_duration = perf_counter() - time_start
dict_json.setdefault(experiment, {})["execute"] = time_duration
print(response)

  warn_deprecated(


 Gerson is a Data Scientist, Software Developer, and Data Scientist with expertise in Python/Machine Learning, Microservices development, technical solutions for business requirements. He has experience working in an R&D team focused on solving various problems related to software engineering, data science, and AI technologies. He is responsible for developing microservices in Python using various technologies such as Docker, Kubernetes, HelmChart, REST APIs, gRPC, MongoDB, SQL, Oracle DB, CI/CD, Machine Learning/Deep Learning models using tools like PyTorch, Scikit-Learn, Feast, Apache Airflow, and MPLF. He has also worked on building machine learning / deep learning models using Python frameworks such as PyTorch, Scikit-Learn, and Feast.</s>


In [13]:
%%bash
# cleanup: kill the llamafile server process recorded by the setup cell.
#
# Safe to re-run: a missing PID file or an already-exited server is reported on
# stderr instead of making the cell fail.
pid_file=".llamafile_pid"

if [ -f "${pid_file}" ]; then
    # Quote the substitution so an empty or odd file cannot turn into a bare `kill`.
    if ! kill "$(cat "${pid_file}")" 2>/dev/null; then
        echo "llamafile server was not running (stale ${pid_file})." >&2
    fi
    rm -f "${pid_file}"
else
    echo "No ${pid_file} found; nothing to clean up." >&2
fi

In [14]:
# Persist the collected timings to results/<experiment>/<YYYYmmdd-HHMMSS>.json.
timestr = time.strftime("%Y%m%d-%H%M%S")
results_dir = f"results/{experiment}"
# open(..., "w") does not create parent directories, so make sure the target
# directory exists (no-op when it is already there).
os.makedirs(results_dir, exist_ok=True)
# Explicit UTF-8: with ensure_ascii=False json.dump may write non-ASCII characters,
# which can fail under a platform-default encoding.
with open(f"{results_dir}/{timestr}.json", "w", encoding="utf-8") as f:
    json.dump(dict_json, f, ensure_ascii=False)