In [1]:
%%bash
# llamafile setup
#
# This cell starts the TinyLlama llamafile named in LLAMAFILE below, which must
# already be present next to the notebook. The commented-out commands in
# Steps 1-3 show the equivalent workflow for the larger
# Meta-Llama-3.1-8B-Instruct model; to use that model instead, run them and
# point LLAMAFILE at the downloaded file.

# Step 1: Download a llamafile. The download may take several minutes.
# wget https://huggingface.co/Mozilla/Meta-Llama-3.1-8B-Instruct-llamafile/resolve/main/Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile

# Step 2: Make the llamafile executable. Note: if you're on Windows, just append '.exe' to the filename.
# chmod +x Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile

# Step 3: Start llamafile server in background. All the server logs will be written to 'tinyllama.log'.
# Alternatively, you can just open a separate terminal outside this notebook and run:
#   ./Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile --server --nobrowser --embedding
# ./Meta-Llama-3.1-8B-Instruct.Q6_K.llamafile --server --nobrowser --embedding > tinyllama.log 2>&1 &
LLAMAFILE="./TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile"
LOG_FILE="tinyllama.log"

# Fail loudly here instead of letting a "No such file" error disappear into the
# log while the PID of an already-dead process is recorded.
if [ ! -f "${LLAMAFILE}" ]; then
    echo "llamafile not found: ${LLAMAFILE} (download it first, see Step 1)" >&2
    exit 1
fi

# Make sure the file can be executed (Step 2) before trying to launch it.
if [ ! -x "${LLAMAFILE}" ]; then
    chmod +x "${LLAMAFILE}"
fi

"${LLAMAFILE}" --server --nobrowser --embedding > "${LOG_FILE}" 2>&1 &
pid=$!
echo "${pid}" > .llamafile_pid  # write the process pid to a file so we can terminate the server later

In [2]:
import lancedb
import os
import boto3
import botocore
from langchain_community.vectorstores import LanceDB

from langchain_community.embeddings import LlamafileEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.llms.llamafile import Llamafile
from utils.text import get_pdf_text, get_text_chunks

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Embedding client used later to vectorise the document chunks.
# No arguments are passed, so the class defaults decide which server is
# contacted -- presumably the local llamafile server started in the first
# cell; confirm against the LlamafileEmbeddings documentation.
embedder = LlamafileEmbeddings()

In [4]:
# Text-generation client used by the question-answering chain.
# Constructed with defaults only -- presumably it talks to the same local
# llamafile server as the embedder; confirm against the Llamafile documentation.
llm = Llamafile()

In [5]:
# Read the resume PDF and break its text into chunks for indexing.
# NOTE(review): both helpers come from utils.text; their exact return types
# are not visible here -- verify that the chunks are what
# LanceDB.from_documents expects.
resume_text = get_pdf_text("resume.pdf")
docs = get_text_chunks(resume_text)

In [6]:
# Storage configuration for a local MinIO server (S3-compatible).
# "minioadmin" / "minioadmin" are MinIO's default development credentials:
# acceptable for a throwaway local instance, never for a real deployment.
os.environ["AWS_ACCESS_KEY_ID"] = "minioadmin"
os.environ["AWS_SECRET_ACCESS_KEY"] = "minioadmin"
os.environ["AWS_ENDPOINT"] = "http://localhost:9000"
os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
# The endpoint above is plain http, so non-TLS access has to be allowed.
os.environ["ALLOW_HTTP"] = "true"
minio_bucket_name = "lance"

# Create a boto3 session with path-style access.
# boto3 does not read AWS_ENDPOINT, so the endpoint is passed explicitly;
# without it this client would target the real AWS S3 service instead of the
# local MinIO server.
session = boto3.Session()
s3_client = session.client(
    "s3",
    endpoint_url=os.environ["AWS_ENDPOINT"],
    config=botocore.config.Config(s3={"addressing_style": "path"}),
)

# URI of the LanceDB database inside the bucket. The connection itself is
# opened later with lancedb.connect(db_uri); s3_client is not passed to
# LanceDB and is only available for direct bucket inspection.
db_uri = f"s3://{minio_bucket_name}/tables"

In [7]:
# Open the LanceDB database located at db_uri (an s3:// URI built earlier).
db = lancedb.connect(db_uri)
# Disabled example, kept for reference: creating a table by hand through the
# LanceDB API with a single embedded record.
# table = db.create_table(
#     "pandas_docs",
#     data=[
#         {
#             "vector": embedder.embed_query("Hello World"),
#             "text": "Hello World",
#             "id": "1",
#         }
#     ],
#     mode="overwrite",
# )
# Embed the document chunks with `embedder` and store them through the
# connection opened above. No table name or write mode is given, so the
# LangChain LanceDB wrapper's defaults apply -- TODO confirm what they are
# before pointing this at a database that holds other data.
vectorstore = LanceDB.from_documents(documents=docs, embedding=embedder, connection=db)

[2024-07-31T21:01:58Z WARN  lance_table::io::commit] Using unsafe commit handler. Concurrent writes may result in data loss. Consider providing a commit handler that prevents conflicting writes.


In [8]:
# Assemble the retrieval question-answering chain: the vector store supplies
# the relevant chunks and the llamafile LLM writes the answer. The "stuff"
# chain type is LangChain's name for placing the retrieved documents into a
# single prompt.
retriever = vectorstore.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

In [9]:
# Ask a question about the indexed resume and print the model's answer.
query = "What professions did Gerson have?"
# Chain.run() is deprecated (it triggers the warn_deprecated output); invoke()
# is the supported entry point. RetrievalQA takes its input under the "query"
# key and returns a dict whose "result" key holds the answer text.
response = qa.invoke({"query": query})["result"]
print(response)

  warn_deprecated(


 Gerson has experience in Data Scientist, Software Developer, and Microservices development.</s>


In [10]:
%%bash
# cleanup: kill the llamafile server process
#
# Safe to run more than once: a missing pid file or an already-stopped server
# is reported instead of making the cell fail.
PID_FILE=".llamafile_pid"

if [ ! -f "${PID_FILE}" ]; then
    echo "No ${PID_FILE} found; the llamafile server is not running or was already stopped."
    exit 0
fi

pid="$(cat "${PID_FILE}")"

# 'kill -0' sends no signal; it only checks that the process still exists.
if [ -n "${pid}" ] && kill -0 "${pid}" 2>/dev/null; then
    kill "${pid}"
else
    echo "llamafile server (pid '${pid}') is not running; removing stale pid file."
fi

rm -f "${PID_FILE}"