In [1]:
%pip install wordllama

Collecting wordllama
  Downloading wordllama-0.3.2.post0-cp312-cp312-win_amd64.whl.metadata (14 kB)
Collecting safetensors (from wordllama)
  Downloading safetensors-0.4.5-cp312-none-win_amd64.whl.metadata (3.9 kB)
Collecting toml (from wordllama)
  Downloading toml-0.10.2-py2.py3-none-any.whl.metadata (7.1 kB)
Downloading wordllama-0.3.2.post0-cp312-cp312-win_amd64.whl (16.9 MB)
   ---------------------------------------- 0.0/16.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/16.9 MB ? eta -:--:--
   - -------------------------------------- 0.5/16.9 MB 1.9 MB/s eta 0:00:09
   - -------------------------------------- 0.8/16.9 MB 1.8 MB/s eta 0:00:10
   --- ------------------------------------ 1.3/16.9 MB 1.8 MB/s eta 0:00:09
   --- ------------------------------------ 1.6/16.9 MB 1.6 MB/s eta 0:00:10
   ---- ----------------------------------- 1.8/16.9 MB 1.6 MB/s eta 0:00:10
   ---- ----------------------------------- 2.1/16.9 MB 1.6 MB/s eta 0:00:10
   ------ -----

In [1]:
from langchain_core.embeddings.embeddings import Embeddings
from wordllama import WordLlama

In [2]:
class WordLlamaEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter backed by a WordLlama model.

    Exposes ``embed_query`` / ``embed_documents`` so the model can be handed
    to any component that accepts an ``Embeddings`` object.
    """

    def __init__(self):
        super().__init__()
        # Load the model once with WordLlama's defaults; every call reuses it.
        self.wl = WordLlama.load()

    def embed_query(self, text: str) -> list[float]:
        """Embed a single query string.

        Args:
            text: The query to embed.

        Returns:
            The embedding of ``text`` as a list of floats.
        """
        return self.embed_documents(texts=[text])[0]

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed a batch of texts.

        Args:
            texts: The texts to embed. A bare string is treated as a
                one-element batch.

        Returns:
            One embedding (a list of floats) per input text, in input order;
            an empty list when ``texts`` is empty.
        """
        # Normalise the input so the model always receives a concrete list.
        if isinstance(texts, str):
            texts = [texts]
        else:
            texts = list(texts)
        # Nothing to embed: answer directly instead of sending an empty batch
        # to the model.
        if not texts:
            return []
        return self.wl.embed(texts).tolist()

In [3]:
# Build the adapter once; constructing it loads the WordLlama model.
wl_embedding = WordLlamaEmbeddings()

In [4]:
# embed_documents works on a batch, so pass a list even for a single text.
wl_embedding.embed_documents(['How are you?'])

[[0.0680389404296875,
  0.3641204833984375,
  -0.06667518615722656,
  0.1931915283203125,
  0.0122528076171875,
  0.011775970458984375,
  -0.17281341552734375,
  -0.0818634033203125,
  0.04052734375,
  -0.29278564453125,
  -0.141845703125,
  0.188720703125,
  0.1627960205078125,
  0.0649566650390625,
  -0.11504605412483215,
  -0.10194921493530273,
  0.026763916015625,
  -0.30761146545410156,
  -0.144775390625,
  -0.0410919189453125,
  0.2548408508300781,
  -0.003597259521484375,
  0.056610107421875,
  0.16131591796875,
  0.08602142333984375,
  -0.0589447021484375,
  0.15967178344726562,
  -0.0382232666015625,
  -0.0177154541015625,
  -0.053466796875,
  0.042510986328125,
  0.21312332153320312,
  -0.047400474548339844,
  -0.08856010437011719,
  0.151275634765625,
  0.04498291015625,
  -0.0533599853515625,
  0.235076904296875,
  0.0597076416015625,
  -0.2534027099609375,
  0.11663055419921875,
  -0.160980224609375,
  0.14080810546875,
  0.1207122802734375,
  0.1858367919921875,
  0.03917

In [5]:
import getpass
import os

In [6]:
# Enable LangChain tracing for the calls made in this notebook.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
# Prompt only when the key is not already set, so re-running the cell (or
# running with the variable exported) neither blocks on input nor overwrites it.
if not os.environ.get('LANGCHAIN_API_KEY'):
    os.environ['LANGCHAIN_API_KEY'] = getpass.getpass('LANGCHAIN_API_KEY: ')

In [7]:
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter



In [8]:
# Load the requirements page, parsing only the elements whose CSS class is
# "stats-table".
loader = WebBaseLoader(
    web_paths=("https://llamaimodel.com/requirements/",),
    bs_kwargs={"parse_only": bs4.SoupStrainer(class_="stats-table")},
)
docs = loader.load()

In [9]:
# Break the loaded documents into overlapping chunks before embedding them.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

In [10]:
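# Optional cleanup: uncomment to drop the existing collection before rebuilding it.
# Only valid once `vectorstore` has been created by the cell below.
# vectorstore.delete_collection()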

In [11]:
# Embed every chunk with the WordLlama adapter and index the vectors in Chroma.
# NOTE(review): no collection name or persist directory is passed — presumably
# re-running this cell in the same kernel indexes the chunks again; confirm.
vectorstore = Chroma.from_documents(documents=splits, embedding=wl_embedding)

INFO:chromadb.telemetry.product.posthog:Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


In [12]:
from langchain_ollama.llms import OllamaLLM

In [13]:
# LLM accessed through Ollama, using the 'phi3' model.
# NOTE(review): presumably needs a running Ollama server with phi3 already
# pulled — confirm before a full re-run.
llm = OllamaLLM(model='phi3')

In [14]:
# Retrieve and generate using the relevant snippets of the indexed page.
retriever = vectorstore.as_retriever()
# Fetch the "rlm/rag-prompt" prompt from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Concatenate the text of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        A single string with every ``page_content`` joined by two newlines.
    """
    contents = [item.page_content for item in docs]
    separator = "\n\n"
    return separator.join(contents)

# RAG pipeline: build the prompt inputs, fill the prompt, call the LLM, and
# parse the model output into a string.
rag_chain = (
    {
        # Retrieved documents are flattened into one context string.
        "context": retriever | format_docs,
        # The chain input itself is supplied as the question.
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [15]:
# Run one question through the whole chain and display the answer.
rag_chain.invoke("What's the article about?")

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


'The article is about the detailed specifications of various versions (3.1, 70B, and 405B) of Llama AI models across different parameters such as context length, multilingual support, hardware requirements for CPUs and RAM, GPU options with associated disk space needs, estimated memory requirements in higher and lower precision modes along with software dependencies including operating systems, programming language (Python), frameworks (PyTorch or TensorFlow), and libraries. Each model specifies these details to guide users on the necessary setup required to run the respective Llama AI models effectively for tasks like natural language processing where multilingual support is also considered across eight languages in this context.'