In [1]:
# Task types handled by this notebook; each one is a key of PROMPTS.
QUESTIONS_TYPES = ["sleep", "cars"]

# Prompt text per task type:
#   "system" - persona description for the domain expert,
#   "rag"    - template with {resources} and {question} placeholders,
#   "basic"  - template with only a {question} placeholder (no retrieval).
# The "system" texts were previously attached to the wrong task type
# (sleep persona under "cars" and vice versa); each now sits under its own key.
PROMPTS = {
    "cars": {
        "system": "You are an expert in the history of automobiles with in-depth knowledge of the development of automobiles from the late 19th century to the present day. Your task is to generate insightful and varied answers on automobile history. The answers should be diverse in complexity, suitable for learners and experts alike.",
        "rag": "Use resources provided to answer the following question.\nResources: {resources}\n\nHuman: Generate me an answer to the given question: {question}\n\nAssistant:",
        "basic": "Human: Generate me an answer to the given question: {question}\n\nAssistant:",
    },
    "sleep": {
        "system": "You are an expert in sleep science with in-depth knowledge of sleep physiology, circadian rhythms, sleep disorders, and the impact of sleep on health and cognitive performance. Your task is to generate insightful and varied answers on sleep-related topics. The answers should be diverse in complexity, suitable for learners and experts alike.",
        "rag": "Use resources provided to answer the following question.\nResources: {resources}\n\nHuman: Generate me an answer to the given question: {question}\n\nAssistant:",
        "basic": "Human: Generate me an answer to the given question: {question}\n\nAssistant:",
    },
}

# Passed as `max_new_tokens` to the text-generation pipeline.
MAX_NEW_TOKENS = 8192

In [17]:
from threading import Thread

import nltk
import transformers
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import TextLoader # type: ignore
from langchain.text_splitter import CharacterTextSplitter, NLTKTextSplitter # type: ignore
from langchain.vectorstores import FAISS # type: ignore
from langchain.embeddings.huggingface import HuggingFaceEmbeddings  # type: ignore
from langchain.schema.runnable import RunnablePassthrough # type: ignore
from langchain.schema import Document # type: ignore

# Fetch the NLTK "punkt_tab" data package (no-op when already up to date).
# NOTE(review): presumably the sentence-tokenizer data NLTKTextSplitter needs — confirm.
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /home/stepan/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [14]:
def load_model(task_type, use_rag=True):
    """Build a LangChain chain around the local Llama model for ``task_type``.

    Args:
        task_type: Key into ``PROMPTS``. It also selects the model directory
            ``../models/{task_type}/llama-3_2-1b-it`` and, when ``use_rag``
            is true, the knowledge file ``../data/{task_type}.txt``.
        use_rag: When true, the knowledge file is split into chunks, indexed
            in FAISS, and retrieved chunks fill the ``{resources}``
            placeholder of the "rag" prompt. When false, only the "basic"
            prompt with a ``{question}`` placeholder is used.

    Returns:
        A ``(chain, streamer)`` tuple in both modes. ``streamer`` is the
        ``TextIteratorStreamer`` attached to the generation pipeline, so
        iterating it yields text while ``chain.invoke`` runs elsewhere.

    Raises:
        KeyError: If ``task_type`` is not in ``PROMPTS`` or, in RAG mode,
            has no "rag" template.
    """
    model_id = f"../models/{task_type}/llama-3_2-1b-it"
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
    # skip_prompt=True: the streamer emits only newly generated text.
    # timeout=10.: iterating the streamer raises if no text arrives in 10 s.
    streamer = transformers.TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
    text_generation_pipeline = transformers.pipeline(
        model=model_id,
        task="text-generation",
        temperature=0.5,
        repetition_penalty=1.1,
        return_full_text=True,
        max_new_tokens=MAX_NEW_TOKENS,
        streamer=streamer
    )

    llama_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

    templates = PROMPTS[task_type]
    if use_rag:
        template = templates['rag']
        input_variables = ["question", "resources"]
    else:
        # Fall back to a plain question prompt when the task type has no
        # dedicated "basic" template configured.
        template = templates.get(
            'basic',
            "Human: Generate me an answer to the given question: {question}\n\nAssistant:",
        )
        # The original conditional expression bound looser than `+`, which
        # left this list empty; the question is always a prompt input.
        input_variables = ["question"]

    prompt = PromptTemplate(input_variables=input_variables, template=template)

    llm_chain = LLMChain(llm=llama_llm, prompt=prompt)

    if not use_rag:
        # Same (chain, streamer) shape as the RAG path so callers can
        # unpack the result identically in both modes.
        return llm_chain, streamer

    loader = TextLoader(f"../data/{task_type}.txt")
    docs = loader.load()

    # Sentence-based splitting; individual chunks may exceed chunk_size
    # (the splitter logs a warning when that happens).
    text_splitter = NLTKTextSplitter(chunk_size=250, chunk_overlap=20)
    chunked_documents = text_splitter.split_documents(docs)

    # Tag every chunk so retrieval can be restricted to this task type.
    for doc in chunked_documents:
        doc.metadata['task_type'] = task_type

    db = FAISS.from_documents(chunked_documents, HuggingFaceEmbeddings(model_name='sentence-transformers/multi-qa-MiniLM-L6-dot-v1'))
    # The metadata filter belongs inside search_kwargs: those are the
    # arguments forwarded to the vector-store search. As a top-level
    # as_retriever keyword it was not part of the search call.
    retriever = db.as_retriever(
        search_type="similarity",
        search_kwargs={
            'k': 4,
            'score_threshold': 0.5,
            'filter': {'task_type': task_type},
        },
    )

    def format_docs(docs):
        """Join the retrieved chunks' text into one blank-line-separated string."""
        return "\n\n".join(doc.page_content for doc in docs)

    # The incoming question is passed through unchanged and also drives
    # retrieval; both values feed the prompt inside llm_chain.
    rag_chain = {"resources": retriever | format_docs, "question": RunnablePassthrough()} | llm_chain
    return rag_chain, streamer

In [15]:
# Build the chain for the "sleep" task type (RAG mode is the default) together
# with the streamer that yields generated text while the chain runs.
model, streamer = load_model("sleep")

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.
Created a chunk of size 337, which is longer than the specified 250
Created a chunk of size 360, which is longer than the specified 250
Created a chunk of size 316, which is longer than the specified 250
Created a chunk of size 255, which is longer than the specified 250
Created a chunk of size 382, which is longer than the specified 250
Created a chunk of size 293, which is longer than the specified 250
Created a chunk of size 565, which is longer than the specified 250
Created a chunk of size 313, which is longer than the specified 250
Created a chunk of size 275, which is longer than the specified 250
Created a chunk of size 273, which is longer than the specified 250
Created a chunk of size 311, which is longer than the specified 250
Created a chunk

In [13]:
# model.invoke('What is the best car to buy?')

In [19]:
# Run generation on a worker thread so this cell can consume the streamer
# while the chain call is still in progress.
thread = Thread(target=model.invoke, args=('What is the best car to buy?',))
thread.start()
try:
    for text in streamer:
        # Flush so each streamed piece is shown as soon as it is received.
        print(text, end='', flush=True)
finally:
    # Always wait for the worker, even if reading the streamer raised (e.g.
    # its timeout expired), so no generation thread is left feeding the
    # shared streamer when the next cell runs.
    thread.join()

 I can provide you with some general information on popular cars that are known for their reliability and performance. However, it's essential to note that the "best" car depends on various factors such as budget, personal preferences, and intended use.

Here are a few examples of top-rated vehicles in different categories:

1. **Luxury Cars**: 
    - BMW 5 Series
    - Mercedes-Benz S-Class
    - Audi A6

These luxury cars offer exceptional comfort, performance, and style at a higher price point. They may be more expensive upfront but can last longer due to better materials and construction.

2. **SUVs and Crossovers**:
    - Toyota RAV4
    - Honda CR-V
    - Subaru Ascent

These vehicles are great for those who want a balance between space and fuel efficiency while still providing excellent safety features and reliability.

3. **Electric Vehicles (EVs)**:
    - Tesla Model 3
    - Chevrolet Bolt EV
    - Hyundai Kona Electric

As electric vehicles gain popularity, they become increa