In [1]:
import json
import os
import warnings

import transformers  # type: ignore

from tqdm.auto import tqdm  # type: ignore

from langchain.llms import HuggingFacePipeline  # type: ignore
from langchain.chains import LLMChain  # type: ignore
from langchain.prompts import PromptTemplate  # type: ignore
from langchain.embeddings.huggingface import HuggingFaceEmbeddings  # type: ignore

from datasets import Dataset, DatasetDict  # type: ignore

# disable warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Selects the model directory, the prompt set and the test split used below;
# must be one of the PROMPTS keys ("cars" or "sleep").
dataset_type = "sleep"
# Prompt template variant within PROMPTS[dataset_type]: "basic" or "rag".
PROMPT_MODE = "rag"

In [3]:
# Project root. Set the ARC_AGI_BASE_PATH environment variable to run the
# notebook on another machine without editing this cell; the default keeps
# the original location.
BASE_PATH = os.environ.get("ARC_AGI_BASE_PATH", "/home/stepan/kaggle-arc-agi")
# Model directory for the selected dataset_type.
MODEL_ID = f"{BASE_PATH}/models/{dataset_type}/llama-3_2-1b-it"
# Maximum number of tokens generated per answer (passed to the pipeline).
MAX_NEW_TOKENS = 8192
# Tokens left over once MAX_NEW_TOKENS is reserved out of 32768
# (presumably the model's context window -- TODO confirm).
MAX_SEQ_LENGTH = 32768 - MAX_NEW_TOKENS

In [4]:
# Prompt texts per dataset type. Each entry has:
#   'system' - persona / task description for the topic,
#   'basic'  - template that takes only {question},
#   'rag'    - template that takes {question} and {resources}.
# The 'system' texts were previously attached to the wrong topics (the sleep
# prompt lived under 'cars' and vice versa); each key now holds its own topic.
PROMPTS = {
    'cars': {
        'system': "You are an expert in the history of automobiles with in-depth knowledge of the development of automobiles from the late 19th century to the present day. Your task is to generate insightful and varied answers on automobile history. The answers should be diverse in complexity, suitable for learners and experts alike.",
        'basic': "Generate me an answer to the given question: {question}\n\n",
        'rag': "Generate me an answer to the given question: {question}\n\nResources: {resources}",
    },
    'sleep': {
        'system': "You are an expert in sleep science with in-depth knowledge of sleep physiology, circadian rhythms, sleep disorders, and the impact of sleep on health and cognitive performance. Your task is to generate insightful and varied answers on sleep-related topics. The answers should be diverse in complexity, suitable for learners and experts alike.",
        'basic': "Generate me an answer to the given question: {question}\n\n",
        'rag': "Generate me an answer to the given question: {question}\n\nResources: {resources}",
    }
}

In [5]:
# Generation settings forwarded to the text-generation pipeline.
generation_kwargs = {
    "temperature": 0.5,
    "repetition_penalty": 1.1,
    "return_full_text": True,  # the returned text also contains the prompt
    "max_new_tokens": MAX_NEW_TOKENS,
}

# Build the Hugging Face pipeline from the local model directory.
text_generation_pipeline = transformers.pipeline(
    task="text-generation",
    model=MODEL_ID,
    **generation_kwargs,
)

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


In [6]:
# Wrap the transformers pipeline so it can be used as a LangChain LLM.
llama_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# Derive the input variables from the template itself instead of hard-coding
# ["question"]: the 'rag' templates also contain a {resources} placeholder, so
# a fixed variable list disagrees with the template whenever PROMPT_MODE is 'rag'.
prompt = PromptTemplate.from_template(PROMPTS[dataset_type][PROMPT_MODE])

# Chain that formats the prompt and sends it to the model.
llm_chain = LLMChain(llm=llama_llm, prompt=prompt)

In [7]:
# Smoke test: run the chain once on a single hand-written question.
# NOTE: only `question` is supplied here; the 'rag' templates additionally
# contain a {resources} placeholder.
smoke_test_inputs = {"question": "What is the best car?"}
llm_chain.invoke(smoke_test_inputs)

Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


{'question': 'What is the best car?',
 'text': 'Human: Generate me an answer to the given question: What is the best car?\n\nAssistant: I\'d be happy to help you with that. The answer depends on various factors such as budget, personal preferences, and intended use of the vehicle.\n\nHere are some top contenders for the "best" car:\n\n**Luxury Segment**\n\n1. **Mercedes-Benz S-Class**: A flagship sedan known for its comfort, performance, and opulent interior.\n2. **BMW 7-Series**: A premium luxury SUV offering exceptional handling, safety features, and a spacious cabin.\n3. **Audi R8**: A high-performance sports car featuring cutting-edge technology, stunning design, and thrilling driving experiences.\n\n**Performance Segment**\n\n1. **Porsche 911**: A legendary sports car renowned for its exceptional handling, acceleration, and distinctive design.\n2. **Ferrari F8 Tributo**: An ultra-luxurious supercar boasting breathtaking performance, stylish design, and exclusive features.\n3. **Ni

In [8]:
def load_data(file_path):
    """Read a JSON file and return its parsed content.

    The file is decoded explicitly as UTF-8 so that non-ASCII text is read the
    same way regardless of the platform's default encoding.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON value (whatever ``json.load`` produces).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)


def to_dataset(data):
    """Convert a list of Q&A records into a column-oriented ``Dataset``.

    Each record is read through its "question", "answer" and "citation" keys;
    the "summary" fields of a record's citations are joined with newlines to
    form that record's "resources" text.

    Args:
        data: Iterable of record dicts as described above.

    Returns:
        A ``Dataset`` with the columns "question", "resources" and "answer".
    """
    questions, resources, answers = [], [], []

    for entry in data:
        questions.append(entry["question"])
        answers.append(entry["answer"])
        summaries = (citation["summary"] for citation in entry["citation"])
        resources.append("\n".join(summaries))

    columns = {
        "question": questions,
        "resources": resources,
        "answer": answers,
    }
    return Dataset.from_dict(columns)


def prepare_dataset(base_path=None):
    """Load the cars and sleep test sets found under ``base_path``.

    Args:
        base_path: Directory that contains the ``data`` folder. It is
            interpolated directly into the file paths, so leaving it as
            ``None`` produces paths beginning with "None/".

    Returns:
        Dict with the keys "cars" and "sleep", each mapping to the dataset
        built by ``to_dataset`` from the corresponding JSON file.
    """
    sources = {
        "cars": f"{base_path}/data/test_qa_car.json",
        "sleep": f"{base_path}/data/test_qa_sleep.json",
    }

    # Read every file first, then convert, mirroring the load-then-build order.
    raw_records = {name: load_data(path) for name, path in sources.items()}
    return {name: to_dataset(records) for name, records in raw_records.items()}

In [9]:
# Load both test sets ("cars" and "sleep") from the data folder under BASE_PATH.
dataset = prepare_dataset(BASE_PATH)

# Test without RAG

In [10]:
def predictions(dataset, dataset_type):
    """Run the chain on every question of one test set and save the outputs.

    Args:
        dataset: Mapping from dataset type to a dataset with a "question" column.
        dataset_type: Key selecting which test set to run; also used in the
            output file name.

    Returns:
        List with one ``llm_chain.invoke`` result per question. The same list
        is written as JSON to ``{BASE_PATH}/data/{dataset_type}_predictions.json``.
    """
    questions = dataset[dataset_type]["question"]
    chain_outputs = [
        llm_chain.invoke({"question": question}) for question in tqdm(questions)
    ]

    # save predictions
    output_path = f"{BASE_PATH}/data/{dataset_type}_predictions.json"
    with open(output_path, "w") as out_file:
        json.dump(chain_outputs, out_file)

    return chain_outputs

In [11]:
# Store the results under their own name. Assigning them to `predictions`
# would replace the function with a list, so re-running this cell (or calling
# the function for another dataset type) would raise TypeError.
prediction_results = predictions(dataset, dataset_type)

  0%|          | 0/27 [00:00<?, ?it/s]

 33%|███▎      | 9/27 [01:56<03:50, 12.83s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 27/27 [06:00<00:00, 13.34s/it]


# RAG

In [12]:
from langchain_community.document_loaders import TextLoader # type: ignore

In [None]:
# Load the text file that belongs to the selected dataset_type.
loader = TextLoader(f"{BASE_PATH}/data/{dataset_type}.txt")
# Keep the loaded documents in a variable so later cells can use them, and
# show only their count instead of echoing every document into the output.
documents = loader.load()
len(documents)