# TEST QWEN 0.5B

## Import libs

In [1]:
import os, torch, wandb

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)

from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format
from dataclasses import dataclass

  from .autonotebook import tqdm as notebook_tqdm


## Adapter, lib modules, etc

In [14]:
@dataclass
class Config:
#     model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
#     model_name = "AnatoliiPotapov/T-lite-instruct-0.1"
    model_name = "Qwen/Qwen2-0.5B"
    dataset_name = "C:\\Users\\USER_ELISEY\\miracl_"
    new_model = "qwen-finetuned"
    torch_dtype = torch.float16
    attn_implementation = "eager"
cfg = Config()

## Init casual LLM

In [15]:
# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=cfg.torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# Load model
casual_model = AutoModelForCausalLM.from_pretrained(
    cfg.model_name,
    quantization_config=bnb_config,
#     device_map="auto",
    attn_implementation=cfg.attn_implementation
)

tokenizer = AutoTokenizer.from_pretrained(cfg.model_name)
tokenizer.padding_side = 'right'
tokenizer.padding_token = '<|pad_token|>'

`low_cpu_mem_usage` was None, now set to True since model is quantized.


In [16]:
casual_model, tokenizer = setup_chat_format(casual_model, tokenizer)

## Init finetuned model

In [2]:
@dataclass
class Config:
#     model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
#     model_name = "AnatoliiPotapov/T-lite-instruct-0.1"
    model_name = "qwen-finetuned"
    torch_dtype = torch.float16
    attn_implementation = "eager"
cfg = Config()

In [3]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=cfg.torch_dtype,
    bnb_4bit_use_double_quant=True,
)
# Load model
finetuned_model = AutoModelForCausalLM.from_pretrained(
    cfg.model_name,
    quantization_config=bnb_config,
#     device_map="auto",
    attn_implementation=cfg.attn_implementation
)

tokenizer_fine = AutoTokenizer.from_pretrained(cfg.model_name)
tokenizer_fine.padding_side = 'right'
tokenizer_fine.padding_token = '<|pad_token|>'

`low_cpu_mem_usage` was None, now set to True since model is quantized.


RuntimeError: Error(s) in loading state_dict for Qwen2ForCausalLM:
	size mismatch for model.embed_tokens.weight: copying a param with shape torch.Size([151646, 896]) from checkpoint, the shape in current model is torch.Size([151936, 896]).
	size mismatch for lm_head.weight: copying a param with shape torch.Size([151646, 896]) from checkpoint, the shape in current model is torch.Size([151936, 896]).

In [4]:
finetuned_model, tokenizer_fine = setup_chat_format(finetuned_model, tokenizer_fine)

NameError: name 'finetuned_model' is not defined

## API

In [None]:
def generate_answer(model, prompt):
    chat = [
        { "role": "user", "content": prompt },
    ]
    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
    outputs = model.generate(input_ids=inputs.to(model.device), max_new_tokens=150)

    return(tokenizer.decode(outputs[0]))

## Test both

In [14]:
q1 = "Who is Lenin"

In [15]:
print(generate_answer(casual_model, q1))

<|im_start|>user
Who is Lenin<|im_end|>
<|im_start|>assistant
The article "Who is Lenin?" is a biography of Vladimir Lenin, a prominent figure in the Russian Revolution of 1917. The article provides a detailed account of Lenin's life, including his early years, his involvement in the Russian Revolution, and his role as a key figure in the Bolshevik Party. The article also covers his political and social views, including his views on the role of the state and the importance of individual freedom and equality. The article also highlights his contributions to the Russian Revolution, including his leadership of the Bolshevik Party and his role in the drafting of the Bolshevik Manifesto. The article concludes by summarizing Lenin's legacy as a key figure in the Russian Revolution and his impact on the world.<|endoftext|>Human Rights Watch



In [None]:
print(generate_answer(finetuned_model, q1))