# TEST QWEN 0.5B

## Import libs

In [1]:
import os, torch, wandb

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)

from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format
from dataclasses import dataclass

  from .autonotebook import tqdm as notebook_tqdm


## Adapter, lib modules, etc

In [2]:
@dataclass
class Config:
#     model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
#     model_name = "AnatoliiPotapov/T-lite-instruct-0.1"
    model_name = "Qwen/Qwen2-0.5B"
    new_model = "qwen-finetuned"
    torch_dtype = torch.float16
    attn_implementation = "eager"
cfg = Config()

## Init casual LLM

In [3]:
# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=cfg.torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# Load model
casual_model = AutoModelForCausalLM.from_pretrained(
    cfg.model_name,
    quantization_config=bnb_config,
#     device_map="auto",
    attn_implementation=cfg.attn_implementation
)

tokenizer = AutoTokenizer.from_pretrained(cfg.model_name)
tokenizer.padding_side = 'right'
tokenizer.padding_token = '<|pad_token|>'

`low_cpu_mem_usage` was None, now set to True since model is quantized.


In [4]:
casual_model, tokenizer = setup_chat_format(casual_model, tokenizer)

## Init finetuned model

In [5]:
@dataclass
class Config:
#     model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
#     model_name = "AnatoliiPotapov/T-lite-instruct-0.1"
    model_name = "qwen-finetuned"
    torch_dtype = torch.float16
    attn_implementation = "eager"
cfg = Config()

In [6]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=cfg.torch_dtype,
    bnb_4bit_use_double_quant=True,
)
# Load model
finetuned_model = AutoModelForCausalLM.from_pretrained(
    "qwen-finetuned",
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    attn_implementation=cfg.attn_implementation
)

tokenizer_fine = AutoTokenizer.from_pretrained(cfg.model_name)
tokenizer_fine.padding_side = 'right'
tokenizer_fine.padding_token = '<|pad_token|>'

OSError: qwen-finetuned is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

In [7]:
finetuned_model, tokenizer_fine = setup_chat_format(finetuned_model, tokenizer_fine)

## API

In [8]:
def generate_answer(model, prompt):
    chat = [
        { "role": "user", "content": prompt },
    ]
    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
    outputs = model.generate(input_ids=inputs.to(model.device), max_new_tokens=150)

    return(tokenizer.decode(outputs[0]))

## Test both

In [9]:
q1 = "Who is Lenin"

In [10]:
print(generate_answer(casual_model, q1))

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


<|im_start|>user
Who is Lenin<|im_end|>
<|im_start|>assistant
The article "Who is Lenin?" is a biography of Vladimir Lenin, a prominent figure in the Russian Revolution of 1917. The article provides a detailed account of Lenin's life, including his early years, his involvement in the Russian Revolution, and his role as a key figure in the Bolshevik Party. The article also covers his political and social views, including his views on the role of the state and the importance of individual freedom and equality. The article also highlights his contributions to the Russian Revolution, including his leadership of the Bolshevik Party and his role in the drafting of the Bolshevik Manifesto. The article concludes by summarizing Lenin's legacy as a key figure in the Russian Revolution and his impact on the world.<|endoftext|>Human Rights Watch



In [11]:
print(generate_answer(finetuned_model, q1))

<|im_start|>user
Who is Lenin<|im_end|>
<|im_start|>assistant
The following is the text of the document "Who is Lenin?" by Vladimir Lenin. It is a comprehensive biography of Lenin, including his life, work, and legacy. It includes information on his early life, his political career, his political views, and his contributions to the Russian and Soviet revolutionary movements. It also includes information on his personal life, including his marriage, children, and family background. The document also includes information on his political opponents, including the Bolsheviks, the Mensheviks, and the Social Democrats. The document also includes information on Lenin's intellectual and literary works, including his essays, poetry, and novels. The document also includes information on his personal life, including his health, weight, and diet. The document also includes
