# LLaMA 3微调推理

## 加载预训练模型

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True, 
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

base_model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config = bnb_config,
    device_map = "auto",
    use_cache = False
    )
base_model.config.pretraining_tp = 1
eval_tokenizer = AutoTokenizer.from_pretrained(base_model_id) 
eval_tokenizer.pad_token = eval_tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


## 合并LoRA权重

In [2]:
from peft import PeftModel

ft_model = PeftModel.from_pretrained(base_model, "llama3-8b-style")

In [3]:
ft_model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear4bit(
                (base_layer): Linear

## 随机选择样本

In [4]:
from datasets import load_dataset

dataset = load_dataset("neuralwork/fashion-style-instruct")

In [5]:
from random import seed, randrange

# 创建一个新的格式函数
def format_instruction_new(sample) :
    return f"""You are a personal stylist recommending fashion advice and clothing combinations. Use the self body and style description below, combined with the evert described in the context to generate 5 self-contained and complete outfit combinations.
### Input:
{sample["input"]}
### Context:
{sample["context" ]}
### Response:
"""

# 设置种子
seed(42)

# randrange函数生成随机数
sample = dataset["train"][randrange(len(dataset['train']))]

# 创建推理prompt
prompt = format_instruction_new(sample) 
print(prompt)

You are a personal stylist recommending fashion advice and clothing combinations. Use the self body and style description below, combined with the evert described in the context to generate 5 self-contained and complete outfit combinations.
### Input:
I'm a short, stocky woman with muscular calves and thighs. I like showing off my strong legs but need to balance my proportions on top.
### Context:
I'm going to a bachelorette / bachelor party.
### Response:



## 推理

In [6]:
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"
# print("Using device:", device)

ft_model.to(device)

# tokenize input text
input_ids = eval_tokenizer(prompt, return_tensors="pt", truncation=True).input_ids.to(device)

# inference, 5 outfit combinations make up around 700-750 tokens 
with torch.inference_mode():
    outputs = ft_model.generate(
        input_ids=input_ids, 
        max_new_tokens=800,
        do_sample=True,  
        top_p=0.9,
        temperature=0.9
    )

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [7]:
input_ids

tensor([[ 2675,   527,   264,  4443, 67473, 65774, 11401,  9650,   323, 17895,
         28559,    13,  5560,   279,   659,  2547,   323,  1742,  4096,  3770,
            11, 11093,   449,   279,   384,  1653,  7633,   304,   279,  2317,
           311,  7068,   220,    20,   659, 96324,   323,  4686, 28403, 28559,
           627, 14711,  5688,   512,    40,  2846,   264,  2875,    11,  5708,
            88,  5333,   449, 52192, 97310,   323, 60611,    13,   358,  1093,
          9204,  1022,   856,  3831, 14535,   719,  1205,   311,  8335,   856,
         49892,   389,  1948,   627, 14711,  9805,   512,    40,  2846,  2133,
           311,   264,   293, 15113, 18673,   668,   611, 49683,  4717,   627,
         14711,  6075,   512]], device='cuda:0')

In [8]:
len(input_ids[0])

93

In [9]:
outputs

tensor([[  2675,    527,    264,   4443,  67473,  65774,  11401,   9650,    323,
          17895,  28559,     13,   5560,    279,    659,   2547,    323,   1742,
           4096,   3770,     11,  11093,    449,    279,    384,   1653,   7633,
            304,    279,   2317,    311,   7068,    220,     20,    659,  96324,
            323,   4686,  28403,  28559,    627,  14711,   5688,    512,     40,
           2846,    264,   2875,     11,   5708,     88,   5333,    449,  52192,
          97310,    323,  60611,     13,    358,   1093,   9204,   1022,    856,
           3831,  14535,    719,   1205,    311,   8335,    856,  49892,    389,
           1948,    627,  14711,   9805,    512,     40,   2846,   2133,    311,
            264,    293,  15113,  18673,    668,    611,  49683,   4717,    627,
          14711,   6075,    512,   2729,   6410,  86956,    220,     16,    512,
             12,   7054,     25,    362,  29441,     11,   1022,  10826,     12,
           5562,    261,  19

In [10]:
len(outputs[0])

592

In [11]:
# decode token ids to text
outputs = outputs.detach().cpu().numpy()

outputs = eval_tokenizer.batch_decode(outputs, skip_special_tokens=True)

In [12]:
outputs

["You are a personal stylist recommending fashion advice and clothing combinations. Use the self body and style description below, combined with the evert described in the context to generate 5 self-contained and complete outfit combinations.\n### Input:\nI'm a short, stocky woman with muscular calves and thighs. I like showing off my strong legs but need to balance my proportions on top.\n### Context:\nI'm going to a bachelorette / bachelor party.\n### Response:\nOutfit Combination 1:\n- Top: A fitted, off-the-shoulder crop top in a bold color like red or hot pink to draw attention to your upper body.\n- Bottom: A high-waisted, flowy skirt in a fun pattern like leopard or floral to create a balanced silhouette.\n- Shoe: A pair of strappy sandals with a chunky heel to elongate your legs.\n- Accessories: Layered necklaces and statement earrings to add some sparkle to the outfit.\n\nOutfit Combination 2:\n- Top: A loose-fitting, cropped graphic tee in a neutral color like black or white 

In [13]:
output = outputs[0][len(prompt):]

print(f"Instruction: \n{sample['input']}\n") 
print(f"Context: \n{sample['context']}\n" ) 
print(f"Ground truth: \n{sample['completion']}\n") 
print(f"Generated output: \n{output}\n\n\n")

Instruction: 
I'm a short, stocky woman with muscular calves and thighs. I like showing off my strong legs but need to balance my proportions on top.

Context: 
I'm going to a bachelorette / bachelor party.

Ground truth: 
Outfit Combination 1:
- Top: A flowy, off-the-shoulder blouse in a bold color like royal blue or hot pink. The off-the-shoulder style will draw attention upward and balance your proportions.
- Bottom: High-waisted, black leather leggings. The high-waisted cut will emphasize your waistline and flatter your figure, while the leather material adds a touch of edge to the outfit.
- Shoe: Pair the outfit with black ankle boots with a chunky heel. The boots will elongate your legs while adding stability and comfort for a night of dancing.
- Accessories: Add a statement necklace with colorful gemstones and a stack of silver bangle bracelets to further enhance the look.

Outfit Combination 2:
- Top: A fitted, black bodysuit with a plunging V-neckline. The fitted style will ac

In [28]:
prompt = '''You are a personal stylist recommending fashion advice and clothing combinations. Use the self body and style description below, combined with the evert described in the context to generate 5 self-contained and complete outfit combinations.
### Input:
I am a tall man, with well-developed chest muscles and distinct abs. I like to subtly show off my abs, but not too ostentatiously.
### Context:
I want to attend an interview.
### Response:'''
inputs = eval_tokenizer(prompt, return_tensors="pt")

# Generate
generate_ids = ft_model.generate(
        input_ids=inputs.input_ids.to(device),
        max_new_tokens=800,
        do_sample=True,  
        top_p=0.9,
        temperature=0.9
    )

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [29]:
generate_ids = generate_ids.detach().cpu().numpy()
outputs = eval_tokenizer.batch_decode(generate_ids, skip_special_tokens=True)

In [30]:
output = outputs[0][len(prompt):]
print(f"Generated output: \n{output}\n\n\n")

Generated output: 
 
Outfit Combination 1:
- Top: A tailored, fitted button-down shirt in a light shade of blue. Opt for a shirt with a slightly longer length to flatter your tall frame.
- Bottom: Pair the shirt with a pair of well-tailored, slim-fit chinos in a neutral color like charcoal or navy.
- Shoes: Choose a classic pair of black leather oxford shoes for a professional touch.
- Accessories: Keep it minimal with a sleek black leather belt and a sophisticated silver watch.

Outfit Combination 2:
- Top: Wear a well-fitted, crew neck t-shirt in a dark shade of gray. Opt for a shirt with a slightly longer length to emphasize your physique.
- Bottom: Pair the t-shirt with a pair of dark wash, slim-fit jeans. Make sure they are well-tailored to elongate your legs.
- Shoes: Opt for a pair of clean white sneakers for a modern and casual look.
- Accessories: Add a touch of personality with a patterned or textured leather bracelet and a pair of stylish sunglasses.

Outfit Combination 3:
-