<a href="https://colab.research.google.com/github/HwangJieun03/nlp_homework.ipynb/blob/main/nlp_homework.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers accelerate datasets peft trl bitsandbytes sacrebleu



In [None]:
import os
from dataclasses import dataclass, field
import re
import torch
from accelerate import Accelerator
from datasets import load_dataset, Dataset
from peft import AutoPeftModelForCausalLM, LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    logging as hf_logging,
)
from trl import SFTTrainer, SFTConfig
from trl.trainer import ConstantLengthDataset
from sacrebleu import corpus_bleu  # BLEU score computation library
import logging
import wandb

# Settings


In [None]:
# Q1: Model replacement -- the base checkpoint that gets fine-tuned.
base_model_id = "meta-llama/Llama-3.2-1B-Instruct"

# NOTE(review): `torch_dtype` and `device_map` are not referenced by the other
# cells shown in this notebook (the model loads pass device_map="auto").
torch_dtype = torch.bfloat16
device_map = "cuda"

# Data: local JSONL file and the sequence length used for packing / training.
dataset_name = "./llm-modeling-lab.jsonl"
seq_length = 512

# Directory passed to the trainer as its output location and later used to
# load the fine-tuned adapter.
output_dir = "./llama-order-analysis"

# Raw Dataset

In [None]:
# Load the raw JSONL file into a single `Dataset`.
full_dataset = Dataset.from_json(path_or_paths=dataset_name)

# Q2: Split by position -- the first rows train the model, the next rows validate it.
train_size = 2800  # training samples
val_size = 200     # validation samples

# Fail fast with a readable message instead of an index error raised from
# inside `select` when the file holds fewer rows than the split expects.
if len(full_dataset) < train_size + val_size:
    raise ValueError(
        f"{dataset_name} has {len(full_dataset)} rows; "
        f"need at least {train_size + val_size} for the train/validation split."
    )

train_dataset = full_dataset.select(range(train_size))
val_dataset = full_dataset.select(range(train_size, train_size + val_size))

Generating train split: 0 examples [00:00, ? examples/s]

# Loading the Tokenizer

In [None]:
# Load the tokenizer that belongs to the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# When the checkpoint defines no pad token, reuse the end-of-sequence token
# (both the token string and its id) for padding.
if getattr(tokenizer, "pad_token", None) is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Pad on the right-hand side of each sequence.
tokenizer.padding_side = "right"

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

# Loading the Base Model

In [None]:
# 4-bit NF4 quantization settings shared by the base-model load and the
# fine-tuned-adapter load. The compute dtype comes from the `torch_dtype`
# setting (torch.bfloat16) instead of repeating the literal here, so the
# settings cell stays the single place to change it.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
)

In [None]:
# Load the base checkpoint with the 4-bit quantization settings in `bnb_config`.
# NOTE(review): the settings cell defines `device_map = "cuda"`, but this call
# passes the literal "auto" -- confirm which placement is intended.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto",
)


config.json:   0%|          | 0.00/877 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

In [None]:
# Switch off `use_cache` on the model config before the trainer is built.
# Presumably because the generation cache is not needed while training -- confirm.
base_model.config.use_cache = False

# Utilities

In [None]:
# Prepare sample text based on the user's prompt
def function_prepare_sample_text(tokenizer, for_train=True):
    """Build a formatter that renders one example as chat-template text.

    Args:
        tokenizer: Tokenizer providing `apply_chat_template`.
        for_train: When True (default) the reference answer is appended as the
            assistant turn. When False only the user turn is rendered and the
            template's generation prompt is added, so the text ends where the
            model is expected to start answering.

    Returns:
        A function mapping an example (with an "input" key and, for training,
        an "output" key) to the rendered text.
    """
    user_prompt = "Extract food names, options, and quantities:\n### Order: "

    def _prepare_sample_text(example):
        messages = [{"role": "user", "content": f"{user_prompt}{example['input']}"}]
        if for_train:
            messages.append({"role": "assistant", "content": f"{example['output']}"})
        # A training sample already contains the assistant turn. An inference
        # prompt has to open it explicitly, otherwise generation starts from a
        # closed user turn instead of from the assistant header.
        return tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=not for_train,
        )

    return _prepare_sample_text

In [None]:
# Preparing the dataset
def create_datasets(tokenizer, dataset, seq_length):
    """Wrap raw examples in the `ConstantLengthDataset` handed to the trainer.

    Args:
        tokenizer: Tokenizer used both for formatting and by the wrapper.
        dataset: Raw examples; each is rendered by the training formatter
            returned from `function_prepare_sample_text`.
        seq_length: Sequence length forwarded to `ConstantLengthDataset`.

    Returns:
        The constructed `ConstantLengthDataset` (always created with
        `infinite=True`).
    """
    return ConstantLengthDataset(
        tokenizer,
        dataset,
        formatting_func=function_prepare_sample_text(tokenizer),
        infinite=True,
        seq_length=seq_length,
        chars_per_token=5,  # approximate characters-per-token ratio
    )

# Build a dataset

In [None]:
# Training split wrapped by `create_datasets` (chat formatting + `ConstantLengthDataset`).
train_ds = create_datasets(tokenizer, train_dataset, seq_length)

In [None]:
# Validation split, wrapped the same way as the training split.
val_ds = create_datasets(tokenizer, val_dataset, seq_length)

# Settings for the LoRA adapter

In [None]:
# LoRA adapter settings for causal-LM fine-tuning: rank 8, alpha 16, 5% dropout.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    # Modules that receive adapters; by name these look like the attention
    # (q/k/v/o) and MLP (down/up/gate) projections -- confirm against the model.
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "down_proj", "up_proj", "gate_proj"],
    bias="none",
    task_type="CAUSAL_LM",
)

# Fine-Tuning

In [None]:
# Training configuration for the supervised fine-tuning run.
sft_config = SFTConfig(
    output_dir=output_dir,
    # Batching: 2 sequences per device per step, no gradient accumulation,
    # gradient checkpointing off.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    gradient_checkpointing=False,
    # Optimisation: LR 1e-4, warm-up ratio 0.1, gradient-norm limit 0.3,
    # weight decay 0.05, one epoch.
    learning_rate=1e-4,
    warmup_ratio=0.1,
    max_grad_norm=0.3,
    weight_decay=0.05,
    num_train_epochs=1,
    # Logging / evaluation / checkpoint cadence.
    logging_steps=20,
    eval_strategy="no",  # no evaluation is scheduled during training
    save_strategy="steps",
    save_steps=50,
    save_total_limit=2,
    max_seq_length=seq_length,
    # Metrics are reported to Weights & Biases under this run name.
    report_to="wandb",
    run_name="llama-fine-tuning",
)


In [None]:

# Assemble the SFT trainer from the quantized base model, the LoRA settings,
# the wrapped datasets and the training configuration.
trainer = SFTTrainer(
    model=base_model,
    train_dataset=train_ds,  # wrapped training split
    eval_dataset=val_ds,   # wrapped validation split; sft_config sets eval_strategy="no", so presumably unused during train() -- confirm
    peft_config=lora_config,
    tokenizer=tokenizer,
    args=sft_config,
)

In [None]:
# Run fine-tuning. With report_to="wandb" the recorded run asked for a W&B API
# key interactively before training started.
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc




Step,Training Loss
20,2.937
40,2.5226
60,2.0521
80,1.5572
100,1.2874
120,1.1651
140,1.1386
160,1.0149
180,0.9377
200,0.8549




TrainOutput(global_step=1400, training_loss=0.789603476524353, metrics={'train_runtime': 2432.4131, 'train_samples_per_second': 1.151, 'train_steps_per_second': 0.576, 'total_flos': 8419093040332800.0, 'train_loss': 0.789603476524353, 'epoch': 1.0})

In [None]:
# Q3: Upload the trained LoRA adapter to the Hugging Face Hub
trainer.push_to_hub()

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/22.6M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.56k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Jieun03/llama-order-analysis/commit/a0cd0d1520ce2621052aad8988988847425b6e6a', commit_message='End of training', commit_description='', oid='a0cd0d1520ce2621052aad8988988847425b6e6a', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Jieun03/llama-order-analysis', endpoint='https://huggingface.co', repo_type='model', repo_id='Jieun03/llama-order-analysis'), pr_revision=None, pr_num=None)

In [None]:
# Q4: Load the fine-tuned adapter together with its 4-bit quantized base model.
# NOTE(review): `output_dir` is the local path "./llama-order-analysis", so this
# looks like it loads the adapter from disk rather than from the Hugging Face
# Hub -- pass the Hub repo id instead if the uploaded copy is what should be
# exercised here.
adapter_model = AutoPeftModelForCausalLM.from_pretrained(
    output_dir, quantization_config=bnb_config, device_map="auto"
)

# Evaluation

In [None]:
# Q5 : Validation and BLEU Score Calculation
from tqdm import tqdm

def evaluate_bleu(model, tokenizer, val_dataset):
    """Generate an answer for every validation example and score the set with BLEU.

    Args:
        model: Causal LM exposing `generate` and `device`.
        tokenizer: Tokenizer matching `model`; must provide `apply_chat_template`.
        val_dataset: Iterable of examples with "input" and "output" keys.

    Returns:
        The corpus-level sacreBLEU score as a float, or 0.0 when
        `val_dataset` yields no examples.
    """
    # Must stay identical to the instruction used when the training text is
    # built; otherwise the model is scored on a prompt format it was not tuned on.
    user_prompt = "Extract food names, options, and quantities:\n### Order: "

    preds, refs = [], []
    for example in tqdm(val_dataset, desc="Evaluating BLEU"):
        # Render the same chat-formatted user turn as in training and open the
        # assistant turn so the model continues with its answer.
        messages = [{"role": "user", "content": f"{user_prompt}{example['input']}"}]
        prompt_text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        input_data = tokenizer(prompt_text, return_tensors="pt", padding=True, truncation=True)

        # Follow the model's own device instead of assuming CUDA.
        input_ids = input_data.input_ids.to(model.device)
        attention_mask = input_data.attention_mask.to(model.device)

        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=128,
            do_sample=False,
            pad_token_id=tokenizer.pad_token_id,
        )[0]

        # `generate` returns prompt + continuation; score only the continuation,
        # since the references do not contain the prompt.
        prompt_length = input_ids.shape[-1]
        pred = tokenizer.decode(output_ids[prompt_length:], skip_special_tokens=True)

        preds.append(pred)
        refs.append(example["output"])

    if not preds:
        return 0.0

    # sacreBLEU takes a list of reference *streams*, each aligned one-to-one
    # with the hypotheses. There is a single reference per example, hence one
    # stream holding all references.
    bleu_score = corpus_bleu(preds, [refs]).score

    # Display sample results
    print("Sample Predictions and References:")
    for pred, ref in zip(preds[:5], refs[:5]):  # first 5 examples
        print(f"Prediction: {pred}")
        print(f"Reference: {ref}\n")

    return bleu_score


In [None]:
# Q5: Compute the BLEU score of the fine-tuned model on the validation split
bleu_score = evaluate_bleu(adapter_model, tokenizer, val_dataset)

Evaluating BLEU: 100%|██████████| 200/200 [18:03<00:00,  5.42s/it]

Sample Predictions and References:
Prediction: ### Order: 꽃살버섯샐러드 한그릇과 스테이크올리오스파게티 세트를 주세요.
- 분석 결과 0: 음식명:꽃살버섯샐러드, 수량:한그릇
- 분석 결과 1: 음식명:스테이크올리오스파게티, 수량:세트, 옵션:아이스, 옵션:대, 수량:한잔
- 분석 결과 2: 음식명:꽃잎, 수량:한그릇
- 분석 결과 3: 음식명:스테이크, 옵션:아이스, 수량:한잔.수량:한그
Reference: ['- 분석 결과 0: 음식명:꽃살버섯샐러드, 수량:한그릇 \n- 분석 결과 1: 음식명:스테이크올리오스파게티']

Prediction: ### Order: 교촌통통치킨카츠 한 판, 행복지수커피 한잔, 그리고 헤즐넛 핫 하나도 주세요.
- 분석 결과 0: 음식명:교촌통통치킨카츠,수량:한 판
- 분석 결과 1: 음식명:행복지수커피,수량:한잔
- 분석 결과 2: 음식명:헤즐넛,옵션:핫,수량:하나.
Reference: ['- 분석 결과 0: 음식명:교촌통통치킨카츠, 수량: 한 판\n- 분석 결과 1: 음식명:행복지수커피, 수량: 한잔\n- 분석 결과 2: 음식명:헤즐넛, 옵션: 핫, 수량: 하나']

Prediction: ### Order: 에스프레소도피오(핫) 한잔과 진하게 허니티 한 잔 주세요.
- 분석 결과 0: 음식명:에스프레소도피오,옵션:핫,수량:한잔
- 분석 결과 1: 음식명:진하게 허니티,수량:한 잔.
Reference: ['- 분석 결과 0: 음식명:에스프레소도피오,옵션:핫,수량:한잔\n- 분석 결과 1: 음식명:허니티,옵션:진하게,수량:한 잔']

Prediction: ### Order: 밀크티 두 캔에, 도토리묵밥 한 그릇이랑 동치미 한병 주세요.
- 분석 결과 0: 음식명:밀크티,수량:두 캔
- 분석 결과 1: 음식명:도토리묵밥,수량:한 그릇
- 분석 결과 2: 음식명:동치미,수량:한병.
Reference: ['- 분석 결과 0: 음식명:밀크티,수량:두 캔\n- 분석 결과 1: 음식명:도토리묵밥,




In [None]:
# Report the BLEU score rounded to two decimals.
print(f"Final BLEU Score: {bleu_score:.2f}")

Final BLEU Score: 62.20
