## Setup

In [1]:
%pip install -q  torch peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7 accelerate sentencepiece

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.5/92.5 MB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m92.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.4/77.4 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m31.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m73.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m96.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m50.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━

In [2]:
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline
)
from peft import LoraConfig
from trl import SFTTrainer
from peft import AutoPeftModelForCausalLM

In [3]:
import warnings

from google.colab import drive

# (Re)mount Google Drive at /content/drive so project files are reachable.
drive.mount('/content/drive', force_remount=True)

# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings('ignore')

Mounted at /content/drive


In [4]:
%cd drive/MyDrive/CS\ 182\ Final\ Project/Phase \3

/content/drive/.shortcut-targets-by-id/1hzhdcGA40OipfzF0SRT7omKvzCSi0q4r/CS 182 Final Project/Phase 3


## Dataset

In [5]:
# Hub identifier of the grade-school math instruction dataset.
dataset_path = "qwedsacf/grade-school-math-instructions"

# Only the "train" split is requested; it is divided into train/validation in a later cell.
data = load_dataset(path=dataset_path, split="train")

# Display the dataset summary (features and row count) as the cell output.
data

Downloading readme:   0%|          | 0.00/852 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/2.55M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/8792 [00:00<?, ? examples/s]

Dataset({
    features: ['INSTRUCTION', 'RESPONSE', 'SOURCE'],
    num_rows: 8792
})

In [6]:
# Hold out 15% of the rows for validation, using a fixed seed so the split is repeatable.
# Note that `data` is rebound here from the loaded dataset to the result of the split.
data = data.train_test_split(seed=123, test_size=0.15)
training_data = data['train']
validation_data = data['test']

In [7]:
# System message placed inside the <<SYS>> ... <</SYS>> section of every training prompt.
preconditioning = (
    'You are a helpful assistant. Given a word problem, you need to '
    'break it down into steps and solve them step-by-step.'
)


def preprocess(example):
    """Attach a ``text`` field holding the full training prompt for one record.

    The prompt is the system message, the record's ``INSTRUCTION`` and its
    ``RESPONSE`` wrapped in ``<s>[INST] <<SYS>>...<</SYS>>...[/INST]...</s>``
    markers, concatenated with no separators in between.

    Args:
        example: mapping with string values under ``INSTRUCTION`` and ``RESPONSE``.

    Returns:
        The same ``example`` object, modified in place with the new ``text`` key.
    """
    pieces = [
        "<s>[INST] <<SYS>>",
        preconditioning,
        "<</SYS>>",
        example['INSTRUCTION'],
        "[/INST]",
        example["RESPONSE"],
        "</s>",
    ]
    example['text'] = "".join(pieces)
    return example

# Add the formatted `text` column to both splits (training first, then validation).
training_data, validation_data = (
    split.map(preprocess) for split in (training_data, validation_data)
)

Map:   0%|          | 0/7473 [00:00<?, ? examples/s]

Map:   0%|          | 0/1319 [00:00<?, ? examples/s]

## Model

In [11]:
# Base checkpoint to fine-tune. Earlier candidates are kept for reference:
# base_model_name = "NousResearch/Llama-2-7b-chat-hf"
# base_model_name = 'openlm-research/open_llama_3b_v2'
base_model_name = 'gpt2-xl'
# Directory name the fine-tuned model is saved under after training.
refined_model = "gpt2-xl-problem-solver_lr2e-6"

# `trust_remote_code` is deliberately not passed: the training log reports a
# GPT2TokenizerFast, a tokenizer that ships with transformers, so there is no
# need to allow execution of code downloaded from the model repository.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
# Reuse the end-of-sequence token for padding and pad on the right-hand side.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# 4-bit NF4 weight quantisation with float16 compute and no double quantisation.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)

# Load the quantised base model with every module placed on device 0.
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=quant_config,
    device_map={"": 0}
)
# Switch off the generation cache flag on the config before training.
model.config.use_cache = False
# NOTE(review): `pretraining_tp` looks like a leftover from the Llama candidates
# above; confirm it has no effect on a GPT-2 config.
model.config.pretraining_tp = 1

Some weights of GPT2LMHeadModel were not initialized from the model checkpoint at gpt2-xl and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
# LoRA Config: rank-64 adapters scaled by alpha=32, 10% adapter dropout,
# no bias terms trained, set up for causal language modelling.
peft_parameters = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
)

# Training Params
train_params = TrainingArguments(
    # --- output, logging, checkpointing, evaluation ---
    output_dir="./train_results/gpt2-xl-r64-a32-lr2e-6",
    report_to="tensorboard",
    save_steps=450,
    logging_steps=225,
    # No eval_steps is given; the recorded run evaluated every 225 steps,
    # i.e. at the same interval as logging_steps.
    evaluation_strategy="steps",
    # --- run length and batching ---
    num_train_epochs=5,
    max_steps=-1,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=1,
    group_by_length=True,
    # --- optimisation ---
    optim="paged_adamw_32bit",
    learning_rate=2e-6,
    weight_decay=0.001,
    max_grad_norm=0.3,
    # NOTE(review): with a "constant" schedule the warmup_ratio below is
    # presumably unused ("constant_with_warmup" would apply it); confirm intent.
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    # --- precision: mixed-precision training is disabled ---
    fp16=False,
    bf16=False,
)

# Trainer: supervised fine-tuning on the `text` column of both splits,
# with the LoRA settings and training arguments defined in this cell.
fine_tuning = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=train_params,
    peft_config=peft_parameters,
    train_dataset=training_data,
    eval_dataset=validation_data,
    dataset_text_field="text",
)

Map:   0%|          | 0/1319 [00:00<?, ? examples/s]

In [None]:
# Training: run the fine-tuning loop of the SFTTrainer built in the previous cell.
fine_tuning.train()

# Save Model: write the trained model to the directory named by `refined_model`.
# The name is a relative path, so it is resolved against the current working directory.
fine_tuning.model.save_pretrained(refined_model)

You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss
225,2.9396,2.725432
450,2.7926,2.517853
675,2.5388,2.319759
900,2.3864,2.10443
1125,2.1233,1.805953
1350,1.8848,1.665392
1575,1.7385,1.613447
1800,1.6641,1.577008
2025,1.6342,1.547516
2250,1.5957,1.523979


## Experiment

In [None]:
# Disabled inference setup: loads a tokenizer and a previously saved PEFT model.
# NOTE(review): './llama-2-3b-problem-solver' is not the directory written by the
# training cell in this notebook (`refined_model`); confirm which checkpoint is intended
# before re-enabling.
# tokenizer = AutoTokenizer.from_pretrained('openlm-research/open_llama_3b_v2')
# model = AutoPeftModelForCausalLM.from_pretrained('./llama-2-3b-problem-solver', torch_dtype=torch.float16, device_map='auto',
#                                                  pad_token_id=tokenizer.eos_token_id)

tokenizer_config.json:   0%|          | 0.00/593 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/512k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/330 [00:00<?, ?B/s]

You are using the legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This means that tokens that come after special tokens will not be properly handled. We recommend you to read the related pull request available at https://github.com/huggingface/transformers/pull/24565


config.json:   0%|          | 0.00/506 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/6.85G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [None]:
# Disabled generation demo: samples up to 30 new tokens for one hand-written prompt.
# NOTE(review): the system message and the newline layout used here differ from the
# `preconditioning` string and the separator-free format that `preprocess` builds for
# training; confirm whether the mismatch is intended.
# NOTE(review): the recorded output starts with "<s><s>", which suggests the tokenizer
# already prepends a BOS token, so the literal "<s>" in the prompt may be redundant.
# prompt = """<s>[INST] <<SYS>>
# You are a helpful assistant. You are given simple problems and need to solve step-by-step.
# <</SYS>>

# Prince collected 37 back massagers in the morning and 513 board games in the afternoon. Determine the total number of back massagers and board games that Prince has.
# [/INST]"""
# input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to('cuda')

# generation_output = model.generate(
#     input_ids=input_ids, max_new_tokens=30, do_sample=True,
#     top_k=50,
#     top_p=0.96,
# )
# print(tokenizer.decode(generation_output[0]))

<s><s> [INST] <<SYS>>
You are a helpful assistant. You are given simple problems and need to solve step-by-step.
<</SYS>>

Prince collected 37 back massagers in the morning and 513 board games in the afternoon. Determine the total number of back massagers and board games that Prince has.
[/INST]The number of back massagers that Prince has is 37 * 2 = 74 back massagers.
The number of board games
