# Load Model and Tokenizer

In [11]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [13]:
os.chdir(os.path.expanduser(module_path+'/trainlora'))

## Only for model setting

In [2]:
from modules import shared, utils
from pathlib import Path
import re
shared.args.cpu=True

In [3]:
shared.args

Namespace(model='facebook_opt-1.3b', model_dir='models/', lora_dir='loras/', extensions=None, log_level='INFO', lora_name=None, lora_to_load=None, always_override=False, save_steps=0, micro_batch_size=4, batch_size=128, epochs=3, learning_rate='3e-4', lr_scheduler_type='linear', lora_rank=32, lora_alpha=64, lora_dropout=0.05, cutoff_len=256, dataset='None', eval_dataset='None', format=None, eval_steps=100, raw_text_file='None', overlap_len=128, newline_favor_len=128, higher_rank_limit=False, warmup_steps=100, optimizer='adamw_torch', hard_cut_string='\\n\\n\\n', cpu=True, auto_devices=False, gpu_memory=None, cpu_memory=None, disk=False, disk_cache_dir='cache', load_in_8bit=False, load_in_4bit=False, bf16=False, xformers=False, sdp_attention=False, trust_remote_code=False, wbits=0, model_type=None, groupsize=-1, pre_layer=None, checkpoint=None, monkey_patch=False, quant_attn=False, warmup_autotune=False, fused_mlp=False, autogptq=False, triton=False, deepspeed=False, nvme_offload_dir=No

In [4]:
#logging.basicConfig(level=shared.args.log_level.upper())
# # Loading custom settings
# settings_file = None
# if shared.args.settings is not None and Path(shared.args.settings).exists():
#     settings_file = Path(shared.args.settings)
# elif Path('settings.json').exists():
#     settings_file = Path('settings.json')

# Register catch-all defaults under the regex key '.*', which matches every
# model name. The last three values are copied from shared.settings.
# NOTE(review): 'wbits', 'model_type' and 'groupsize' are the *string* 'None',
# not the None object — presumably what downstream consumers expect; confirm.
shared.model_config['.*'] = {
    'wbits': 'None',
    'model_type': 'None',
    'groupsize': 'None',
    'pre_layer': 0,
    'mode': shared.settings['mode'],
    'skip_special_tokens': shared.settings['skip_special_tokens'],
    'custom_stopping_strings': shared.settings['custom_stopping_strings'],
}

# Put the catch-all first: get_model_specific_settings walks the patterns in
# order and later matches overwrite earlier ones, so more specific patterns
# must come after these defaults.
shared.model_config.move_to_end('.*', last=False)
def atoi(text):
    """Convert one fragment of a natural-sort key.

    Returns ``int(text)`` when ``text`` consists only of decimal digits,
    otherwise ``text`` lower-cased (an empty string stays an empty string).
    """
    # isdecimal() rather than isdigit(): isdigit() is also true for characters
    # such as '²' that int() cannot parse, which raised ValueError.
    return int(text) if text.isdecimal() else text.lower()
def natural_keys(text):
    """Build a natural-sort key for ``text``.

    The string is split into alternating non-digit and digit runs and every
    run is converted with ``atoi``.
    """
    fragments = re.split(r'(\d+)', text)
    return list(map(atoi, fragments))

# Names of every entry in the model directory except auxiliary files, with a
# trailing '.pth' extension removed, in natural (human) sort order.
# The dot is escaped so only a literal '.pth' suffix is stripped; the
# unescaped pattern '.pth$' also removed the last four characters of any name
# that merely ends in "pth" (e.g. 'depth' became 'd').
available_models = sorted(
    [
        re.sub(r'\.pth$', '', item.name)
        for item in Path(f'{shared.args.model_dir}/').glob('*')
        if not item.name.endswith(('.txt', '-np', '.pt', '.json', '.yaml'))
    ],
    key=natural_keys,
)

In [5]:
available_models

['.DS_Store', 'facebook_opt-1.3b']

In [6]:
def get_model_specific_settings(model):
    """Collect the settings that apply to the model named ``model``.

    Every key of ``shared.model_config`` is treated as a regular expression
    and matched (case-insensitively, by lower-casing both sides) against the
    start of the model name. Matching entries are merged in iteration order,
    so later matches overwrite earlier ones.

    Returns a new dict; ``shared.model_config`` is not modified.
    """
    lowered_name = model.lower()
    merged = {}
    for pattern, values in shared.model_config.items():
        if re.match(pattern.lower(), lowered_name):
            merged.update(values)
    return merged

In [7]:
# Pick the model to load. A fixed list index is fragile: available_models can
# contain hidden files such as '.DS_Store', which shift the position of the
# real model folders. Take the first entry that is not a dot-file instead.
shared.model_name = next((name for name in available_models if not name.startswith('.')), None)
if shared.model_name is None:
    raise FileNotFoundError(f"No model found in '{shared.args.model_dir}'")

model_settings = get_model_specific_settings(shared.model_name)
shared.settings.update(model_settings)  # hijacking the interface defaults

In [8]:
model_settings

{'wbits': 'None',
 'model_type': 'None',
 'groupsize': 'None',
 'pre_layer': 0,
 'mode': 'chat',
 'skip_special_tokens': True,
 'custom_stopping_strings': ''}

## Key Steps

In [9]:
from transformers import (AutoConfig, AutoModel, AutoModelForCausalLM,
                          AutoModelForSeq2SeqLM, AutoTokenizer,
                          BitsAndBytesConfig, LlamaTokenizer)
import torch
import transformers
from accelerate import infer_auto_device_map, init_empty_weights

def huggingface_loader(model_name):
    """Load ``model_name`` from ``shared.args.model_dir`` as a causal LM.

    The model is always loaded with ``torch.float32`` weights and
    ``low_cpu_mem_usage=True``; ``trust_remote_code`` is taken from
    ``shared.args``.

    Side effect: sets ``shared.args.cpu`` to True.

    Returns the loaded model.
    """
    shared.args.cpu = True

    model_path = Path(f'{shared.args.model_dir}/{model_name}')
    return AutoModelForCausalLM.from_pretrained(
        model_path,
        low_cpu_mem_usage=True,
        trust_remote_code=shared.args.trust_remote_code,
        torch_dtype=torch.float32,
    )

def load_tokenizer(model_name, model):
    """Load the tokenizer that goes with ``model``.

    Selection order:
      1. ``shared.model_type == 'gpt4chan'`` and a ``gpt-j-6B`` folder exists
         in the model directory: the tokenizer from that folder.
      2. ``model`` is exactly a ``transformers.LlamaForCausalLM``: a shared
         LLaMA tokenizer folder if one exists (skipped for the 'llava' and
         'oasst' model types), otherwise the tokenizer files in the model's
         own folder with the special-token ids forced to eos=2, bos=1, pad=0.
      3. Anything else: ``AutoTokenizer`` from the model's folder.

    Returns the tokenizer, or None when branch 3 finds no model folder.
    """
    # Imported locally so the function does not depend on which notebook cell
    # performs the module-level ``import logging`` (it comes after this
    # definition in the notebook).
    import logging

    tokenizer = None
    if shared.model_type == 'gpt4chan' and Path(f"{shared.args.model_dir}/gpt-j-6B/").exists():
        tokenizer = AutoTokenizer.from_pretrained(Path(f"{shared.args.model_dir}/gpt-j-6B/"))
    elif type(model) is transformers.LlamaForCausalLM:
        # Try to load an universal LLaMA tokenizer
        if shared.model_type not in ['llava', 'oasst']:
            for p in [Path(f"{shared.args.model_dir}/llama-tokenizer/"), Path(f"{shared.args.model_dir}/oobabooga_llama-tokenizer/")]:
                if p.exists():
                    logging.info(f"Loading the universal LLaMA tokenizer from {p}...")
                    tokenizer = LlamaTokenizer.from_pretrained(p, clean_up_tokenization_spaces=True)
                    return tokenizer

        # Otherwise, load it from the model folder and hope that these
        # are not outdated tokenizer files.
        tokenizer = LlamaTokenizer.from_pretrained(Path(f"{shared.args.model_dir}/{model_name}/"), clean_up_tokenization_spaces=True)
        try:
            tokenizer.eos_token_id = 2
            tokenizer.bos_token_id = 1
            tokenizer.pad_token_id = 0
        except Exception as exc:
            # Still best effort (the tokenizer is returned either way), but the
            # failure is now reported instead of being swallowed silently, and
            # KeyboardInterrupt/SystemExit are no longer caught.
            logging.warning(f"Could not force the LLaMA special token ids: {exc}")
    else:
        path_to_model = Path(f"{shared.args.model_dir}/{model_name}/")
        if path_to_model.exists():
            tokenizer = AutoTokenizer.from_pretrained(path_to_model, trust_remote_code=shared.args.trust_remote_code)

    return tokenizer

def load_model(model_name):
    """Load the model called ``model_name`` together with its tokenizer.

    The model comes from ``huggingface_loader``. If the loader returns a
    ``(model, tokenizer)`` tuple it is used as is; otherwise the tokenizer is
    obtained from ``load_tokenizer``.

    Returns ``(model, tokenizer)``, or ``(None, None)`` when the loader
    returned None.
    """
    load_func = huggingface_loader
    output = load_func(model_name)
    if type(output) is tuple:
        model, tokenizer = output
    else:
        model = output
        if model is None:
            return None, None
        tokenizer = load_tokenizer(model_name, model)

    # Hijack attention with xformers
    if any((shared.args.xformers, shared.args.sdp_attention)):
        # llama_attn_hijack was referenced without ever being imported, so this
        # branch raised NameError. Import it lazily, only when requested.
        # NOTE(review): assumes the project's `modules` package provides
        # `llama_attn_hijack` — confirm.
        from modules import llama_attn_hijack
        llama_attn_hijack.hijack_llama_attention()
    return model, tokenizer



In [10]:
import logging
import time

# Make the INFO-level messages this notebook emits through logging.info()
# visible: without an explicit configuration the root logger only shows
# WARNING and above. The level comes from shared.args.log_level.
# NOTE(review): force=True replaces any handlers already installed on the
# root logger — drop it if the project configures logging itself.
logging.basicConfig(level=shared.args.log_level.upper(), force=True)

# Load the model
shared.model, shared.tokenizer = load_model(shared.model_name)


# Add LoRA to the model if needed

In [None]:
#from modules.LoRA import add_lora_to_model
def add_lora_to_model(lora_names):
    """Make the LoRA adapters applied to ``shared.model`` match ``lora_names``.

    ``shared.lora_names`` is updated to ``list(lora_names)``. Then:
      * nothing added or removed: return immediately;
      * adapters only added while others are already present: load the new
        ones into the existing model with ``load_adapter``;
      * any adapter removed: unwrap back to the base model and re-apply the
        whole requested list from scratch.

    The adapters are read from ``shared.args.lora_dir/<name>``.
    """
    # PeftModel is not among the names this notebook imports from peft, so the
    # original body raised NameError as soon as an adapter had to be applied.
    from peft import PeftModel

    prior_set = set(shared.lora_names)
    added_set = set(lora_names) - prior_set
    removed_set = prior_set - set(lora_names)
    shared.lora_names = list(lora_names)

    # If no LoRA needs to be added or removed, exit
    if len(added_set) == 0 and len(removed_set) == 0:
        return

    # Add a LoRA when another LoRA is already present
    if len(removed_set) == 0 and len(prior_set) > 0:
        logging.info(f"Adding the LoRA(s) named {added_set} to the model...")
        for lora in added_set:
            shared.model.load_adapter(Path(f"{shared.args.lora_dir}/{lora}"), lora)

        return

    # If any LoRA needs to be removed, start over
    if len(removed_set) > 0:
        # NOTE(review): in peft, disable_adapter() appears to be a context
        # manager; called bare like this it may have no effect — confirm.
        shared.model.disable_adapter()
        shared.model = shared.model.base_model.model

    if len(lora_names) > 0:
        logging.info("Applying the following LoRAs to {}: {}".format(shared.model_name, ', '.join(lora_names)))
        params = {}
        if not shared.args.cpu:
            params['dtype'] = shared.model.dtype
            if hasattr(shared.model, "hf_device_map"):
                # Adapter weights live under the "base_model.model." prefix.
                params['device_map'] = {"base_model.model." + k: v for k, v in shared.model.hf_device_map.items()}
            elif shared.args.load_in_8bit:
                params['device_map'] = {'': 0}

        # The first adapter wraps the model; the rest are loaded into the wrapper.
        shared.model = PeftModel.from_pretrained(shared.model, Path(f"{shared.args.lora_dir}/{lora_names[0]}"), **params)

        for lora in lora_names[1:]:
            shared.model.load_adapter(Path(f"{shared.args.lora_dir}/{lora}"), lora)

        if not shared.args.load_in_8bit and not shared.args.cpu:
            shared.model.half()
            if not hasattr(shared.model, "hf_device_map"):
                # torch.backends.mps.is_available() replaces the deprecated
                # torch.has_mps attribute.
                if torch.backends.mps.is_available():
                    device = torch.device('mps')
                    shared.model = shared.model.to(device)
                else:
                    shared.model = shared.model.cuda()

In [None]:
# Apply LoRA adapters only when some were requested. With the arguments shown
# earlier in this notebook lora_to_load is None, so this cell does nothing.
if shared.args.lora_to_load:
    add_lora_to_model(shared.args.lora_to_load)

# Load and Clean Training Data

## Encode the text data

In [121]:
def split_chunks(arr, step):
    """Yield consecutive slices of ``arr`` that are ``step`` items long.

    The final slice is shorter when ``len(arr)`` is not a multiple of
    ``step``; an empty ``arr`` yields nothing.
    """
    yield from (arr[start:start + step] for start in range(0, len(arr), step))

# Load the raw training text and drop carriage returns.
with open('training/datasets/em1.txt', 'r', encoding='utf-8') as file:
    raw_text = file.read().replace('\r', '')

# The hard-cut marker is written with escaped newlines; turn them into real ones.
hard_cut_string="\\n\\n\\n"
cut_string = hard_cut_string.replace('\\n', '\n')

# Encode every hard-cut part into token ids (integers, not embeddings) and
# slice the ids into chunks of `step` ids. Every chunk after the first is then
# prefixed with the last `overlap_len` ids of the chunk before it.
out_tokens = []
for text_part in raw_text.split(cut_string):
    if text_part.strip() == '':
        continue

    tokens = shared.tokenizer.encode(text_part)  # flat list of token ids
    step = shared.args.cutoff_len - shared.args.overlap_len
    if step <= 0:
        logging.warning(f"Error: overlap_len ({shared.args.overlap_len}) cannot be greater than or equal to cutoff_len ({shared.args.cutoff_len})")
        break

    tokens = list(split_chunks(tokens, step))
    # Guard against overlap_len == 0: a slice of [-0:] is the *whole* previous
    # chunk, so without this check every chunk would accumulate all the chunks
    # before it instead of getting no overlap.
    if shared.args.overlap_len > 0:
        for i in range(1, len(tokens)):
            # List "+" concatenates: tail of the previous chunk + this chunk.
            tokens[i] = tokens[i - 1][-shared.args.overlap_len:] + tokens[i]

    out_tokens.extend(tokens)
    #del tokens

In [122]:
l,w=len(out_tokens),len(out_tokens[0])
print(l,w)

53 128


In [123]:
print(out_tokens[0])

[2, 25194, 7, 5, 1929, 9, 14973, 4, 653, 51, 109, 259, 51, 206, 64, 464, 5, 499, 9, 9187, 4, 20, 1428, 1370, 639, 24, 70, 16, 1324, 15104, 7315, 4, 20, 5674, 137, 5, 790, 6, 15104, 7315, 8, 5, 21204, 9, 3777, 4, 152, 186, 15, 9387, 19305, 4, 50118, 771, 26107, 2796, 11126, 534, 35, 15104, 7315, 6, 2814, 7, 8640, 23617, 255, 10227, 4, 166, 6329, 1642, 209, 5453, 30, 1996, 47, 10, 410, 828, 59, 3618, 6, 147, 47, 58, 2421, 6, 147, 47, 439, 7, 334, 6, 99, 47, 109, 8, 172, 52, 17, 27, 890, 95, 1339, 24, 62, 31, 89, 4, 50118, 448, 3048, 530, 35, 2647, 6, 38, 21, 2421, 11, 391, 1327, 6, 3033, 89, 454, 38]


In [129]:
raw_text.split(cut_string)
#Note: the text is not split into smaller parts here. It appears to contain only
#single '\n' line breaks, so the hard-cut string '\n\n\n' never matches and
#split() returns a single part. The whole text is therefore encoded in one step.

['Welcome to the floor of SpaceX. What they do here they think can change the future of humanity. The driving force behind it all is CEO Elon Musk. The topic before the house, Elon Musk and the frontier of Technology. This week on Think Tank.\nWATTENBERG: Elon Musk, welcome to THINK TANK. We normally begin these sessions by asking you a little bit about background, where you were born, where you went to school, what you do and then we’ll just pick it up from there.\nMUSK: Well, I was born in South Africa, lived there until I was 17. Came to North America of my own accord, against my parent’s wishes. And was in Canada for a few years. I started school there which is where I met my wife.\nTransferred down to the University of Pennsylvania and got a degree in physics, degree in business at Wharton. Came out to California with the intent of doing a PHD in the material science and physics [unintelligible] with an eye towards using that as an energy storage unit for electric vehicles.\nI end

## Look into the tokens

In [109]:
#token ids of the first non-empty text part, before any chunking
for text_part in raw_text.split(cut_string):
    if text_part.strip() != '':
        tokens_orig = shared.tokenizer.encode(text_part)#text -> list of token ids (not embeddings)
        break #only the first non-empty part is encoded for this inspection

In [110]:
len(tokens_orig)

6741

In [111]:
#tokens after chunk
step = shared.args.cutoff_len - shared.args.overlap_len
tokens = list(split_chunks(tokens_orig, step))

In [112]:
print(len(tokens),len(tokens[0]),len(tokens[1]),len(tokens[-1]))

53 128 128 85


In [118]:
print(len(tokens[51]),len(tokens[51]),len(tokens[51]+tokens[52]))
#"+" concatenates two lists. The last chunk is shorter than the others (85 vs. 128 tokens).

128 128 213


In [126]:
print(tokens[0][-10:])

[21, 2421, 11, 391, 1327, 6, 3033, 89, 454, 38]


## Generate training dataset

In [15]:
from datasets import Dataset as dt, load_dataset

In [18]:
from datasets import Dataset as dt, load_dataset
def cut_chunk_for_newline(chunk: str, max_length: int):
    """Trim ``chunk`` so that it starts and ends on line boundaries.

    If the first newline lies within the first ``max_length`` characters,
    everything up to and including it is dropped. If the last newline of what
    remains lies within the last ``max_length`` characters, that newline and
    everything after it are dropped. A chunk without newlines is returned
    unchanged.
    """
    head = chunk.find('\n')
    if head == -1:
        return chunk

    if head < max_length:
        chunk = chunk[head + 1:]

    tail = chunk.rfind('\n')
    if tail != -1 and len(chunk) - tail < max_length:
        chunk = chunk[:tail]

    return chunk

def tokenize(prompt):
    """Encode ``prompt`` into one fixed-length training example.

    The text is truncated/padded to ``shared.args.cutoff_len + 1`` positions
    and the final position is then dropped from both sequences.

    Returns a dict with the keys ``input_ids`` and ``attention_mask``.
    """
    encoded = shared.tokenizer(
        prompt,
        truncation=True,
        max_length=shared.args.cutoff_len + 1,
        padding="max_length",
    )
    return {field: encoded[field][:-1] for field in ("input_ids", "attention_mask")}

# Turn every token-id chunk back into text. (raw_text and out_tokens are kept
# alive; for a large dataset they could be deleted here to save RAM.)
text_chunks = list(map(shared.tokenizer.decode, out_tokens))

# Optionally trim each chunk so it starts and ends on a line boundary.
if shared.args.newline_favor_len > 0:
    text_chunks = [
        cut_chunk_for_newline(chunk, shared.args.newline_favor_len)
        for chunk in text_chunks
    ]

# One tokenized example per chunk; no evaluation split is used.
train_data = dt.from_list(list(map(tokenize, text_chunks)))
eval_data = None

In [130]:
shared.args.newline_favor_len

128

In [127]:
print(text_chunks)

['</s>Welcome to the floor of SpaceX. What they do here they think can change the future of humanity. The driving force behind it all is CEO Elon Musk. The topic before the house, Elon Musk and the frontier of Technology. This week on Think Tank.\nWATTENBERG: Elon Musk, welcome to THINK TANK. We normally begin these sessions by asking you a little bit about background, where you were born, where you went to school, what you do and then we’ll just pick it up from there.', '</s>Welcome to the floor of SpaceX. What they do here they think can change the future of humanity. The driving force behind it all is CEO Elon Musk. The topic before the house, Elon Musk and the frontier of Technology. This week on Think Tank.\nWATTENBERG: Elon Musk, welcome to THINK TANK. We normally begin these sessions by asking you a little bit about background, where you were born, where you went to school, what you do and then we’ll just pick it up from there.\nMUSK: Well, I was born in South Africa, lived ther

In [137]:
train_data

Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 53
})

In [141]:
len(train_data['attention_mask'])

53

# Model training

In [31]:
shared.args.lora_name='em1'

In [32]:
from peft import (LoraConfig, get_peft_model, prepare_model_for_int8_training,
                  set_peft_model_state_dict)
import traceback

def clean_path(base_path: str, path: str):
    """Sanitise ``path`` and, if requested, anchor it under ``base_path``.

    Backslashes become forward slashes and every '..' becomes '_'. With
    ``base_path`` set to None the sanitised path is returned as is; otherwise
    it is appended to the absolute form of ``base_path``.
    """
    # TODO: Probably could do with a security audit to guarantee there's no ways this can be bypassed to target an unwanted path.
    # Or swap it to a strict whitelist of [a-zA-Z_0-9]
    sanitized = path.replace('\\', '/').replace('..', '_')
    if base_path is not None:
        root = Path(base_path).absolute()
        return f'{root}/{sanitized}'

    return sanitized

# Folder the adapter is read from / written to: <lora_dir>/<sanitised lora_name>.
lora_file_path = clean_path(None, shared.args.lora_name)
lora_file_path = f"{shared.args.lora_dir}/{lora_file_path}"

# Prefer the LoRA target-module table that ships with peft and the model-class
# table that ships with transformers.
# If not available, default to a backup map for some common model types.
try:
    from peft.utils.other import \
        TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING as \
        model_to_lora_modules
    from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
    # .items() is required: iterating the mapping directly yields only its
    # keys, so the two-name unpacking always raised and the bare `except`
    # silently forced the fallback tables below.
    MODEL_CLASSES = {v: k for k, v in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES.items()}
except Exception:
    standard_modules = ["q_proj", "v_proj"]
    model_to_lora_modules = {"llama": standard_modules, "opt": standard_modules, "gptj": standard_modules, "gpt_neox": ["query_key_value"]}
    MODEL_CLASSES = {
        "LlamaForCausalLM": "llama",
        "OPTForCausalLM": "opt",
        "GPTJForCausalLM": "gptj",
        "GPTNeoXForCausalLM": "gpt_neox"
    }

# Derive the model family from the class of the loaded model instead of
# hard-coding "llama", so the target modules match the model actually in use.
# "llama" stays the fallback for unknown classes.
model_id = MODEL_CLASSES.get(type(shared.model).__name__, "llama")
if model_id not in model_to_lora_modules:
    logging.warning(f"No LoRA target modules known for model type '{model_id}', using the 'llama' defaults.")
    model_id = "llama"

config = LoraConfig(
    r=shared.args.lora_rank,
    lora_alpha=shared.args.lora_alpha,
    target_modules=model_to_lora_modules[model_id],
    lora_dropout=shared.args.lora_dropout,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the base model with LoRA layers and, unless always_override is set,
# resume from an adapter previously saved under lora_file_path.
try:
    logging.info("Creating LoRA model...")
    lora_model = get_peft_model(shared.model, config)
    if not shared.args.always_override and Path(f"{lora_file_path}/adapter_model.bin").is_file():
        logging.info("Loading existing LoRA data...")
        # On a CPU run, map the saved tensors to the CPU so an adapter that
        # was saved from another device can still be loaded.
        state_dict_peft = torch.load(
            f"{lora_file_path}/adapter_model.bin",
            map_location="cpu" if shared.args.cpu else None,
        )
        set_peft_model_state_dict(lora_model, state_dict_peft)
except Exception:
    # Report the failure, then stop: the cells below cannot work without
    # lora_model, and swallowing the error only postponed it to a confusing
    # NameError later on.
    logging.error("Could not create the LoRA model:\n%s", traceback.format_exc())
    raise

In [33]:
print(shared.args.always_override)

False


In [41]:
shared.args.epochs=1

In [42]:
import math
class Callbacks(transformers.TrainerCallback):
    """Trainer hooks that publish progress, honour interrupts and checkpoint.

    Reads the notebook globals ``gradient_accumulation_steps``,
    ``actual_save_steps``, ``WANT_INTERRUPT``, ``lora_model`` and
    ``lora_file_path``, and writes progress counters to ``tracked``.
    """

    @staticmethod
    def _request_stop(control):
        """Flag both the current epoch and the whole run for termination."""
        control.should_epoch_stop = True
        control.should_training_stop = True

    def on_step_begin(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, **kwargs):
        # Progress is tracked in micro-batches, not optimizer steps.
        tracked.current_steps = state.global_step * gradient_accumulation_steps
        tracked.max_steps = state.max_steps * gradient_accumulation_steps

        if WANT_INTERRUPT:
            self._request_stop(control)
            return

        checkpoint_due = (
            state.global_step > 0
            and actual_save_steps > 0
            and state.global_step % actual_save_steps == 0
        )
        if checkpoint_due:
            lora_model.save_pretrained(f"{lora_file_path}/checkpoint-{tracked.current_steps}/")

    def on_substep_end(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, **kwargs):
        # One more micro-batch of the current accumulation window is done.
        tracked.current_steps += 1
        if WANT_INTERRUPT:
            self._request_stop(control)


# Micro-batches accumulated per optimizer step. Computed here and reused
# below: the original eval_steps expression read
# shared.args.gradient_accumulation_steps, which is not among the arguments
# shown for shared.args and would fail as soon as an eval dataset is supplied.
gradient_accumulation_steps = shared.args.batch_size // shared.args.micro_batch_size

trainer = transformers.Trainer(
    model=lora_model,
    train_dataset=train_data,
    eval_dataset=eval_data,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=shared.args.micro_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        # warmup_steps / eval_steps are divided by the accumulation factor to
        # express them in optimizer steps.
        warmup_steps=math.ceil(shared.args.warmup_steps / gradient_accumulation_steps),
        num_train_epochs=shared.args.epochs,
        learning_rate=float(shared.args.learning_rate),
        fp16=False,
        optim=shared.args.optimizer,
        logging_steps=5,
        # Evaluation, step-wise saving and best-model reloading are only
        # enabled when an eval dataset exists.
        evaluation_strategy="steps" if eval_data is not None else "no",
        eval_steps=math.ceil(shared.args.eval_steps / gradient_accumulation_steps) if eval_data is not None else None,
        save_strategy="steps" if eval_data is not None else "no",
        output_dir=lora_file_path,
        lr_scheduler_type=shared.args.lr_scheduler_type,
        load_best_model_at_end=eval_data is not None,
        # TODO: Enable multi-device support
        ddp_find_unused_parameters=None,
        no_cuda=shared.args.cpu
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False),
    callbacks=[Callbacks()]
)


In [43]:
print(shared.args.optimizer)

adamw_torch


In [44]:
# Turn off the model's `use_cache` flag before training.
lora_model.config.use_cache = False

# Compile the model when the torch version compares >= "2" and the platform is
# not Windows.
# NOTE(review): `trainer` was constructed in an earlier cell with the
# un-compiled `lora_model`; rebinding the name here does not change the object
# the trainer already holds — confirm this is intended.
if torch.__version__ >= "2" and sys.platform != "win32":
    lora_model = torch.compile(lora_model)

In [54]:
import json
import threading

WANT_INTERRUPT = False
PARAMETERS = ["lora_name", "always_override", "save_steps", "micro_batch_size", "batch_size", "epochs", "learning_rate", "lr_scheduler_type", "lora_rank", "lora_alpha", "lora_dropout", "cutoff_len", "dataset", "eval_dataset", "format", "eval_steps", "raw_text_file", "overlap_len", "newline_favor_len", "higher_rank_limit", "warmup_steps", "optimizer", "hard_cut_string"]

class Tracked:
    """Mutable progress record for one training run.

    Attributes:
        current_steps: steps completed so far (starts at 0).
        max_steps: total steps expected (starts at 0).
        did_save: whether the final adapter has been saved (starts False).
    """

    def __init__(self):
        self.current_steps, self.max_steps = 0, 0
        self.did_save = False

# Progress record written by the Callbacks hooks and read by the monitor loop.
tracked = Tracked()
# Micro-batches accumulated per optimizer step.
gradient_accumulation_steps=shared.args.batch_size // shared.args.micro_batch_size
# save_steps divided by the accumulation factor, rounded up. A value of 0
# (the case when save_steps is 0) disables the periodic checkpoint in
# Callbacks.on_step_begin, which requires actual_save_steps > 0.
actual_save_steps = math.ceil(shared.args.save_steps / gradient_accumulation_steps)

def format_time(seconds: float):
    """Render a duration as a short, backtick-quoted string.

    Durations under 120 seconds are shown in seconds, under 120 minutes in
    minutes, and anything longer in hours; values are rounded to whole units.
    """
    amount = seconds
    for unit in ("seconds", "minutes"):
        if amount < 120:
            return f"`{amount:.0f}` {unit}"
        amount = amount / 60

    return f"`{amount:.0f}` hours"

# == Save parameters for reuse ==
#with open(f"{lora_file_path}/training_parameters.json", 'w', encoding='utf-8') as file:
#    vars = locals()
#    json.dump({x: vars[x] for x in PARAMETERS}, file)

# == Main run and monitor loop ==
logging.info("Starting training...")
def threaded_run():
    """Run ``trainer.train()`` and then save the adapter to ``lora_file_path``.

    Used as the target of the background training thread. Sets
    ``tracked.did_save`` to True once the adapter has been written.

    There is no error handling here: if ``trainer.train()`` raises, the
    exception ends the thread and neither the save nor the flag update runs.
    """
    trainer.train()
    # Save from inside the worker thread so the result is written as soon as
    # training returns, independently of whatever is waiting on the thread.
    # (The original note referred to a gradio thread, e.g. a closed browser.)
    lora_model.save_pretrained(lora_file_path)
    logging.info("LoRA training run is completed and saved.")
    tracked.did_save = True

# Start training in a background thread and poll its progress from this cell.
thread = threading.Thread(target=threaded_run)
thread.start()
last_step = 0  # last value of tracked.current_steps that was reported
start_time = time.perf_counter()


# Poll twice per second until the training thread ends (normally or by error).
while thread.is_alive():
    time.sleep(0.5)
    if WANT_INTERRUPT:
        # Nothing to report while an interrupt is pending; the callbacks stop
        # the run.
        pass
    #     yield "Interrupting, please wait... *(Run will stop after the current training step completes.)*"

    elif tracked.current_steps != last_step:
        # Progress changed since the last report: recompute speed and ETA.
        last_step = tracked.current_steps
        time_elapsed = time.perf_counter() - start_time
        if time_elapsed <= 0:
            # Defensive branch: avoids dividing by zero, uses a placeholder ETA.
            timer_info = ""
            total_time_estimate = 999
        else:
            its = tracked.current_steps / time_elapsed  # steps per second
            if its > 1:
                timer_info = f"`{its:.2f}` it/s"
            else:
                timer_info = f"`{1.0/its:.2f}` s/it"

            # Seconds per step times the total number of steps.
            total_time_estimate = (1.0 / its) * (tracked.max_steps)

        logging.info(f"Running... **{tracked.current_steps}** / **{tracked.max_steps}** ... {timer_info}, {format_time(time_elapsed)} / {format_time(total_time_estimate)} ... {format_time(total_time_estimate - time_elapsed)} remaining")


Step,Training Loss


Exception in thread Thread-8 (threaded_run):
Traceback (most recent call last):
  File "/Users/shhe/opt/anaconda3/envs/trainlora/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/Users/shhe/opt/anaconda3/envs/trainlora/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/var/folders/58/q_xbhc496k95t3ydn54m00z80000gp/T/ipykernel_36290/2100478145.py", line 36, in threaded_run
  File "/Users/shhe/opt/anaconda3/envs/trainlora/lib/python3.10/site-packages/transformers/trainer.py", line 1664, in train
    return inner_training_loop(
  File "/Users/shhe/opt/anaconda3/envs/trainlora/lib/python3.10/site-packages/transformers/trainer.py", line 1940, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
  File "/Users/shhe/opt/anaconda3/envs/trainlora/lib/python3.10/site-packages/transformers/trainer.py", line 2753, in training_step
    loss.backward()
  File "/Users/shhe/opt/anaconda3/envs/