In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, Seq2SeqTrainingArguments, Seq2SeqTrainer

from torch.utils.data.dataset import Dataset

In [2]:
import os

os.environ["CUDA_VISIBLE_DEVICES"]="1"

In [3]:
from peft import (
    prepare_model_for_int8_training,
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
)


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/jerry/miniconda3/envs/agi/lib/python3.11/site-packages/bitsandbytes/libbitsandbytes_cuda121_nocublaslt.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 6.1
CUDA SETUP: Detected CUDA version 121
CUDA SETUP: Loading binary /home/jerry/miniconda3/envs/agi/lib/python3.11/site-packages/bitsandbytes/libbitsandbytes_cuda121_nocublaslt.so...


  warn(msg)
  warn(msg)
  warn(msg)


In [4]:
class MyDataset(Dataset):
    def __init__(self, tokenizer, file_path, block_size):
        super().__init__()

        self.tokenizer = tokenizer
        self.block_size = block_size

        with open(file_path, 'r') as f:
            lines = f.read().splitlines()

        # Split long lines into smaller parts
        lines = [line[i:i+block_size] for line in lines for i in range(0, len(line), block_size)]

        self.examples = [tokenizer.encode(line) for line in lines]

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, i):
        input_ids = self.examples[i][:self.block_size]
        target_ids = self.examples[i][1:self.block_size+1]  # Shifted right

        # Pad input and target sequences to the block size
        input_ids = input_ids + [self.tokenizer.pad_token_id] * (self.block_size - len(input_ids))
        target_ids = target_ids + [-100] * (self.block_size - len(target_ids))  # -100 is the index that PyTorch uses to ignore a token in loss computation

        return {
            'input_ids': input_ids,
            'labels': target_ids,
        }


In [5]:
LORA_R = 8  # Some LoRA parameters
LORA_ALPHA = 16
LORA_DROPOUT = 0.05
LEARNING_RATE = 2e-5
TARGET_MODULES = [
    'q_proj',
    'v_prol',
]

In [6]:
config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=TARGET_MODULES,
    lora_dropout=LORA_DROPOUT,
    bias='none',
    task_type='CAUSAL_LM',
)

In [7]:
endpoint = "EleutherAI/gpt-j-6B"

In [8]:
# 1. Prepare your data
# Assume you have a 'text.txt' file containing your unlabeled data
model = AutoModelForCausalLM.from_pretrained(
    endpoint,
    load_in_8bit=True,
    device_map="auto"
)



In [9]:
model = prepare_model_for_int8_training(model)
model = get_peft_model(model, config)

In [10]:
# 2. Create a tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    endpoint,
    add_eos_token=True
)
tokenizer.pad_token_id = 0 # have pad_token_id different from eos token

In [11]:
# create dataset
dataset = MyDataset(
    tokenizer=tokenizer,
    file_path="./data/shakespere.txt",
    block_size=256,
)

In [12]:
# 5. Fine-tune the model
training_args = Seq2SeqTrainingArguments(
    output_dir="./gptl_finetuned/shakespear", # The output directory
    overwrite_output_dir=True, # Overwrite the content of the output directory
    learning_rate=LEARNING_RATE,
    num_train_epochs=2, # Number of training epochs
    per_device_train_batch_size=1, # Batch size for training
    save_steps=10000, # Number of updates steps before two checkpoint saves
    save_total_limit=2, # Limit the total amount of checkpoints and deletes the older checkpoints,
)





In [13]:
old_state_dict = model.state_dict
model.state_dict = (
    lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict())
).__get__(model, type(model))

In [14]:
# import os

# os.environ["CUDA_VISIBLE_DEVICES"]="1"

In [15]:
# 3. Create a dataset
# data_dir = './data/txt'

# dataset = MyDataset(
#     tokenizer=tokenizer,
#     file_path=f'{data_dir}/all.txt',
#     block_size=512,
# )
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
)    

In [16]:
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33m111012912jerry[0m. Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016669609133775035, max=1.0…

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss
500,8.5145
1000,7.4781
1500,7.0457
2000,6.971
2500,6.9063
3000,6.8244
3500,6.7528
4000,6.7015
4500,6.6262
5000,6.6453




TrainOutput(global_step=65554, training_loss=6.159881325977289, metrics={'train_runtime': 149113.0529, 'train_samples_per_second': 0.44, 'train_steps_per_second': 0.44, 'total_flos': 5.88667390868226e+17, 'train_loss': 6.159881325977289, 'epoch': 2.0})

In [17]:
trainer.save_model()

In [18]:
from transformers import pipeline
from transformers import GenerationConfig

In [21]:
def generate_prompt(instruction, input=None):
    if input:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Input:
{input}
### Response:
"""
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Response:
"""

In [None]:
def generate(instruction, input=None):
    prompt = generate_prompt(instruction, input)
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].cuda()
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=GenerationConfig(temperature=0.2, top_p=0.75, num_beams=4),
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=256
    )
    for seq in generation_output.sequences:
        output = tokenizer.decode(seq)
        print("Respuesta:", output.split("### Response:")[1].strip())

In [19]:
llm = pipeline('text2text-generation', model=model, tokenizer=tokenizer)
result = llm("please answer the question: what is hamlet")
print(result)

The model 'PeftModelForCausalLM' is not supported for text2text-generation. Supported models are ['BartForConditionalGeneration', 'BigBirdPegasusForConditionalGeneration', 'BlenderbotForConditionalGeneration', 'BlenderbotSmallForConditionalGeneration', 'EncoderDecoderModel', 'FSMTForConditionalGeneration', 'GPTSanJapaneseForConditionalGeneration', 'LEDForConditionalGeneration', 'LongT5ForConditionalGeneration', 'M2M100ForConditionalGeneration', 'MarianMTModel', 'MBartForConditionalGeneration', 'MT5ForConditionalGeneration', 'MvpForConditionalGeneration', 'NllbMoeForConditionalGeneration', 'PegasusForConditionalGeneration', 'PegasusXForConditionalGeneration', 'PLBartForConditionalGeneration', 'ProphetNetForConditionalGeneration', 'SwitchTransformersForConditionalGeneration', 'T5ForConditionalGeneration', 'XLMProphetNetForConditionalGeneration'].
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'please answer the question: what is hamlet???.\n?.?.?.'}]


In [20]:
llm_new = pipeline('text2text-generation', model=model, tokenizer=tokenizer, device="cuda:0")
result_new = llm_new('please answer the question: to be or not to be')
print(result_new)

The model 'PeftModelForCausalLM' is not supported for text2text-generation. Supported models are ['BartForConditionalGeneration', 'BigBirdPegasusForConditionalGeneration', 'BlenderbotForConditionalGeneration', 'BlenderbotSmallForConditionalGeneration', 'EncoderDecoderModel', 'FSMTForConditionalGeneration', 'GPTSanJapaneseForConditionalGeneration', 'LEDForConditionalGeneration', 'LongT5ForConditionalGeneration', 'M2M100ForConditionalGeneration', 'MarianMTModel', 'MBartForConditionalGeneration', 'MT5ForConditionalGeneration', 'MvpForConditionalGeneration', 'NllbMoeForConditionalGeneration', 'PegasusForConditionalGeneration', 'PegasusXForConditionalGeneration', 'PLBartForConditionalGeneration', 'ProphetNetForConditionalGeneration', 'SwitchTransformersForConditionalGeneration', 'T5ForConditionalGeneration', 'XLMProphetNetForConditionalGeneration'].
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'please answer the question: to be or not to be, is?\n\n. I,,'}]
