In [1]:
import os

# GPU selection is written to the process environment here, before torch /
# transformers are imported in the later cells.
os.environ.update(
    {
        # Number devices by PCI bus id.
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        # Expose only these six device indices to this process.
        "CUDA_VISIBLE_DEVICES": "1,3,4,5,6,7",
        # Location of the CUDA 11 toolkit on this machine.
        'CUDA_PATH': '/usr/local/cuda-11',
    }
)

In [2]:
import pandas as pd
import numpy as np
from datasets import load_dataset, Dataset
import re

In [3]:
from tune_gorilla import prep_dataset, prep_model

In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch

In [5]:
# Load the Falcon-7B tokenizer from the hub and register '<?>' as its pad token.
# NOTE(review): if '<?>' is not already in the vocabulary this adds a new token id;
# confirm that prep_model resizes the model's embeddings to match.
tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b")
tokenizer.add_special_tokens(dict(pad_token='<?>'))

def tokenize_function(examples):
    """Tokenize ``examples['instruction']`` with ``examples['output']`` as the target text.

    Args:
        examples: mapping that provides 'instruction' and 'output' entries.

    Returns:
        Whatever the module-level ``tokenizer`` returns for this call.

    No truncation / padding options are passed; a fixed-length variant
    (truncation=True, padding='max_length', max_length=640) was previously
    tried here and is disabled.
    """
    source_text = examples['instruction']
    target_text = examples['output']
    return tokenizer(source_text, text_target=target_text)

In [6]:
# Build the train and eval datasets from the APIBench HuggingFace JSON files
# using the project helper (presumably it also tokenizes them; see tune_gorilla).
tok_hf_train_dset = prep_dataset(
    '/mnt/data/mart/gorilla/data/apibench/huggingface_train.json'
)
tok_hf_eval_dset = prep_dataset(
    '/mnt/data/mart/gorilla/data/apibench/huggingface_eval.json'
)

Map:   0%|          | 0/8081 [00:00<?, ? examples/s]

Map:   0%|          | 0/899 [00:00<?, ? examples/s]

In [7]:
# Load the model via the project helper; the path names a local sharded bf16
# Falcon-7B checkpoint directory.
model = prep_model(
    "/mnt/data/mart/falcon-7b-sharded-bf16",
)

In [1]:
from transformers import TrainingArguments, Trainer

In [9]:
# Fine-tuning configuration. Evaluation and checkpointing both happen once per
# epoch, which is what load_best_model_at_end needs in order to pick a checkpoint.
training_args = TrainingArguments(
    output_dir="/mnt/data/mart/test_trainer",
    # Optimisation schedule.
    learning_rate=2e-5,
    lr_scheduler_type='cosine',
    warmup_ratio=0.03,
    num_train_epochs=5,
    # One example per device per step, accumulated over 64 steps.
    per_device_train_batch_size=1,
    gradient_accumulation_steps=64,
    # Evaluation / checkpoint cadence.
    evaluation_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
)

In [10]:
# Wire the model, arguments and datasets into a Trainer. The collator is the
# language-modeling one with masked-LM disabled (mlm=False).
# NOTE(review): this collator is expected to derive labels from input_ids; confirm
# that is the intended objective for the instruction/output pairs.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
    train_dataset=tok_hf_train_dset,
    eval_dataset=tok_hf_eval_dset,
)

In [12]:
# Pull the first batch from the training dataloader for inspection.
# Uses the built-in next()/iter() rather than calling the dunder methods directly.
batch = next(iter(trainer.get_train_dataloader()))

In [11]:
# Run the fine-tuning loop with the Trainer configured above. The output recorded
# below this cell shows the run ended with a KeyboardInterrupt.
trainer.train()

You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

In [10]:
# Call eval() on the model before generation (presumably switches it to
# inference mode); the result is bound back to the same name.
model = model.eval()

In [11]:
# Text-generation pipeline around the in-memory model and tokenizer.
# NOTE(review): torch_dtype / trust_remote_code / device_map are normally
# model-loading options; with an already-instantiated model they may have no
# effect. Confirm against the installed transformers version.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.
The model 'RWForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusFor

In [12]:
# Generate one continuation of the prompt and print every returned sequence.
sequences = pipeline(
    "The user is interested in a tool to find relationships between medical terms.",
    # Length / stopping.
    max_length=1024,
    eos_token_id=tokenizer.eos_token_id,
    num_return_sequences=1,
    # Sampling is enabled but restricted to the single top-ranked token (top_k=1).
    do_sample=True,
    top_k=1,
)
for seq in sequences:
    print(
        f"Result: {seq['generated_text']}"
    )

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Result: The user is interested in a tool to find relationships between medical terms.<___:::('('-('-('----------------
















:::::ersenceenceersenceersersersersersersersersersersersersersersersersersersersersersersersersersersersersersersersersersersersersersersersersersersersers.---------.-.-----.-.-.......................................................
.....
.....
..




























































  - - - - -  - -  - -    - - - -      
