In [8]:
from datasets import load_dataset, Dataset
import pandas as pd

# Root folder of the project's model sources; the training file is read from
# here and the trainer output directory is built from it in later cells.
base_path = "Bevo-Bud-The-GPT/model-src"

# Load the preprocessed training set from its JSON file as the "train" split.
train_data = load_dataset(
    "json",
    data_files=f"{base_path}/train-v1.json",
    split="train",
)



In [81]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from trl import setup_chat_format
# Hub identifier of the pretrained checkpoint that gets fine-tuned.
base_model = "distilbert/distilgpt2"

model = AutoModelForCausalLM.from_pretrained(base_model)
# `model_max_length` is the tokenizer option that caps the sequence length.
# The previous `max_length=1024` keyword is not a tokenizer init option, so the
# intended 1024-token limit was never actually applied through it.
tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True, model_max_length=1024)

# TRL helper that returns the model/tokenizer pair prepared for chat-style
# training. NOTE(review): it is expected to add chat special tokens and resize
# the embeddings -- confirm against the installed TRL version.
model, tokenizer = setup_chat_format(model, tokenizer)

# Use the end-of-sequence token for padding as well.
tokenizer.pad_token = tokenizer.eos_token

# Version label for this run; only printed in this cell.
version = "1.2"
print("Saving as verion: ", version)


Saving as verion:  1.2


In [24]:
from trl import SFTTrainer 
from peft import LoraConfig, get_peft_model

# Trainer settings. Batch size for training, gradient accumulation, logging and
# checkpoint cadence, and push_to_hub are intentionally left at library defaults.
training_args = TrainingArguments(
    output_dir=f"{base_path}/Bevo-Bud",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_eval_batch_size=4,
    seed=42,
)

# LoRA adapter settings for causal language modelling: rank 16, alpha 32,
# 5% dropout on the adapter layers, no bias parameters trained.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Supervised fine-tuning on the loaded training set with sequence packing enabled.
# NOTE(review): the recorded training loss for this run (~10.77) is close to
# ln(vocab size), which suggests the model learned very little -- verify the
# dataset format and whether the newly added chat tokens are being trained.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    tokenizer=tokenizer,
    peft_config=peft_config,
    packing=True,
)

trainer.train()

Step,Training Loss


TrainOutput(global_step=420, training_loss=10.769466145833333, metrics={'train_runtime': 14528.6956, 'train_samples_per_second': 0.23, 'train_steps_per_second': 0.029, 'total_flos': 879309241712640.0, 'train_loss': 10.769466145833333, 'epoch': 3.0})

In [33]:
# Write the trained model to the same directory that was configured as the
# trainer's output_dir.
trainer.save_model(output_dir=f"{base_path}/Bevo-Bud")



## Testing

In [59]:
from transformers import pipeline, Conversation
from peft import PeftConfig, PeftModel

# Directory that trainer.save_model() wrote to. The tokenizer, the PEFT config
# and the generation pipeline must all read from this one location; the
# pipeline previously pointed at a separate "Bevo-Budv{version}" path that
# nothing in this notebook creates.
model_dir = f"{base_path}/Bevo-Bud"

tokenizer = AutoTokenizer.from_pretrained(model_dir)
# Adapter configuration saved next to the model; kept available for inspection.
config = PeftConfig.from_pretrained(model_dir)

# Text-generation pipeline backed by the fine-tuned model and its tokenizer.
generator = pipeline('text-generation', model=model_dir, tokenizer=tokenizer)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [83]:
# Sample question used as a smoke test for the fine-tuned model.
# NOTE(review): this is a raw text prompt, not a chat-formatted message list --
# confirm that matches how the training data was formatted.
messages = "Can I still graduate if my GPA is below the minimum"

# Cap the length of the generated continuation.
generation_options = {"max_new_tokens": 128}
generator(messages, **generation_options)

## Uploading Model To Hub

In [82]:
from huggingface_hub import notebook_login
# Interactive Hugging Face Hub login (renders a widget in the notebook).
notebook_login()

# The value passed here is the commit message of the upload, not the repository
# name; the recorded CommitInfo output shows it in the commit_message field.
trainer.push_to_hub(commit_message="Bevo-Bud")

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

events.out.tfevents.1713985198.82d7450fbbdd.5942.0:   0%|          | 0.00/5.00k [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

events.out.tfevents.1713985235.82d7450fbbdd.5942.1:   0%|          | 0.00/5.57k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/4.98k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/1.18M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Kelechie/tmp_trainer/commit/a5daaa92c8176af9a9d307f73b97b613e83db901', commit_message='Bevo-Budv1.0', commit_description='', oid='a5daaa92c8176af9a9d307f73b97b613e83db901', pr_url=None, pr_revision=None, pr_num=None)