In [1]:
import gc
gc.collect()
from peft import LoraConfig, get_peft_model

In [2]:
import torch
from torch import nn
import math
from transformers import (
    GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments,
    DataCollatorForLanguageModeling, pipeline
)
from datasets import load_dataset

In [3]:
model_name = "distilgpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.model_max_length = 512

base_model = GPT2LMHeadModel.from_pretrained(model_name)
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
lora_model = get_peft_model(base_model, lora_config)
lora_model.print_trainable_parameters()

trainable params: 147,456 || all params: 82,060,032 || trainable%: 0.1797




In [6]:
dataset = load_dataset("wikitext", "wikitext-2-raw-v1")

def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=64)

tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=["text"])
tokenized_dataset.set_format("torch")

train_dataset = tokenized_dataset["train"].select(range(500))
eval_dataset = tokenized_dataset["validation"].select(range(200))

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)


In [6]:
vanilla_model = GPT2LMHeadModel.from_pretrained(model_name)
vanilla_pipe = pipeline("text-generation", model=vanilla_model, tokenizer=tokenizer)

test_prompt = "Artificial intelligence will"
print("=== BEFORE Fine-tuning ===")
print(vanilla_pipe(test_prompt, max_length=10, num_return_sequences=1)[0]['generated_text'])


Device set to use cpu
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Both `max_new_tokens` (=256) and `max_length`(=10) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


=== BEFORE Fine-tuning ===
Artificial intelligence will do a lot more than just provide new ways to connect with online clients. To that end, researchers at Carnegie Mellon University and MIT have identified a new way to connect with an online database of human data. The goal is to create a database of human data that will not only provide new ways to connect with online clients, but will also help with the creation of social networks and artificial intelligence.

The researchers propose their research to be published online May 26 in the journal Science Advances.

"The human data that we're trying to create will be a database of human data and will be a database of social networks, social networks, and artificial intelligence," says Daniel A. Shiffman, a neuroscientist at Carnegie Mellon who led the research and is one of the authors of the paper. "We're not just talking about the Internet now; we're talking about the Internet as well."

The researchers are also developing a way of de

In [7]:
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    # Lower the learning rate significantly for fine-tuning
    learning_rate=3e-5, 
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    weight_decay=0.01,
    save_strategy="no",
    logging_dir="./logs",
    dataloader_pin_memory=False,
    # These are needed for Peft
    save_safetensors=False,
    # This is a new parameter for Peft
    save_total_limit=1
)

# Use the new lora_model in the Trainer
trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
)

trainer.train()


Epoch,Training Loss,Validation Loss
1,No log,


TrainOutput(global_step=125, training_loss=5.48773974609375, metrics={'train_runtime': 441.601, 'train_samples_per_second': 1.132, 'train_steps_per_second': 0.283, 'total_flos': 8193835008000.0, 'train_loss': 5.48773974609375, 'epoch': 1.0})

In [13]:
trainer.save_model("./fine_tuned_gpt2_fc_only")
tokenizer.save_pretrained("./fine_tuned_gpt2_fc_only")


('./fine_tuned_gpt2_fc_only\\tokenizer_config.json',
 './fine_tuned_gpt2_fc_only\\special_tokens_map.json',
 './fine_tuned_gpt2_fc_only\\vocab.json',
 './fine_tuned_gpt2_fc_only\\merges.txt',
 './fine_tuned_gpt2_fc_only\\added_tokens.json')

In [10]:
trainer.save_model("./fine_tuned_gpt2_lora")
merged_model = lora_model.merge_and_unload()
merged_model.save_pretrained("./fine_tuned_gpt2_merged")
tokenizer.save_pretrained("./fine_tuned_gpt2_merged")

('./fine_tuned_gpt2_merged\\tokenizer_config.json',
 './fine_tuned_gpt2_merged\\special_tokens_map.json',
 './fine_tuned_gpt2_merged\\vocab.json',
 './fine_tuned_gpt2_merged\\merges.txt',
 './fine_tuned_gpt2_merged\\added_tokens.json')

In [4]:
# Define a repository name. Replace 'your-username' with your Hugging Face username.
repo_name = "Pruthvi-1029/fine-tuned-distilgpt2"

# Load the merged model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("./fine_tuned_gpt2_merged")
merged_model = GPT2LMHeadModel.from_pretrained("./fine_tuned_gpt2_merged")

# Push the model and tokenizer to the hub
merged_model.push_to_hub(repo_name)
tokenizer.push_to_hub(repo_name)

model.safetensors:   0%|          | 0.00/328M [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


CommitInfo(commit_url='https://huggingface.co/Pruthvi-1029/fine-tuned-distilgpt2/commit/d4391f2a4b56b1eeec2fe1408d50feb658ac4c28', commit_message='Upload tokenizer', commit_description='', oid='d4391f2a4b56b1eeec2fe1408d50feb658ac4c28', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Pruthvi-1029/fine-tuned-distilgpt2', endpoint='https://huggingface.co', repo_type='model', repo_id='Pruthvi-1029/fine-tuned-distilgpt2'), pr_revision=None, pr_num=None)