# In [4]:
import os
import gc
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer
)
from datasets import Dataset
from peft import get_peft_model, LoraConfig, TaskType
from datetime import datetime
 
# Per-run settings: the timestamp suffix gives every run its own output folders.
model_name = "distilgpt2"
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
orig_dir, float16_dir, peft_dir = (
    f"./models/{variant}_{timestamp}" for variant in ("orig", "float16", "peft")
)

# Tokenizer: fall back to the EOS token for padding when no pad token is defined.
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Base model, saved together with the tokenizer as the "original" snapshot.
model = AutoModelForCausalLM.from_pretrained(model_name)
model.save_pretrained(orig_dir)
tokenizer.save_pretrained(orig_dir)
 
def get_size(path):
    """Return the combined size of every file under ``path`` in MiB.

    The directory tree is walked recursively with ``os.walk`` and each file
    is measured with ``os.path.getsize``; the byte total is divided by
    ``1024 ** 2``.
    """
    total_bytes = 0
    for dirpath, _dirnames, filenames in os.walk(path):
        for filename in filenames:
            total_bytes += os.path.getsize(os.path.join(dirpath, filename))
    return total_bytes / (1024 ** 2)
 
# Report the on-disk footprint and parameter dtype of the freshly saved model.
print(f"Original model size: {get_size(orig_dir):.2f} MB")
print("Dtype before conversion:", next(model.parameters()).dtype)

# Cast the model's weights to half precision.
model.half()
print("Dtype after conversion:", next(model.parameters()).dtype)

# Persist the half-precision model next to its tokenizer and report its size.
model.save_pretrained(float16_dir)
tokenizer.save_pretrained(float16_dir)
print(f"Float16 model size: {get_size(float16_dir):.2f} MB")

# Drop the in-memory model before loading the float16 checkpoint back from disk.
del model
gc.collect()
model = AutoModelForCausalLM.from_pretrained(float16_dir, torch_dtype=torch.float16)

# Wrap the reloaded model with LoRA adapters targeting the "c_attn" modules.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["c_attn"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)
model = get_peft_model(model, peft_config)

# Tiny in-memory corpus, tokenized into fixed-length rows of 32 tokens.
texts = [
    "The cat sat on the mat.",
    "The quick brown fox jumps over the lazy dog.",
    "Artificial intelligence is transforming the world.",
]

inputs = tokenizer(
    texts,
    max_length=32,
    truncation=True,
    padding="max_length",
    return_tensors="pt",
)

# Only the two columns the collator reads are kept in the dataset.
dataset = Dataset.from_dict(
    {column: inputs[column] for column in ("input_ids", "attention_mask")}
)
 
# Data collator function for Trainer
def collate_fn(batch):
    """Collate tokenized examples into a causal-LM training batch.

    Args:
        batch: Sequence of examples, each a mapping with equal-length
            ``"input_ids"`` and ``"attention_mask"`` sequences.

    Returns:
        A dict with ``"input_ids"``, ``"attention_mask"`` and ``"labels"``
        tensors, each of shape ``(len(batch), seq_len)``. ``labels`` is a
        copy of ``input_ids`` in which every position whose attention mask
        is 0 is replaced by -100.
    """
    input_ids = torch.stack([torch.as_tensor(item["input_ids"]) for item in batch])
    attention_mask = torch.stack([torch.as_tensor(item["attention_mask"]) for item in batch])

    # labels = input_ids for causal LM, but padding must not be trained on:
    # -100 is the default ignore_index of torch.nn.CrossEntropyLoss, so masked
    # positions drop out of the loss. This matters here because the pad token
    # may be the EOS token, which would otherwise be learned at every pad slot.
    labels = input_ids.clone()
    labels[attention_mask == 0] = -100

    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels,
    }
 
 
# Training arguments
# fp16 mixed precision is only requested when CUDA is available:
# TrainingArguments rejects fp16=True on a machine without a supported
# accelerator, so a hard-coded True breaks CPU-only runs.
use_fp16 = torch.cuda.is_available()

if use_fp16:
    # The AMP gradient scaler cannot unscale float16 gradients, and the base
    # model was loaded in float16. Keep every trainable parameter in float32;
    # this is a no-op when the adapter weights are already float32.
    for param in model.parameters():
        if param.requires_grad:
            param.data = param.data.float()

training_args = TrainingArguments(
    output_dir=f"./training_output/{timestamp}",
    per_device_train_batch_size=2,
    num_train_epochs=1,
    logging_steps=1,
    save_steps=10,
    fp16=use_fp16,
    learning_rate=5e-4,
    save_total_limit=1,
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    data_collator=collate_fn,
)

# Train
trainer.train()

# Save LoRA fine-tuned model together with the tokenizer
model.save_pretrained(peft_dir)
tokenizer.save_pretrained(peft_dir)
print(f"LoRA fine-tuned model saved to: {peft_dir}")
 

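# Output observed when running this cell:
# RuntimeError: Failed to import transformers.trainer because of the following error (look up to see its traceback):
# cannot import name 'clear_device_cache' from 'accelerate.utils.memory' (c:\Users\SreeKeerthiReddyThat\Desktop\Image-generation\venv\Lib\site-packages\accelerate\utils\memory.py)
# Likely cause (to be confirmed against the installed versions): the installed `accelerate`
# is older than the installed `transformers` expects. Upgrading accelerate
# (e.g. `pip install -U accelerate`) and restarting the kernel should resolve the import error.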