In [1]:
# Install dependencies (Uncomment if needed)
# !pip install transformers datasets torch huggingface_hub

import os
import torch
import pandas as pd
from datasets import load_dataset, DatasetDict
from transformers import (
    T5Tokenizer, T5ForConditionalGeneration, TrainingArguments, Trainer, DataCollatorForSeq2Seq
)
from huggingface_hub import login
import torch
torch.utils.backcompat.broadcast_warning.enabled = False  # Disable PyTorch warnings

import warnings
import wandb

# Suppress specific FutureWarnings
warnings.simplefilter("ignore", category=FutureWarning)
warnings.simplefilter("ignore", category=UserWarning)


# Authenticate with Hugging Face
HF_TOKEN = "hf_xxxxxxx"  # Replace with your HF token
login(token=HF_TOKEN)

# Load dataset from Hugging Face
dataset = load_dataset("treysarkar/PromptPimpSynth")

# Split 90% train / 10% validation
dataset = DatasetDict({
    "train": dataset["train"].shuffle(seed=42).select(range(int(0.9 * len(dataset["train"])))), 
    "validation": dataset["train"].shuffle(seed=42).select(range(int(0.9 * len(dataset["train"])), len(dataset["train"])))
})

# Load tokenizer
MODEL_NAME = "google-t5/t5-base"
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)

# Tokenization function
def tokenize_function(examples):
    inputs = tokenizer(
        examples["short_prompt"], max_length=256, padding="max_length", truncation=True
    )
    targets = tokenizer(
        examples["long_prompt"], max_length=256, padding="max_length", truncation=True
    )
    inputs["labels"] = targets["input_ids"]
    return inputs

# Apply tokenization
tokenized_datasets = dataset.map(tokenize_function, batched=True, remove_columns=["short_prompt", "long_prompt"])

# Let us now load the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME).to(device)
wandb.login(key="5xxxxxxx") # wandB for tracking in visual fashionm
wandb.init(project="imagia-finetune-v3", name="imagiav3")


training_args = TrainingArguments(
    run_name="T5-Imagia-runv2",
    output_dir="./T5-finetuned",
    eval_strategy="steps",
    eval_steps=800,
    save_strategy="epoch",
    logging_dir="./logs",
    logging_steps=50,
    learning_rate=3e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    push_to_hub=True,  
    hub_model_id="treysarkar/T5-PromptPimp-Finetunedv2",  
    hub_token=HF_TOKEN,  
    report_to="wandb"
    
)

# Data collator for padding
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    processing_class=tokenizer,
    data_collator=data_collator
)

# Start training
trainer.train()

# Save model locally
trainer.save_model("./T5-finetuned")
tokenizer.save_pretrained("./T5-finetuned")


trainer.push_to_hub()

print("Training completed! Model uploaded to Hugging Face.")


README.md:   0%|          | 0.00/24.0 [00:00<?, ?B/s]

train.csv:   0%|          | 0.00/10.6M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/16110 [00:00<?, ? examples/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Map:   0%|          | 0/14499 [00:00<?, ? examples/s]

Map:   0%|          | 0/1611 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mtreysarkar[0m ([33mtreysarkar-auk[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Step,Training Loss,Validation Loss
800,1.4486,1.396306
1600,1.4288,1.350275
2400,1.3987,1.328038
3200,1.4023,1.316131
4000,1.3899,1.310221


No files have been modified since last commit. Skipping to prevent empty commit.
No files have been modified since last commit. Skipping to prevent empty commit.


Training completed! Model uploaded to Hugging Face.


In [8]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, TextStreamer
import torch
import sys
import time

# Load the model and tokenizer
model_name = "treysarkar/T5-PromptPimp-Finetunedv2"
device = "cuda" if torch.cuda.is_available() else "cpu"  # Automatically detect device

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)  # Move model to GPU

# For the ChatGPT ahhhhhh style output
streamer = TextStreamer(
    tokenizer,
    skip_prompt=True,
    skip_special_tokens=True
)

while True:  # Infinite loop
    print("\n\n")
    text = "Imagine please\n"

    # Animated typing effect
    for char in text:
        sys.stdout.write(char)
        sys.stdout.flush()
        time.sleep(0.02)

    prmpt = input("")  # User enters prompt

    if prmpt.strip().lower() == "exit":  # Escape condition
        break

    # Tokenize input and move tensors to the correct device
    input_ids = tokenizer(prmpt, return_tensors="pt").input_ids.to(device)

    # Generate response with streaming
    print("\nModel Output:\n")
    with torch.no_grad():  # Disable gradients for inference
        model.generate(
            input_ids, 
            max_new_tokens=50,  
            streamer=streamer  
        )





Imagine please


 Sky fills with cirrostratus clouds, sun sets behind dark triangle cloud.



Model Output:

A close up view of a large cirrostratus cloud that is filled with dark circles. The sun is setting behind the cirrostratus cloud. The sky is filled with dark circles that are shaped like triangles



Imagine please


 exit
