In [1]:
# !pip install "unsloth[colab] @ git+https://github.com/unslothai/unsloth.git"

In [2]:
# !pip install "git+https://github.com/huggingface/transformers.git"

In [3]:
# !pip install trl

In [4]:
from datasets import load_dataset
import pandas as pd

ModuleNotFoundError: No module named 'datasets'

In [None]:
# !pip install -U "huggingface_hub[cli]"  # the huggingface-cli command ships with huggingface_hub

In [None]:
# Authenticate this environment with the Hugging Face Hub via the CLI
# (shell command run through IPython's `!` escape).
!huggingface-cli login

In [None]:

# Load the sample dataset from the Hugging Face Hub for inspection.
# NOTE(review): `dataset` is rebound to "vermaavesh/test_dataset" in a later cell
# before the trainer is built, so in the visible cells this copy is only displayed.
dataset = load_dataset("mememahal1/llama3-100-texts-sample")

In [None]:
# Show the loaded dataset's repr as the cell output.
dataset

In [None]:
from unsloth import FastLanguageModel
import torch
# Loading options; these names are read again by the model-loading cell and
# (max_seq_length) by the trainer cell.
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
# NOTE(review): `fourbit_models` is not read anywhere in the visible cells; the
# model name is passed to from_pretrained as a separate string literal.
fourbit_models = [
    # "unsloth/mistral-7b-bnb-4bit",
    # "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
    # "unsloth/llama-2-7b-bnb-4bit",
    # "unsloth/gemma-7b-bnb-4bit",
    # "unsloth/gemma-7b-it-bnb-4bit", # Instruct version of Gemma 7b
    # "unsloth/gemma-2b-bnb-4bit",
    # "unsloth/gemma-2b-it-bnb-4bit", # Instruct version of Gemma 2b
    "unsloth/llama-3-8b-bnb-4bit"] # [NEW] 15 Trillion token Llama-3

In [None]:
# Load the base checkpoint together with its tokenizer, using the
# max_seq_length / dtype / load_in_4bit values set in the configuration cell.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

In [None]:
# Wrap the loaded model with LoRA adapters on the listed projection modules.
# `model` is rebound to the wrapped model, which is what the trainer and the
# inference helper use afterwards.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)
# End-of-sequence marker; formatting_prompts_func appends it to every training text.
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN

In [None]:
# Prompt template shared by training (formatting_prompts_func) and inference
# (generate_meme). It contains three positional "{}" slots that str.format fills,
# in order, with the instruction, the input and the response.
# Note that the template ends with a newline *after* the response slot.
meme_prompt = """Generate a one-liner caption (top text) for a meme image. The caption should directly answer the meme's context or question, adhering strictly to the format and tags provided. No additional commentary or text outside of the caption is required.
You must keep the following things in mind while creating top text:
1. Generate a funny top text in max 12-13 words without any emojis or anything extra. Keep it simple consisting only of words.
2. You have to make sure that the top text generated makes sense with the bottom text that is provided below.
3. Please don't give me any reasoning, explanation, arguments, or any explanation in brackets
4. Don't copy-paste words from the bottom text. Generate a top text such that when one reads the top text and then the bottom text one can make sense of the complete meme and also find it extremely funny
5. Try to use all the mandatory formats when we refresh the response


### Instruction:
{}
### Input:
{}
### Response:
{}
"""
def formatting_prompts_func(examples):
    """Render a batch of dataset rows into complete training prompts.

    Meant to be passed to ``dataset.map(..., batched = True)``: ``examples`` is
    indexed by the column names "instruction", "input" and "output", each
    yielding a parallel sequence with one entry per row.

    Returns:
        A dict with a single "text" key holding one string per row: the
        meme_prompt template filled with that row's three fields, followed by
        EOS_TOKEN.
    """
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    # Must add EOS_TOKEN, otherwise your generation will go on forever!
    rendered = [
        meme_prompt.format(instruction, context, response) + EOS_TOKEN
        for instruction, context, response in rows
    ]
    return {"text": rendered}

from datasets import load_dataset
# Load the fine-tuning dataset and run formatting_prompts_func over it in
# batches, which yields a "text" field holding the fully formatted prompt per row.
# This rebinds `dataset`, replacing the sample dataset loaded in an earlier cell.
dataset = load_dataset("vermaavesh/test_dataset")
dataset = dataset.map(formatting_prompts_func, batched = True)

In [None]:
# Inspect the train split after formatting (this is the split handed to the trainer).
dataset['train']

In [None]:
# NOTE(review): `dataset1` is not referenced by any other visible cell; the
# trainer is built from dataset['train']. Presumably a leftover from the template
# this notebook was adapted from — confirm whether this download is still needed.
dataset1 = load_dataset("yahma/alpaca-cleaned", split = "train")

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments

# Build the supervised fine-tuning trainer over the formatted train split.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset['train'],
    dataset_text_field = "text", # field produced by formatting_prompts_func
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4, # 2 x 4 = 8 sequences per optimizer step per device
        warmup_steps = 5,
        max_steps = 60,
        learning_rate = 2e-4,
        # Exactly one mixed-precision flag is set: bf16 when the GPU reports
        # support for it, fp16 otherwise.
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

In [None]:
# Baseline GPU figures taken before training. `start_gpu_memory` and `max_memory`
# are read again by the post-training report cell. Sizes are bytes / 2**30,
# rounded to 3 decimals.
gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / 2**30, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 2**30, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

In [None]:
# Run fine-tuning. The returned object's metrics['train_runtime'] is reported
# in the following cell.
trainer_stats = trainer.train()

In [None]:
# Post-training report: peak reserved GPU memory (bytes / 2**30, 3 decimals),
# the share attributable to training (peak minus the pre-training baseline),
# and both as a percentage of the device's total memory.
used_memory = round(torch.cuda.max_memory_reserved() / 2**30, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage, lora_percentage = (
    round(amount / max_memory * 100, 3)
    for amount in (used_memory, used_memory_for_lora)
)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

In [None]:
# Uses the notebook-level meme_prompt template, model and tokenizer defined above.
def generate_meme(user_prompt, image_desc, template_text, max_new_tokens = 64) -> str:
    """Generate a meme caption with the fine-tuned model.

    Args:
        user_prompt: Text placed in the "### Instruction:" slot of meme_prompt.
        image_desc: Description of the meme image; placed in the "### Input:"
            slot as "Image Description: ...".
        template_text: Text of the meme template; appended to the same slot as
            "Template Text: ...".
        max_new_tokens: Upper bound on the number of generated tokens
            (defaults to 64, the previously hard-coded value).

    Returns:
        Only the newly generated text: special tokens skipped, surrounding
        whitespace stripped and trailing ":" characters removed.
    """
    FastLanguageModel.for_inference(model) # Enable native 2x faster inference
    input_text = f"Image Description: {image_desc}. Template Text: {template_text}"
    prompt = meme_prompt.format(
        user_prompt, # instruction
        input_text, # input
        "", # output - leave this blank for generation!
    )
    # meme_prompt ends with a newline *after* the response slot. In the training
    # text that newline comes after the caption (right before EOS); left in the
    # inference prompt it presents the model with an already-finished, empty
    # response. Drop exactly that one newline so generation starts where the
    # caption belongs.
    if prompt.endswith("\n"):
        prompt = prompt[:-1]
    inputs = tokenizer([prompt], return_tensors = "pt").to("cuda")

    outputs = model.generate(**inputs, max_new_tokens = max_new_tokens, use_cache = True)
    # The generated sequence starts with the prompt tokens. Slice them off rather
    # than string-splitting on "Response:\n", and skip special tokens so the
    # EOS/BOS markers do not end up in the caption.
    prompt_length = inputs["input_ids"].shape[1]
    caption = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens = True)
    return caption.strip().rstrip(":")

In [None]:
# Example call: the three strings below fill the instruction slot (user_prompt)
# and the input slot (image_desc + template_text) of the prompt.
user_prompt = "Humorous meme about the challenges of WFH (Work From Home) culture in India."
image_desc = "In this image, an individual with a mustache is shown making a stern face, with his eyes wide open in a rather threatening manner. The text Amma Behen pe aa jaunga mai loosely translates to I will bring your mother and sister into this, suggesting a warning that things could escalate to involve family, which is often a serious turn in many cultures. The close-up and grainy quality of the image suggest it's a screen capture from a video, possibly adding to the dramatic effect."
template_text = "Amma behen pe aa jaunga mai"

# Generate a caption for this example and print it.
out = generate_meme(user_prompt, image_desc, template_text)
print(out)

In [None]:
# Save the model and the tokenizer into the same local directory.
# NOTE(review): `model` is the LoRA-wrapped model here, so this presumably
# writes only the adapter weights rather than a merged checkpoint — confirm.
model.save_pretrained("mememahal_unsloth-llama3-8b-bnb-4bit") # Local saving
tokenizer.save_pretrained("mememahal_unsloth-llama3-8b-bnb-4bit")

In [None]:
import os

# Never hard-code a Hub token in the notebook: read it from the HF_TOKEN
# environment variable instead. When the variable is unset, token=None lets
# push_to_hub fall back to the credentials stored by `huggingface-cli login`
# (run in an earlier cell), whereas the old "..." placeholder was sent as the
# credential itself.
hf_token = os.environ.get("HF_TOKEN")
model.push_to_hub("vermaavesh/mememahal_unsloth-llama3-8b-bnb-4bit", token = hf_token) # Online saving
tokenizer.push_to_hub("vermaavesh/mememahal_unsloth-llama3-8b-bnb-4bit", token = hf_token) # Online saving