In [2]:
%%capture
!pip install pip3-autoremove
!pip-autoremove torch torchvision torchaudio -y
!pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu121
!pip install unsloth

In [3]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True 

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B", # or choose "unsloth/Llama-3.2-1B"
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.2.15: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    GPU: Tesla P100-PCIE-16GB. Max memory: 15.888 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 6.0. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.35G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

In [4]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2025.2.15 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [5]:
import pandas as pd
df = pd.read_csv("/kaggle/input/desi-hip-hop-lyrics-verses-reverse-prompt/lyrics_described.csv")

In [6]:
import pandas as pd
from datasets import Dataset

# Updated prompt to reflect a rap lyrics generation persona
rap_alpaca_prompt = """You are a master rapper renowned for your innovative lyricism and style. 
Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = "<|endoftext|>"  # Update this based on your tokenizer's EOS token

def format_dataset(df):
    texts = []
    for _, row in df.iterrows():
        instruction = "Generate a rap verse that matches the given description."
        # Input now includes artist name, title, and reverse prompt
        input_text = f"Artist: {row['artist']}\nTitle: {row['title']}\nDescription: {row['reverse_prompt']}"
        response_text = row["verse"]  # The actual lyrics
        # Format text similar to the Alpaca dataset using the updated prompt
        formatted_text = rap_alpaca_prompt.format(instruction, input_text, response_text) + EOS_TOKEN
        texts.append(formatted_text)
    
    return texts

# Apply formatting function
df["text"] = format_dataset(df)

# Convert to Hugging Face Dataset
dataset = Dataset.from_pandas(df[["text"]])


In [11]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=TrainingArguments(
        per_device_train_batch_size=8,  # Increase to 16
        gradient_accumulation_steps=4,     # Effective batch size = 16 * 4 = 64
        num_train_epochs=1,
        warmup_steps=5,
        learning_rate=2e-4,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        report_to="none",
    ),
)


Converting train dataset to ChatML (num_proc=2):   0%|          | 0/3550 [00:00<?, ? examples/s]

Applying chat template to train dataset (num_proc=2):   0%|          | 0/3550 [00:00<?, ? examples/s]

Tokenizing train dataset (num_proc=2):   0%|          | 0/3550 [00:00<?, ? examples/s]

Truncating train dataset (num_proc=2):   0%|          | 0/3550 [00:00<?, ? examples/s]

In [12]:
#@title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla P100-PCIE-16GB. Max memory = 15.888 GB.
15.494 GB of memory reserved.


In [13]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 3,550 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 8 | Gradient Accumulation steps = 4
\        /    Total batch size = 32 | Total steps = 111
 "-____-"     Number of trainable parameters = 24,313,856


Step,Training Loss
1,2.7873
2,2.6233
3,2.684
4,2.5775
5,2.4769
6,2.2028
7,2.4546
8,2.2338
9,2.3602
10,2.0097


In [14]:
model.save_pretrained("lora_model") # Local saving
tokenizer.save_pretrained("lora_model")


('lora_model/tokenizer_config.json',
 'lora_model/special_tokens_map.json',
 'lora_model/tokenizer.json')

In [None]:
from unsloth import FastLanguageModel
import torch

# Enable faster inference
FastLanguageModel.for_inference(model)


def generate_rap_lyrics(description, artist_name="Unknown Artist", song_title="New Track", language="English"):
    # Modified instruction to include language preference
    instruction = f"Generate a rap verse in {language} that matches the given description."
    input_text = f"Artist: {artist_name}\nTitle: {song_title}\nDescription: {description}\nLanguage: {language}"
    
    prompt = rap_alpaca_prompt.format(instruction, input_text, "")
    
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
    
    from transformers import TextStreamer
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
    
    generated_ids = model.generate(
        input_ids=inputs.input_ids, 
        attention_mask=inputs.attention_mask,
        streamer=text_streamer, 
        max_new_tokens=256,         # Longer output for complete verses
        temperature=0.5,            # Add some creativity
        top_p=0.5,                  # Nucleus sampling for more diverse outputs
        repetition_penalty=1.2,     # Add repetition penalty to avoid repeated phrases
        no_repeat_ngram_size=3,     # Avoid repeating 3-grams
        do_sample=True,             # Enable sampling
        pad_token_id=tokenizer.eos_token_id
    )
    
    # Get generated text without the prompt
    generated_text = tokenizer.decode(generated_ids[0, inputs.input_ids.shape[1]:], skip_special_tokens=True)
    return generated_text

# Example usage - Hindi verse with Mumbai references
description = "A gritty street narrative with references to Mumbai streets and hustle culture"
artist_name = "Divine"
song_title = "Gully Flow"

print("\n🎵 Generated Hindi Rap Verse 🎵\n")
generate_rap_lyrics(description, artist_name, song_title, language="Hindi")

# Example with Hinglish option
description = "A motivational anthem about rising from struggles and achieving success despite obstacles"
artist_name = "Raftaar"
song_title = "Rise Up"

print("\n🎵 Generated Hinglish Rap Verse 🎵\n")
generate_rap_lyrics(description, artist_name, song_title, language="Hinglish")

# Example with pure Hindi lyrics
description = "A song about youth culture and social change in modern India"
artist_name = "MC Altaf"
song_title = "Naya Zamana"

print("\n🎵 Generated Pure Hindi Rap Verse 🎵\n")
generate_rap_lyrics(description, artist_name, song_title, language="Pure Hindi")


🎵 Generated Hindi Rap Verse 🎵

Gully flow gully flow
Mumbai mein chal rhaa huun maine kuchh bhi nahiin kiya
Kabhi kabhi mujhe lagtaa hai jaise maiyaa kaanpataa huaa
Aur jab bhee meraa naam sunaate tou merey saath meiin sabko jaantaa hain
Bhai logon ko parr bhoolnaa naa padega
Jaanke merrie gaane aur tere liye khud seey banake baat karuun
