## Step 1: Install Dependencies

In [None]:
%%capture
# Install the notebook's dependencies. `%%capture` has to stay on the first line
# of the cell; it hides pip's output, so a failed install is not visible here.
# Install Unsloth for efficient LLM fine-tuning (from GitHub, with its `colab-new` extra)
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# `--no-deps` installs these packages without pulling in or changing their dependencies.
# NOTE(review): nothing in this cell is version-pinned, so a fresh runtime may
# resolve different releases than the ones this notebook was written against.
!pip install --no-deps xformers trl peft accelerate bitsandbytes triton
# `datasets` provides `load_dataset`, used to read the CSV in Step 5.
!pip install datasets

In [None]:
# Verify the PyTorch installation and report which GPU (if any) is attached.
import torch

# Query CUDA availability once and reuse the answer for both the report and the branch.
cuda_available = torch.cuda.is_available()

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_available}")
if cuda_available:
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is in bytes; convert to GiB for display.
    print(f"GPU Memory: {gpu_props.total_memory / 1024**3:.1f} GB")
else:
    # The original message was mis-encoded (mojibake), which made the
    # instruction unreadable; the intended symbols are restored here.
    print("⚠️ No GPU detected! Go to Runtime → Change runtime type → GPU")

## Step 2: Upload Dataset
Upload the `bbc_hindi_articles_with_categories_cleaned.csv` file when prompted. The upload is skipped if the file is already present in the working directory.

In [None]:
from google.colab import files
import os

# Make sure the training CSV is in the working directory, asking for an upload
# only when it is not already there.
csv_path = "bbc_hindi_articles_with_categories_cleaned.csv"

if os.path.exists(csv_path):
    print(f"✓ File already exists: {csv_path}")
else:
    print("Please upload the CSV file:")
    uploaded = files.upload()  # mapping keyed by the uploaded file names
    if csv_path in uploaded:
        print("✓ File uploaded successfully!")
    else:
        # Previously a wrong or cancelled upload passed silently and the
        # notebook only failed later, when the dataset was loaded. Stop here
        # with a message that says exactly what is missing.
        received = ", ".join(sorted(uploaded)) or "nothing"
        raise FileNotFoundError(
            f"Expected an upload named '{csv_path}' but received: {received}. "
            "Rename the file to match and run this cell again."
        )

## Step 3: Load the Model
Using Unsloth to load Llama-3 8B in 4-bit quantization

In [None]:
from unsloth import FastLanguageModel
import torch

# Loader settings. `max_seq_length` is reused when the trainer is configured.
max_seq_length = 2048
dtype = None  # Auto-detect
load_in_4bit = True  # Use 4-bit quantization to reduce memory

# Load the pre-quantized Llama-3 8B checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/llama-3-8b-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# The check mark was mis-encoded (mojibake) in the original status message.
print("✓ Model loaded successfully!")

## Step 4: Configure LoRA Adapters
Setting up Low-Rank Adaptation for efficient fine-tuning

In [None]:
# Wrap the loaded model with LoRA adapters.
# NOTE(review): `model` is rebound to the wrapped model, so re-running this cell
# passes an already-adapted model back in; how Unsloth handles that is not
# visible here. Re-run the model-loading cell first to be safe.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank
    # Adapt the attention projections and the MLP projections.
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,  # same seed value as the trainer uses
    use_rslora=False,
    loftq_config=None,
)

# The check mark was mis-encoded (mojibake) in the original status message.
print("✓ LoRA adapters configured!")

## Step 5: Prepare the Dataset

In [None]:
from datasets import load_dataset

def formatting_prompts_func(example, eos_token=None):
    """Build one training prompt per row of a batched dataset example.

    Each prompt has the layout
    ``### Headline: <h>\\n ### Category: <c>  ### Article: <a><eos>``,
    which is the same template the inference cells use up to the article marker.

    Args:
        example: Mapping holding parallel ``'Headline'``, ``'Category'`` and
            ``'Content'`` sequences, as passed by ``Dataset.map(batched=True)``.
        eos_token: End-of-sequence marker appended to every prompt. When
            omitted, the ``eos_token`` of the notebook-level ``tokenizer`` is
            used if that object exists; otherwise nothing is appended.

    Returns:
        ``{"text": [...]}`` with one formatted prompt string per input row.
    """
    if eos_token is None:
        # Without a trailing EOS token the training text never marks where an
        # article ends, so the fine-tuned model cannot learn to stop generating.
        # Look the tokenizer up lazily so the function still works without one.
        eos_token = getattr(globals().get("tokenizer"), "eos_token", None) or ""

    rows = zip(example['Headline'], example['Category'], example['Content'])
    return {
        "text": [
            f"### Headline: {headline}\n ### Category: {category}  ### Article: {content}{eos_token}"
            for headline, category, content in rows
        ]
    }

# Load the CSV as a single "train" split, then add the formatted `text` column
# that the trainer reads.
csv_path = "bbc_hindi_articles_with_categories_cleaned.csv"
dataset = load_dataset('csv', data_files=csv_path, split="train")
dataset = dataset.map(formatting_prompts_func, batched=True)

# Show the row count and a preview of the first row as a sanity check.
# The check mark was mis-encoded (mojibake) in the original status message.
first_row = dataset[0]
print(f"✓ Dataset loaded: {len(dataset)} samples")
print(f"\nSample headline: {first_row['Headline'][:80]}...")
print(f"Category: {first_row['Category']}")

## Step 6: Setup Trainer

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
import os

# Turn off Weights & Biases logging through its environment switch
# (`report_to="none"` below disables reporting integrations as well).
os.environ["WANDB_DISABLED"] = "true"

# Choose the mixed-precision mode once: bf16 where the GPU supports it, fp16 otherwise.
use_bf16 = torch.cuda.is_bf16_supported()

# Supervised fine-tuning on the `text` column built in the dataset cell.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=TrainingArguments(
        num_train_epochs=1,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,  # 2 x 4 = 8 sequences per optimizer step per device
        warmup_steps=5,
        learning_rate=2e-4,
        fp16=not use_bf16,
        bf16=use_bf16,
        logging_steps=10,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        report_to="none",
    ),
)

# The check mark was mis-encoded (mojibake) in the original status message.
print("✓ Trainer configured!")

## Step 7: Check GPU Memory Before Training

In [None]:
# Record the GPU's capacity and the peak memory reserved so far (both in GiB,
# rounded to 3 decimals) as the baseline for the post-training summary.
gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / 1024 ** 3, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)

print(
    f"GPU: {gpu_stats.name}",
    f"Max memory: {max_memory} GB",
    f"Memory reserved: {start_gpu_memory} GB",
    sep="\n",
)

## Step 8: Train the Model 🚀

In [None]:
# Run fine-tuning. The returned stats object carries the runtime metrics that
# the summary cell reads.
print("Starting training...")
trainer_stats = trainer.train()
# The check mark was mis-encoded (mojibake) in the original status message.
print("\n✓ Training complete!")

In [None]:
# Training Summary: runtime plus peak GPU memory, compared against the
# baseline captured before training started.
train_seconds = trainer_stats.metrics['train_runtime']

used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)  # growth since the baseline
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)

print(
    f"Training time: {train_seconds:.1f} seconds",
    f"Training time: {train_seconds/60:.2f} minutes",
    f"Peak memory: {used_memory} GB ({used_percentage}%)",
    f"Memory for LoRA training: {used_memory_for_lora} GB ({lora_percentage}%)",
    sep="\n",
)

## Step 9: Generate Hindi Articles! 📝
Now let's test the fine-tuned model.

In [None]:
# Switch to inference mode
FastLanguageModel.for_inference(model)

# Test headline 1
# These literals were mis-encoded (mojibake) in the original, so the model was
# prompted with unreadable bytes rather than Hindi. Restored Devanagari text:
headline = "भारतीय शेयर बाजार में तेजी"
category = "भारत"

# The prompt repeats the training template up to the article marker, so the
# model continues with the article body.
prompt = f"### Headline: {headline}\n ### Category: {category}  ### Article: "
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

# No sampling arguments are passed here, unlike the later test cells.
outputs = model.generate(**inputs, max_new_tokens=128, use_cache=True)
# Drop special tokens (begin/end-of-text markers) so they do not leak into the printed article.
generated_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

print("="*60)
print(f"Headline: {headline}")
print(f"Category: {category}")
print("="*60)
print("Generated Article:")
# Show only what follows the article marker; fall back to the full text if the marker is missing.
print(generated_text.split("### Article:")[1] if "### Article:" in generated_text else generated_text)

In [None]:
# Test headline 2 with sampling
# These literals were mis-encoded (mojibake) in the original, so the model was
# prompted with unreadable bytes rather than Hindi. Restored Devanagari text:
headline = "पीएम मोदी अफ्रीका दौरे पर गए"
category = "भारत"

# The prompt repeats the training template up to the article marker.
prompt = f"### Headline: {headline}\n ### Category: {category}  ### Article: "
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

# Sampling is enabled and unseeded, so the output differs from run to run.
outputs = model.generate(
    **inputs,
    max_new_tokens=256,
    use_cache=True,
    do_sample=True,
    top_p=0.9,
    temperature=0.8,
)
# Drop special tokens (begin/end-of-text markers) so they do not leak into the printed article.
generated_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

print("="*60)
print(f"Headline: {headline}")
print(f"Category: {category}")
print("="*60)
print("Generated Article:")
# Show only what follows the article marker; fall back to the full text if the marker is missing.
print(generated_text.split("### Article:")[1] if "### Article:" in generated_text else generated_text)

In [None]:
# Try your own headline!
# The default literals were mis-encoded (mojibake) in the original, so the model
# was prompted with unreadable bytes rather than Hindi. Restored Devanagari text:
headline = "क्रिकेट विश्व कप में भारत की जीत"  # Change this!
category = "खेल"  # Change this!

# The prompt repeats the training template up to the article marker.
prompt = f"### Headline: {headline}\n ### Category: {category}  ### Article: "
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

# Sampling is enabled and unseeded, so the output differs from run to run.
outputs = model.generate(
    **inputs,
    max_new_tokens=256,
    use_cache=True,
    do_sample=True,
    top_p=0.9,
    temperature=0.7,
)
# Drop special tokens (begin/end-of-text markers) so they do not leak into the printed article.
generated_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

print("="*60)
print(f"Headline: {headline}")
print(f"Category: {category}")
print("="*60)
print("Generated Article:")
# Show only what follows the article marker; fall back to the full text if the marker is missing.
print(generated_text.split("### Article:")[1] if "### Article:" in generated_text else generated_text)

## Step 10: Save the Model (Optional)

In [None]:
# Save LoRA adapters locally
model.save_pretrained("hindi_article_lora")
tokenizer.save_pretrained("hindi_article_lora")
print("‚úì Model saved to 'hindi_article_lora'")

# Download the saved model
from google.colab import files
!zip -r hindi_article_lora.zip hindi_article_lora
files.download('hindi_article_lora.zip')

## Conclusion

This notebook demonstrated:
- Loading Llama-3 8B with 4-bit quantization using Unsloth
- Fine-tuning with LoRA adapters on Hindi news data
- Generating Hindi articles from headlines

**Other possible tasks with this dataset:**
- Generating headlines from articles
- Article classification by category
- Headline classification by category