# Code2Prompt: LoRA Fine-Tuning for Code Generation

This notebook demonstrates how to fine-tune a language model to generate code from natural language descriptions using LoRA (Low-Rank Adaptation).

**Base Implementation**: Adapted from [Liquid4All/leap-finetune](https://github.com/Liquid4All/leap-finetune)

## Features
- Fine-tune Qwen2.5-1.5B with LoRA
- Code generation from natural language
- Optimized for Manim mathematical animations
- Memory-efficient training on Colab


In [None]:
# Install required dependencies
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q transformers accelerate peft datasets bitsandbytes
!pip install -q wandb tensorboard
!pip install -q scipy scikit-learn matplotlib seaborn


In [None]:
import torch
import json
import pandas as pd
from transformers import (
    AutoTokenizer, 
    AutoModelForCausalLM, 
    TrainingArguments, 
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model, TaskType, PeftModel
from datasets import Dataset
import numpy as np
import warnings
# Suppress all Python warnings for the rest of the session (no category or module filter is given)
warnings.filterwarnings("ignore")

# Select the compute device: CUDA when PyTorch can see a GPU, otherwise CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Report the selection
print(f"Using device: {device}")
print(f"GPU available: {torch.cuda.is_available()}")

# Name and total memory are only queried when a CUDA device was selected
if device.type == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")


In [None]:
# Configuration: every tunable for the run lives in this one dict.
CONFIG = dict(
    # Base model and sequence length
    model_name="Qwen/Qwen2.5-1.5B",
    max_length=1024,
    # Optimisation schedule
    learning_rate=2e-4,
    num_epochs=3,
    batch_size=1,
    gradient_accumulation_steps=4,
    warmup_ratio=0.03,
    weight_decay=0.01,
    # Checkpointing, evaluation and logging cadence (in steps)
    save_steps=500,
    eval_steps=500,
    logging_steps=10,
    output_dir="./outputs",
    # LoRA adapter hyperparameters
    lora_rank=16,
    lora_alpha=32,
    lora_dropout=0.1,
)

# Echo the settings, one indented "key: value" line per entry under a header line
print(
    "Configuration:",
    *(f"  {name}: {setting}" for name, setting in CONFIG.items()),
    sep="\n",
)
