In [None]:
%%capture
# Install required libraries
!pip install -q -U bitsandbytes transformers peft accelerate datasets \
                einops sentencepiece wandb trl huggingface_hub

In [None]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline
)
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from datasets import load_dataset
from trl import SFTTrainer
import wandb
import os

# Login to Hugging Face Hub (get your token: https://huggingface.co/settings/tokens)
# The training cell sets push_to_hub=True and calls trainer.push_to_hub(),
# so authentication is done up front.
from huggingface_hub import notebook_login
notebook_login()

# Log in to Weights & Biases (get your key: https://wandb.ai/authorize)
# The training arguments use report_to="wandb", so this login is only optional
# if that setting is changed as well.
wandb.login()

In [None]:
# Configuration
# Every tunable value lives here; later cells only reference these names.

# --- Base model, dataset and output location ---
MODEL_NAME: str = "codellama/CodeLlama-7b-hf"
DATASET_NAME: str = "code_x_glue_cc_code_to_text"
OUTPUT_DIR: str = "codellama-7b-code-summarization"

# --- LoRA adapter hyperparameters ---
LORA_R: int = 64
LORA_ALPHA: int = 16
LORA_DROPOUT: float = 0.1

# --- Batching and sequence length ---
PER_DEVICE_BATCH_SIZE: int = 4
GRADIENT_ACCUMULATION_STEPS: int = 4
MAX_SEQ_LENGTH: int = 512

# --- Optimisation ---
OPTIMIZER: str = "paged_adamw_32bit"
LR: float = 2e-4
NUM_TRAIN_EPOCHS: int = 3
FP16: bool = True  # forwarded to TrainingArguments(fp16=...)

# Quantization Config
# Load the weights in 4-bit using the "nf4" quantization type with double
# quantization enabled, and use float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

In [None]:
# Load model and tokenizer
# The base model is loaded with the 4-bit quantization config defined above
# and device_map="auto" for device placement.
# trust_remote_code is deliberately left at its default (False): enabling it
# allows Python code shipped in the Hub repository to run locally, and the
# Llama architecture is implemented inside transformers itself. Re-enable it
# only if MODEL_NAME is changed to a repository that requires custom code and
# that you trust.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Reuse the end-of-sequence token for padding and pad on the right.
# (presumably this tokenizer ships without a dedicated pad token - verify)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [None]:
# Prepare model for QLoRA training
model = prepare_model_for_kbit_training(model)

# LoRA adapter settings: rank, scaling and dropout come from the
# configuration cell; adapters target the q/k/v/o projection modules and
# no bias terms are trained.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",
)

# Wrap the prepared model with the adapters and report how many
# parameters are trainable.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

In [None]:
# Load and preprocess dataset
# Fetch the "python" configuration of the dataset named in DATASET_NAME.
dataset = load_dataset(path=DATASET_NAME, name="python")

def format_prompt(example):
    """Build the training text for a single dataset record.

    Args:
        example: Mapping that provides a ``code`` entry and a ``docstring``
            entry.

    Returns:
        A dict with one key, ``"text"``, containing the code under a
        ``### Code:`` header followed by the docstring under a
        ``### Summary:`` header.
    """
    text = f"### Code:\n{example['code']}\n\n### Summary:\n{example['docstring']}"
    return dict(text=text)

# Add the "text" prompt column to every record and drop the listed raw columns.
dataset = dataset.map(format_prompt, remove_columns=["code", "docstring", "id"])

# Handles for the splits used by the trainer.
train_dataset, eval_dataset = dataset["train"], dataset["validation"]

In [None]:
# Training configuration
# A positive max_steps caps training at that many optimizer steps and takes
# precedence over num_train_epochs, so the step cap is made an explicit knob
# here instead of a buried literal. Set MAX_STEPS = -1 to train for
# NUM_TRAIN_EPOCHS full epochs instead.
MAX_STEPS = 500

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=PER_DEVICE_BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    learning_rate=LR,
    logging_steps=10,
    max_steps=MAX_STEPS,
    num_train_epochs=NUM_TRAIN_EPOCHS,  # only takes effect when MAX_STEPS <= 0
    fp16=FP16,
    optim=OPTIMIZER,
    # Evaluate every 50 steps and checkpoint every 100 steps.
    evaluation_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    save_steps=100,
    # Metrics are sent to Weights & Biases under this run name.
    report_to="wandb",
    run_name="codellama-7b-qlora-code-summarization",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    weight_decay=0.01,
    max_grad_norm=0.3,
    push_to_hub=True
)

# Create trainer
# Supervised fine-tuning on the "text" column; packing is enabled and the
# sequence length is taken from MAX_SEQ_LENGTH.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    dataset_text_field="text",
    max_seq_length=MAX_SEQ_LENGTH,
    packing=True,
)

# Start training
trainer.train()

# Save final model
# First upload through the trainer to the Hugging Face Hub, then write the
# trained model (trainer.model) to OUTPUT_DIR on local disk.
trainer.push_to_hub()
trainer.model.save_pretrained(OUTPUT_DIR)

In [None]:
# Inference test
# The model object handed to the pipeline is the one that was just trained,
# and nothing above guarantees it was left in evaluation mode. Switch it
# explicitly so dropout layers (including the LoRA dropout configured for the
# adapters) are disabled while generating.
model.eval()

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto"
)

# Sample function to summarise; the string is embedded verbatim in the prompt.
test_code = '''
def fibonacci(n):
    """Calculate Fibonacci sequence"""
    a, b = 0, 1
    for _ in range(n):
        yield a
        a, b = b, a + b
'''

# Same "### Code:" / "### Summary:" layout as the training text, with the
# summary left empty for the model to complete.
prompt = (
    f"### Code:\n{test_code}\n\n### Summary:\n"
)
# Sampling settings for the generation call: up to 100 new tokens, sampled
# with temperature 0.2, top-k 50 and top-p 0.95.
generation_kwargs = dict(
    max_new_tokens=100,
    do_sample=True,
    temperature=0.2,
    top_k=50,
    top_p=0.95,
)
result = pipe(prompt, **generation_kwargs)

# Show the text of the first returned generation.
print(result[0]['generated_text'])