In [None]:
# Installation of the necessary libraries
!pip install -q -U torch transformers datasets peft bitsandbytes accelerate "numpy<2.0"
!pip install -q -U trl==0.9.6
# Login to Hugging Face (important for later uploads!)
from huggingface_hub import login


my_token = ""

login(token=my_token)

In [2]:
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
)
from peft import LoraConfig
from trl import SFTTrainer

# 1. Configuration
MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
NEW_MODEL_NAME = "Qwen2.5-1.5B-SQL-Assistant"
DATASET_ID = "b-mc2/sql-create-context"

# 2. Load & prepare the dataset
# Only 1000 shuffled examples are kept so that the demo trains quickly.
dataset = load_dataset(DATASET_ID, split="train").shuffle(seed=42).select(range(1000))

# trust_remote_code is not passed: the inference cell loads the same tokenizer
# without it, and the flag would allow code from the Hub repository to execute.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Fall back to EOS as the padding token only when the tokenizer ships without one.
# Aliasing pad to EOS unconditionally gives padding and end-of-turn the same id;
# a data collator that masks pad positions out of the labels then also masks the
# end-of-turn token, so the fine-tuned model is never trained to stop generating.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def format_prompt(sample):
    """Render one dataset row as a single chat-formatted training string.

    Args:
        sample: Mapping that provides the keys ``context``, ``question`` and
            ``answer``.

    Returns:
        A dict with the single key ``"text"``. Its value is a system turn, a
        user turn (context followed by the question) and an assistant turn
        (the answer), each wrapped in ``<|im_start|>`` / ``<|im_end|>`` markers
        and separated by newlines.
    """
    system_turn = "<|im_start|>system\nYou are a SQL expert.<|im_end|>"
    user_turn = f"<|im_start|>user\n{sample['context']}\nQuestion: {sample['question']}<|im_end|>"
    assistant_turn = f"<|im_start|>assistant\n{sample['answer']}<|im_end|>"
    return {"text": "\n".join((system_turn, user_turn, assistant_turn))}

# Add the chat-formatted "text" column produced by format_prompt to every row.
dataset = dataset.map(format_prompt)

# 3. Load the model in 4-bit (quantization for efficiency)
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    quantization_config=bnb_config,
)

# 4. LoRA configuration (parameter-efficient fine-tuning)
# Adapters are attached to the four attention projection modules listed below.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# 5. Start training
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,  # 4 x 2 = 8 samples per optimizer step and device
    learning_rate=2e-4,
    optim="paged_adamw_32bit",
    fp16=True,
    logging_steps=10,
)

# The trainer reads the "text" column created by format_prompt and receives the
# LoRA configuration together with the quantized base model.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=512,
    tokenizer=tokenizer,
    peft_config=peft_config,
)

print("Training startet...")
trainer.train()

# 6. Save the trained model and the tokenizer locally, in that order,
# into the same NEW_MODEL_NAME directory.
for artifact in (trainer.model, tokenizer):
    artifact.save_pretrained(NEW_MODEL_NAME)
print("Training beendet und Modell gespeichert.")

In [6]:
from peft import PeftModel

# Reload the base model (if GPU memory is full, restart the kernel first and
# re-run the imports and the configuration constants before this cell).
# The quantization settings repeat the ones used for training (4-bit, nf4,
# float16 compute) so that the adapter runs on the same kind of quantized
# weights it was trained against; a bare load_in_4bit=True would fall back to
# the library defaults instead.
inference_bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=inference_bnb_config,
    device_map="auto",
    dtype=torch.float16
)

# Load the fine-tuned adapter on top of the base model
model_to_test = PeftModel.from_pretrained(base_model, NEW_MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Test query
context = "CREATE TABLE employees (employee_id INT PRIMARY KEY, name VARCHAR(255) NOT NULL, role VARCHAR(255), manager_id INT, FOREIGN KEY (manager_id) REFERENCES employees(employee_id))"
question = "Which employees report to the manager “Julia König”?"

# Prompt in chat format (same system prompt and user layout as in training)
messages = [
    {"role": "system", "content": "You are a SQL expert."},
    {"role": "user", "content": f"{context}\nQuestion: {question}"}
]
text_input = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# Send the inputs to the device the model was placed on instead of assuming "cuda".
inputs = tokenizer(text_input, return_tensors="pt").to(base_model.device)

# Generation
outputs = model_to_test.generate(**inputs, max_new_tokens=50)
# Decode only the newly generated tokens. Splitting the full decoded text on the
# word "assistant" breaks as soon as the schema or the generated SQL contains
# that word.
prompt_length = inputs["input_ids"].shape[1]
result = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

print("-" * 30)
print(f"Generated SQL:\n{result.strip()}")
print("-" * 30)