# https://github.com/unslothai/unsloth

conda create --name env_llm python=3.11 pytorch-cuda=12.1 pytorch cudatoolkit -c pytorch -c nvidia -y
conda activate env_llm
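# Alternatives: `pip install xformers` or `conda install -c conda-forge xformers`.
# NOTE: the wheel index used below targets CUDA 12.4 (cu124), while the environment above pins pytorch-cuda=12.1.
# Use the index that matches your installed PyTorch build (e.g. .../whl/cu121) so pip does not pull a mismatched torch.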
pip install -U xformers --index-url https://download.pytorch.org/whl/cu124

pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
pip install --no-deps trl peft accelerate bitsandbytes


In [1]:
from unsloth import FastLanguageModel
import torch

# Detect whether a CUDA-capable GPU is usable and report the result.
cuda_available = torch.cuda.is_available()
print(f"CUDA disponible: {cuda_available}")

# Select the torch device that matches the detection result, then describe it.
device = torch.device("cuda" if cuda_available else "cpu")
if not cuda_available:
    print("Se está utilizando la CPU.")
else:
    print(f"Dispositivo actual: {torch.cuda.get_device_name(0)}")
    print(f"Cantidad de GPUs disponibles: {torch.cuda.device_count()}")

# Loading options forwarded to Unsloth below.
max_seq_length = 2048  # also passed to the trainer further down
dtype = None  # NOTE(review): presumably lets Unsloth choose the dtype automatically - confirm
load_in_4bit = True  # request 4-bit loading of the model weights

# Load the Llama 3.1 8B base checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Meta-Llama-3.1-8B",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Names of the projection modules (attention and MLP) that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Attach LoRA adapters to the base model loaded above.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank
    target_modules=lora_target_modules,
    lora_alpha=16,  # LoRA scaling factor
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,  # fixed seed for reproducible adapter initialisation
    use_rslora=False,
    loftq_config=None,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
CUDA disponible: True
Dispositivo actual: NVIDIA GeForce RTX 3060
Cantidad de GPUs disponibles: 1
==((====))==  Unsloth 2025.1.7: Fast Llama patching. Transformers: 4.48.1.
   \\   /|    GPU: NVIDIA GeForce RTX 3060. Max memory: 12.0 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.5.1. CUDA: 8.6. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


  self.register_buffer("cos_cached", emb.cos().to(dtype=dtype, device=device, non_blocking=True), persistent=False)
Unsloth 2025.1.7 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [2]:
# Alpaca-style prompt template with four positional slots. formatting_prompts_func
# fills them, in order, with: the database context, the natural-language question,
# the SQL answer, and the explanation of that SQL.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Company database: {}

### Input:
SQL Prompt: {}

### Response:
SQL: {}

Explanation: {}
"""

# End-of-sequence token taken from the tokenizer; it is appended to every
# formatted training example.
EOS_TOKEN = tokenizer.eos_token

def formatting_prompts_func(examples):
    """Render a batch of dataset rows into full training prompts.

    Args:
        examples: Column-oriented batch (it is used with ``dataset.map(...,
            batched=True)``) providing the ``sql_context``, ``sql_prompt``,
            ``sql`` and ``sql_explanation`` columns.

    Returns:
        A dict with a single ``"text"`` key holding one formatted prompt per
        input row, each terminated by ``EOS_TOKEN``.
    """
    rows = zip(
        examples["sql_context"],
        examples["sql_prompt"],
        examples["sql"],
        examples["sql_explanation"],
    )
    # Every example must end with EOS_TOKEN, otherwise generation will go on forever.
    return {"text": [alpaca_prompt.format(*row) + EOS_TOKEN for row in rows]}

from datasets import load_dataset
# Load the "train" split of the gretelai/synthetic_text_to_sql dataset.
dataset = load_dataset("gretelai/synthetic_text_to_sql", split="train")
# Run formatting_prompts_func over the dataset in batches; it returns a "text"
# entry containing the formatted prompt for each row.
dataset = dataset.map(formatting_prompts_func, batched=True)

In [3]:
# Show the first formatted training example. Index the row first so only one
# record is read, instead of materialising the whole "text" column to take
# its first element.
print(dataset[0]["text"])

Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Company database: CREATE TABLE salesperson (salesperson_id INT, name TEXT, region TEXT); INSERT INTO salesperson (salesperson_id, name, region) VALUES (1, 'John Doe', 'North'), (2, 'Jane Smith', 'South'); CREATE TABLE timber_sales (sales_id INT, salesperson_id INT, volume REAL, sale_date DATE); INSERT INTO timber_sales (sales_id, salesperson_id, volume, sale_date) VALUES (1, 1, 120, '2021-01-01'), (2, 1, 150, '2021-02-01'), (3, 2, 180, '2021-01-01');

### Input:
SQL Prompt: What is the total volume of timber sold by each salesperson, sorted by salesperson?

### Response:
SQL: SELECT salesperson_id, name, SUM(volume) as total_volume FROM timber_sales JOIN salesperson ON timber_sales.salesperson_id = salesperson.salesperson_id GROUP BY salesperson_id, name ORDER BY total_volume DESC;

Explanation: Joins timber_sale

In [4]:
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
from trl import SFTTrainer

# Decide the mixed-precision mode once: bf16 when supported, fp16 otherwise.
bf16_supported = is_bfloat16_supported()

# Optimiser / schedule settings for a short 60-step fine-tuning run.
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # 2 x 4 = total batch size of 8
    warmup_steps=5,
    # num_train_epochs=1,  # Set this for 1 full training run.
    max_steps=60,
    learning_rate=2e-4,
    fp16=not bf16_supported,
    bf16=bf16_supported,
    logging_steps=1,  # log the training loss at every step
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
)

# Supervised fine-tuning trainer that reads the formatted "text" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,  # Can make training 5x faster for short sequences.
    args=training_args,
)

In [5]:
# Run the fine-tuning loop and keep the object returned by the trainer.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 100,000 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,1.5963
2,1.5913
3,1.5736
4,1.5161
5,1.3274
6,1.1763
7,1.0746
8,0.8376
9,0.798
10,0.8163


In [8]:
# Persist the fine-tuned model and its tokenizer to the same local directory.
# NOTE(review): for a PEFT-wrapped model this presumably writes only the
# adapter weights rather than the full base model - confirm.
model.save_pretrained("./models/lora_model") # Local saving
tokenizer.save_pretrained("./models/lora_model")
# Optional upload to the Hugging Face Hub. If enabled, read the token from an
# environment variable or a prompt instead of hardcoding it in the notebook.
# model.push_to_hub("your_name/lora_model", token = "...") # Online saving
# tokenizer.push_to_hub("your_name/lora_model", token = "...") # Online saving

('./models/lora_model\\tokenizer_config.json',
 './models/lora_model\\special_tokens_map.json',
 './models/lora_model\\tokenizer.json')

In [13]:
def generate_sql(model, tokenizer, company_db, sql_prompt,
                 max_new_tokens=200, do_sample=False, temperature=0.7, top_p=0.95):
    """Generate SQL (and an explanation) for a question about a database.

    Args:
        model: Fine-tuned language model; it is switched to Unsloth inference
            mode before generating.
        tokenizer: Tokenizer matching ``model``.
        company_db: Text describing the database, inserted after
            "Company database:" in the prompt.
        sql_prompt: Natural-language request, inserted after "SQL Prompt:".
        max_new_tokens: Upper bound on the number of generated tokens.
        do_sample: When False (default) decoding is greedy, which is what the
            previous version effectively did because ``temperature`` and
            ``top_p`` were passed without enabling sampling.
        temperature: Sampling temperature; only used when ``do_sample`` is True.
        top_p: Nucleus-sampling threshold; only used when ``do_sample`` is True.

    Returns:
        The decoded output sequence with special tokens removed. It contains
        the prompt followed by the generated completion.
    """
    # Build the input in the same layout as the training examples. The prompt
    # ends right after "SQL:" with no trailing space: in the training text the
    # space belongs to the answer that follows, and a trailing space here made
    # the model emit a second one ("SQL:  SELECT ...").
    input_text = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Company database: {company_db}

### Input:
SQL Prompt: {sql_prompt}

### Response:
SQL:"""

    FastLanguageModel.for_inference(model)

    # Tokenize and move the tensors to wherever the model lives, rather than
    # relying on a notebook-global `device` variable.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Forward the sampling knobs only when sampling is enabled; with greedy
    # decoding they are ignored and only trigger a warning.
    generation_kwargs = {"max_new_tokens": max_new_tokens, "do_sample": do_sample}
    if do_sample:
        generation_kwargs["temperature"] = temperature
        generation_kwargs["top_p"] = top_p

    output = model.generate(**inputs, **generation_kwargs)

    # Decode the first (only) sequence of the batch back to text.
    response = tokenizer.decode(output[0], skip_special_tokens=True)

    return response

# Example call: a two-table schema and a question that needs a join plus an aggregate.
company_db = "Tables: users(id, name, age), orders(id, user_id, amount)"
sql_prompt = "Get the total amount spent by each user."
generated_response = generate_sql(model, tokenizer, company_db, sql_prompt)
# The returned text contains the prompt followed by the model's completion.
print(generated_response)

Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Company database: Tables: users(id, name, age), orders(id, user_id, amount)

### Input:
SQL Prompt: Get the total amount spent by each user.

### Response:
SQL:  SELECT users.name, SUM(orders.amount) as total_amount FROM users INNER JOIN orders ON users.id = orders.user_id GROUP BY users.name;

Explanation: This query gets the total amount spent by each user by joining the users table with the orders table on the user_id. It then groups the results by user name and sums the amount spent by each user.



# END