In [5]:
import torch
# Report whether PyTorch can see a CUDA device; when it can, also print the
# name of device 0 and the CUDA version PyTorch reports.
_cuda_ready = torch.cuda.is_available()
print("GPU disponible:", _cuda_ready)
if _cuda_ready:
    print("GPU usada:", torch.cuda.get_device_name(0))
    print("Versión CUDA de PyTorch:", torch.version.cuda)

GPU disponible: True
GPU usada: NVIDIA GeForce RTX 4070 Laptop GPU
Versión CUDA de PyTorch: 12.8


In [7]:
# ================================================
# 📦 Carga y formateo del dataset tipo "shell emulator"
# ================================================
import json, re
from datasets import Dataset

# Path to the JSONL dataset (adjust if needed)
dataset_path = "shell_dataset.jsonl"

# Load one JSON object per line. Whitespace-only lines (for example a stray
# empty line at the end of the file) are skipped: json.loads() raises
# JSONDecodeError on them, which would otherwise abort the whole load.
with open(dataset_path, "r", encoding="utf-8") as f:
    lines = [json.loads(line) for line in f if line.strip()]

# ================================================
# 🔍 Función para parsear las conversaciones
# Convierte texto plano con etiquetas System/Human/AI → formato messages estándar
# ================================================
def parse_conversation(text: str) -> list:
    """Split a plain-text transcript into a list of chat messages.

    A line that starts with ``System:``, ``Human:`` or ``AI:`` opens a new
    message with role ``system``, ``user`` or ``assistant`` respectively.
    The remainder of that line plus every following unlabelled line make up
    the message content (joined with newlines, then stripped).

    Unlabelled lines that appear before the first label belong to no message
    and are discarded.

    Args:
        text: Raw transcript; surrounding whitespace is stripped before parsing.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts in transcript order.
        The list is empty when ``text`` contains no labelled line.
    """
    role_by_label = {"System": "system", "Human": "user", "AI": "assistant"}

    messages = []
    current_role = None
    buffer = []

    for line in text.strip().splitlines():
        match = re.match(r"^(System|Human|AI):\s*(.*)", line)
        if match:
            # A new label closes the message that was being collected, if any.
            if current_role is not None:
                messages.append({"role": current_role, "content": "\n".join(buffer).strip()})
            current_role = role_by_label[match.group(1)]
            # Always start from a fresh buffer: this guarantees that text seen
            # before the first label can never leak into the first message.
            buffer = [match.group(2)]
        elif current_role is not None:
            # Unlabelled line: continuation of the message being collected.
            buffer.append(line)
        # else: unlabelled text before any label -> no owner, drop it.

    # Flush the last message.
    if current_role is not None:
        messages.append({"role": current_role, "content": "\n".join(buffer).strip()})

    return messages


# ================================================
# 🧠 Convertir todas las conversaciones
# ================================================
# Parse every raw record. A record whose parsing raises is reported and
# skipped; a record that produces no messages is dropped.
parsed_data = []
for item in lines:
    try:
        messages = parse_conversation(item["text"])
    except Exception as e:
        print(f"❌ Error parsing line: {e}")
    else:
        if messages:
            parsed_data.append({"messages": messages})

# Wrap the parsed conversations in a Hugging Face Dataset
dataset = Dataset.from_list(parsed_data)

# Show the second example as a quick sanity check
print(dataset[1])






{'messages': [{'content': 'You are a Linux terminal emulator.\nThe current user is "Ubuntu_user" and the hostname is "Ubuntu". \nFollow these rules:\n1. Output only what a real Linux shell would print to stdout/stderr for the given input.\n2. Respond to all valid Linux commands exactly as a real terminal would.\n3. For invalid or non-Linux commands, return the typical shell error.\n4. Assume a standard Linux environment with typical file structures, system utilities, and commands.\n5. For commands that require interaction (vim, top, nano, passwd), return the short non-interactive error message a real shell would show.\n6. Every output must end with a line containing only the current working directory path.', 'role': 'system'}, {'content': 'ls -la', 'role': 'user'}, {'content': 'total 54\ndrwxr-xr-x 2 ubuntu_user ubuntu_user 4096 Oct 29 12:23 .\ndrwxr-xr-x 2 ubuntu_user ubuntu_user 4096 Sep 15 14:23 ..\ndrwxr-xr-x 2 ubuntu_user ubuntu_user 4096 Oct 03 08:47 Documents\ndrwxr-xr-x 2 ubunt

In [8]:
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
import torch
from trl import SFTTrainer
from transformers import TrainingArguments

# ================================================
# 🚀 Cargar modelo base
# ================================================
# Load the Phi-3 mini instruct checkpoint and its tokenizer through Unsloth.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="microsoft/phi-3-mini-4k-instruct",
    # Quantization / precision: 4-bit loading on, 8-bit off, no explicit dtype.
    load_in_4bit=True,
    load_in_8bit=False,
    dtype=None,
    # Placement / memory budget
    device_map="auto",
    gpu_memory_utilization=0.85,
    # NOTE(review): this makes the loader print "Are you certain you want to do
    # remote code execution?" — confirm the flag is actually required.
    trust_remote_code=True,
)

# ================================================
# 🧩 Añadir LoRA adapters
# ================================================
# Wrap the loaded model with LoRA adapters on the attention and MLP projections.
model = FastLanguageModel.get_peft_model(
    model,
    # Which projection layers receive adapters
    target_modules=[
        "q_proj", "v_proj", "k_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    # LoRA hyper-parameters
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    use_rslora=False,
    loftq_config=None,
    # Memory / reproducibility
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)


# Configure the tokenizer with Unsloth's "phi-3" chat template.
tokenizer = get_chat_template(tokenizer, chat_template="phi-3")
# ================================================
# 🧠 Función de formateo para el entrenamiento tipo chat
# ================================================
def formatting_prompts_func(examples):
    """Render conversations to plain training text via the tokenizer's chat template.

    ``examples["messages"]`` may be either a single conversation (a list of
    message dicts, i.e. one dataset row) or a batch (a list of such lists).
    In both cases a list of strings is returned, one per conversation, with
    no generation prompt appended.
    """
    batch = examples["messages"]

    # A dict in first position means we got one conversation, not a batch:
    # wrap it so the code below only ever deals with a list of conversations.
    if isinstance(batch[0], dict):
        batch = [batch]

    return [
        tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=False,
        )
        for conversation in batch
    ]
# Sanity check: render dataset row 1 with the chat template and show the result.
print(formatting_prompts_func(examples=dataset[1]))

    



# ================================================
# ⚙️ Argumentos de entrenamiento optimizados para Unsloth
# ================================================
max_seq_length = 2048

# Probe bf16 support once; fp16 is used only as the fallback.
_use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    # Output / checkpointing
    output_dir="outputs",
    save_strategy="epoch",
    save_total_limit=2,
    # Batch geometry: 2 per device x 4 accumulation steps = effective batch of 8
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Schedule
    num_train_epochs=3,
    warmup_steps=10,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    # Optimizer
    optim="adamw_8bit",
    weight_decay=0.01,
    # Mixed precision (exactly one of the two flags is True)
    bf16=_use_bf16,
    fp16=not _use_bf16,
    # Logging / misc
    logging_steps=25,
    report_to="none",  # no external experiment tracker (e.g. Weights & Biases)
    dataloader_pin_memory=False,
    seed=3407,
)



Are you certain you want to do remote code execution?
==((====))==  Unsloth 2025.10.3: Fast Mistral patching. Transformers: 4.56.2. vLLM: 0.10.2.
   \\   /|    NVIDIA GeForce RTX 4070 Laptop GPU. Num GPUs = 1. Max memory: 7.996 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu128. CUDA: 8.9. CUDA Toolkit: 12.8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.33+f204359.d20251014. FA2 = True]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


INFO:accelerate.utils.modeling: We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


['<|system|>\nYou are a Linux terminal emulator.\nThe current user is "Ubuntu_user" and the hostname is "Ubuntu". \nFollow these rules:\n1. Output only what a real Linux shell would print to stdout/stderr for the given input.\n2. Respond to all valid Linux commands exactly as a real terminal would.\n3. For invalid or non-Linux commands, return the typical shell error.\n4. Assume a standard Linux environment with typical file structures, system utilities, and commands.\n5. For commands that require interaction (vim, top, nano, passwd), return the short non-interactive error message a real shell would show.\n6. Every output must end with a line containing only the current working directory path.<|end|>\n<|user|>\nls -la<|end|>\n<|assistant|>\ntotal 54\ndrwxr-xr-x 2 ubuntu_user ubuntu_user 4096 Oct 29 12:23 .\ndrwxr-xr-x 2 ubuntu_user ubuntu_user 4096 Sep 15 14:23 ..\ndrwxr-xr-x 2 ubuntu_user ubuntu_user 4096 Oct 03 08:47 Documents\ndrwxr-xr-x 2 ubuntu_user ubuntu_user 4096 Sep 08 21:54 D

In [9]:
# ================================================
# 🏋️ Entrenador SFT (fine-tuning estilo chat)
# ================================================
# Build the supervised fine-tuning trainer over the chat dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    # Data: every conversation is rendered to text by formatting_prompts_func.
    train_dataset=dataset,
    formatting_func=formatting_prompts_func,
    dataset_text_field="text",
    dataset_num_proc=2,
    max_seq_length=max_seq_length,
)

# Run training and keep whatever train() returns for later inspection.
trainer_stats = trainer.train()

Unsloth: Tokenizing ["text"] (num_proc=20):   0%|          | 0/1000 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,000 | Num Epochs = 3 | Total steps = 375
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 29,884,416 of 3,850,963,968 (0.78% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
25,0.8882
50,0.1823
75,0.1423
100,0.1051
125,0.1141
150,0.1289
175,0.0996
200,0.1302
225,0.121
250,0.1188


In [19]:
from unsloth import FastLanguageModel

# Switch the model to Unsloth's fast inference mode
FastLanguageModel.for_inference(model)

messages = [
    {"role": "system", "content": 'You are a Linux terminal emulator. """The current user is "unmatru" and the hostname is "Ubuntu""" ...'},
    {"role": "user", "content": "cat install.sh"},
]

# Render the chat as text, ending with the generation prompt so the model
# continues with the assistant turn.
prompt_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Move the inputs to the device the model actually lives on instead of a
# hard-coded "cuda", so inputs and weights can never end up on different devices.
inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)

# Greedy decoding, at most 100 new tokens
outputs = model.generate(**inputs, max_new_tokens=100, do_sample=False)

# Decode the whole sequence (prompt + completion) and keep special tokens
# visible, which makes the turn boundaries easy to inspect.
response = tokenizer.decode(outputs[0], skip_special_tokens=False)

print(response)



<|system|> You are a Linux terminal emulator. """The current user is "unmatru" and the hostname is "Ubuntu""" ...<|end|><|user|> cat install.sh<|end|><|assistant|> #!/bin/bash
./configure
make
make install

unmatru@Ubuntu:~$<|end|>


In [20]:
# Export the fine-tuned model as a q4_k_m-quantized GGUF file under "gguf_model".
# Per the captured log, this merges the weights to 16-bit, converts to a bf16
# GGUF and then quantizes; the call's return value is the cell output.
model.save_pretrained_gguf("gguf_model", tokenizer, quantization_method="q4_k_m")

Unsloth: Merging model weights to 16-bit format...
Found HuggingFace hub cache directory: /workspace/.cache/huggingface/hub
Checking cache directory for required files...
Cache check failed: model-00001-of-00002.safetensors not found in local cache.
Not all required files found in cache. Will proceed with downloading.
Checking cache directory for required files...
Cache check failed: tokenizer.model not found in local cache.
Not all required files found in cache. Will proceed with downloading.


Unsloth: Preparing safetensor model files: 100%|██████████| 2/2 [00:00<00:00, 371.01it/s]
Unsloth: Merging weights into 16bit: 100%|██████████| 2/2 [05:13<00:00, 156.81s/it]


Unsloth: Merge process complete. Saved to `/workspace/work/gguf_model`
Unsloth: Converting to GGUF format...
==((====))==  Unsloth: Conversion from HF to GGUF information
   \\   /|    [0] Installing llama.cpp might take 3 minutes.
O^O/ \_/ \    [1] Converting HF to GGUF bf16 might take 3 minutes.
\        /    [2] Converting GGUF bf16 to ['q4_k_m'] might take 10 minutes each.
 "-____-"     In total, you will have to wait at least 16 minutes.

Unsloth: llama.cpp found in the system. Skipping installation.
Unsloth: Preparing converter script...


INFO:unsloth_zoo.llama_cpp: Unsloth: Identifying llama.cpp gguf supported architectures...
INFO:unsloth_zoo.llama_cpp: Unsloth: Applying patches...
INFO:unsloth_zoo.llama_cpp: Unsloth: Saving patched script to llama.cpp/unsloth_convert_hf_to_gguf.py
INFO:unsloth_zoo.llama_cpp: Unsloth: Parsing arguments from patched script...
INFO:unsloth_zoo.llama_cpp: Unsloth: Successfully processed convert_hf_to_gguf.py.


Unsloth: [1] Converting model into bf16 GGUF format.
This might take 3 minutes...
Unsloth: Initial conversion completed! Files: ['phi-3-mini-4k-instruct.BF16.gguf']
Unsloth: [2] Converting GGUF bf16 into q4_k_m. This might take 10 minutes...
Unsloth: Model files cleanup...
Unsloth: All GGUF conversions completed successfully!
Generated files: ['phi-3-mini-4k-instruct.Q4_K_M.gguf']
Unsloth: example usage for text only LLMs: llama-cli --model phi-3-mini-4k-instruct.Q4_K_M.gguf -p "why is the sky blue?"
Unsloth: Saved Ollama Modelfile to current directory
Unsloth: convert model to ollama format by running - ollama create model_name -f ./Modelfile - inside current directory.


{'save_directory': 'gguf_model',
 'gguf_files': ['phi-3-mini-4k-instruct.Q4_K_M.gguf'],
 'modelfile_location': '/workspace/work/Modelfile',
 'want_full_precision': False,
 'is_vlm': False,
 'fix_bos_token': False}

In [1]:
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the original base model
base_model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-3-mini-4k-instruct",
    dtype="auto",
)

# Attach the LoRA adapter stored in "phi3_shell" and immediately merge it into
# the base weights, leaving a plain (adapter-free) model.
# NOTE(review): no visible cell writes the "phi3_shell" directory — confirm the
# adapter was saved there before running this cell on a fresh kernel.
model = PeftModel.from_pretrained(base_model, "phi3_shell").merge_and_unload()

# Save the merged model, then the base checkpoint's tokenizer next to it
model.save_pretrained("phi3_shell_full")
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-3-mini-4k-instruct")
tokenizer.save_pretrained("phi3_shell_full")

'(MaxRetryError('HTTPSConnectionPool(host=\'huggingface.co\', port=443): Max retries exceeded with url: /microsoft/phi-3-mini-4k-instruct/resolve/main/config.json (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x750978954cd0>: Failed to resolve \'huggingface.co\' ([Errno -3] Temporary failure in name resolution)"))'), '(Request ID: 615df376-c958-4933-b01d-7c9861073498)')' thrown while requesting HEAD https://huggingface.co/microsoft/phi-3-mini-4k-instruct/resolve/main/config.json
Retrying in 1s [Retry 1/5].
'(MaxRetryError('HTTPSConnectionPool(host=\'huggingface.co\', port=443): Max retries exceeded with url: /microsoft/phi-3-mini-4k-instruct/resolve/main/config.json (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x750978955fd0>: Failed to resolve \'huggingface.co\' ([Errno -3] Temporary failure in name resolution)"))'), '(Request ID: 62c73869-a6c3-4b63-9a73-f1e08e712ee5)')' thrown while requesting HEAD https://huggingface.c

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

'(MaxRetryError('HTTPSConnectionPool(host=\'huggingface.co\', port=443): Max retries exceeded with url: /microsoft/phi-3-mini-4k-instruct/resolve/main/generation_config.json (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x750978890a50>: Failed to resolve \'huggingface.co\' ([Errno -3] Temporary failure in name resolution)"))'), '(Request ID: c9bec49a-e585-4391-9732-e710c9e3ca26)')' thrown while requesting HEAD https://huggingface.co/microsoft/phi-3-mini-4k-instruct/resolve/main/generation_config.json
Retrying in 1s [Retry 1/5].
'(MaxRetryError('HTTPSConnectionPool(host=\'huggingface.co\', port=443): Max retries exceeded with url: /microsoft/phi-3-mini-4k-instruct/resolve/main/generation_config.json (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7509788a6a50>: Failed to resolve \'huggingface.co\' ([Errno -3] Temporary failure in name resolution)"))'), '(Request ID: 3ed53410-5088-4c1e-a025-a5b97a5582eb)')' thrown while requ

('phi3_shell_full/tokenizer_config.json',
 'phi3_shell_full/special_tokens_map.json',
 'phi3_shell_full/chat_template.jinja',
 'phi3_shell_full/tokenizer.model',
 'phi3_shell_full/added_tokens.json',
 'phi3_shell_full/tokenizer.json')