# Train, Evaluate, and Publish Terminal Command Model

This notebook:
1. Trains Qwen3-0.6B with QLoRA on terminal command dataset
2. Evaluates the model while still in memory
3. Saves LoRA adapters locally and publishes to HuggingFace
4. Saves merged model locally and publishes to HuggingFace

**HuggingFace Repos:**
- Adapters: `{username}/qwen3-0.6b-terminal-instruct-lora`
- Merged: `{username}/qwen3-0.6b-terminal-instruct`

## Cell 1: Environment Setup and CUDA Verification

In [1]:
import os
import torch
import warnings
warnings.filterwarnings('ignore')

os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["CUDA_LAUNCH_BLOCKING"] = "0"

print("=" * 50)
print("CUDA VERIFICATION")
print("=" * 50)
print("PyTorch Version:", torch.__version__)

if torch.cuda.is_available():
    print(f"‚úÖ CUDA is available!")
    print(f"   GPU: {torch.cuda.get_device_name(0)}")
    print(f"   VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.enabled = True
    device = torch.device("cuda:0")
    torch.cuda.set_device(device)
    torch.cuda.empty_cache()
else:
    print("‚ùå CUDA is NOT available!")
    device = torch.device("cpu")

print("=" * 50)

CUDA VERIFICATION
PyTorch Version: 2.9.0+cu130
‚úÖ CUDA is available!
   GPU: NVIDIA GeForce RTX 2060
   VRAM: 6.00 GB


## Cell 2: Import Libraries

In [2]:
import json
import gc
from datetime import datetime
from pathlib import Path

from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorForSeq2Seq
)
from peft import (
    LoraConfig,
    get_peft_model,
    prepare_model_for_kbit_training,
    PeftModel
)
from huggingface_hub import HfApi, login
from tqdm import tqdm

print("‚úÖ All libraries imported successfully")

‚úÖ All libraries imported successfully


## Cell 3: Configuration

In [3]:
# ============================================
# HUGGINGFACE CONFIGURATION
# ============================================
HF_USERNAME = "Eng-Elias"  # <-- Change this to your HuggingFace username
HF_TOKEN = None  # Will prompt if None, or set your token here

# Repository names
HF_REPO_ADAPTERS = f"{HF_USERNAME}/qwen3-0.6b-terminal-instruct-lora"
HF_REPO_MERGED = f"{HF_USERNAME}/qwen3-0.6b-terminal-instruct"

# ============================================
# MODEL & TRAINING CONFIGURATION
# ============================================
CONFIG = {
    # Model
    "base_model": "Qwen/Qwen3-0.6B",
    
    # Paths
    "train_data": "../dataset/generated/processed/train.json",
    "dev_data": "../dataset/generated/processed/dev.json",
    "test_data": "../dataset/generated/processed/test.json",
    "output_dir": "../outputs/terminal_command_model",
    "adapter_save_path": "../outputs/lora_adapters",
    "merged_model_path": "../outputs/merged_model",
    
    # Quantization (for 6GB VRAM)
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": True,
    "bnb_4bit_compute_dtype": "float16",
    
    # LoRA Configuration
    "lora_r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.1,
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"],
    
    # Training Configuration - RTX 2060 Optimized
    "num_epochs": 3,
    "per_device_train_batch_size": 2,
    "per_device_eval_batch_size": 2,
    "gradient_accumulation_steps": 8,
    "learning_rate": 2e-4,
    "lr_scheduler_type": "cosine",
    "warmup_ratio": 0.1,
    "max_seq_length": 256,
    "logging_steps": 25,
    "eval_steps": 100,
    "save_steps": 200,
    "save_total_limit": 2,
    
    # Memory Optimizations
    "gradient_checkpointing": True,
    "fp16": True,
    "bf16": False,
    "optim": "paged_adamw_8bit",
    
    # Evaluation
    "max_new_tokens": 150,
}

print("=" * 50)
print("CONFIGURATION")
print("=" * 50)
print(f"HuggingFace Username: {HF_USERNAME}")
print(f"Adapters Repo: {HF_REPO_ADAPTERS}")
print(f"Merged Model Repo: {HF_REPO_MERGED}")
print("=" * 50)

CONFIGURATION
HuggingFace Username: Eng-Elias
Adapters Repo: Eng-Elias/qwen3-0.6b-terminal-instruct-lora
Merged Model Repo: Eng-Elias/qwen3-0.6b-terminal-instruct


## Cell 4: HuggingFace Login

In [4]:
# Login to HuggingFace
if HF_TOKEN is None:
    print("Please enter your HuggingFace token (get it from https://huggingface.co/settings/tokens):")
    HF_TOKEN = input("Token: ")

login(token=HF_TOKEN)
print("‚úÖ Logged in to HuggingFace")

Please enter your HuggingFace token (get it from https://huggingface.co/settings/tokens):


Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


‚úÖ Logged in to HuggingFace


## Cell 5: Load Dataset

In [5]:
def load_json_dataset(filepath):
    """Load dataset from JSON file."""
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return Dataset.from_list(data)

print("Loading datasets...")
train_dataset = load_json_dataset(CONFIG["train_data"])
eval_dataset = load_json_dataset(CONFIG["dev_data"])
test_dataset = load_json_dataset(CONFIG["test_data"])

print(f"‚úÖ Train samples: {len(train_dataset)}")
print(f"‚úÖ Eval samples: {len(eval_dataset)}")
print(f"‚úÖ Test samples: {len(test_dataset)}")

print("\nüìù Sample data:")
print(json.dumps(train_dataset[0], indent=2))

Loading datasets...
‚úÖ Train samples: 9780
‚úÖ Eval samples: 1150
‚úÖ Test samples: 577

üìù Sample data:
{
  "instruction": "Find all .css files in temp",
  "input": "[LINUX]",
  "output": "find temp -name '*.css'"
}


## Cell 6: Load Model and Tokenizer

In [6]:
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    CONFIG["base_model"],
    trust_remote_code=True
)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

print(f"‚úÖ Tokenizer loaded")

# Quantization config
print("\nConfiguring 4-bit quantization...")
bnb_config = BitsAndBytesConfig(
    load_in_4bit=CONFIG["load_in_4bit"],
    bnb_4bit_quant_type=CONFIG["bnb_4bit_quant_type"],
    bnb_4bit_use_double_quant=CONFIG["bnb_4bit_use_double_quant"],
    bnb_4bit_compute_dtype=torch.float16
)

print("Loading model with quantization...")
model = AutoModelForCausalLM.from_pretrained(
    CONFIG["base_model"],
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.float16
)

print(f"‚úÖ Model loaded: {CONFIG['base_model']}")
if torch.cuda.is_available():
    print(f"   VRAM used: {torch.cuda.memory_allocated(0) / 1024**3:.2f} GB")

Loading tokenizer...
‚úÖ Tokenizer loaded

Configuring 4-bit quantization...
Loading model with quantization...


`torch_dtype` is deprecated! Use `dtype` instead!


‚úÖ Model loaded: Qwen/Qwen3-0.6B
   VRAM used: 0.50 GB


## Cell 7: Setup LoRA

In [7]:
print("Preparing model for LoRA training...")
model = prepare_model_for_kbit_training(
    model,
    use_gradient_checkpointing=CONFIG["gradient_checkpointing"]
)

lora_config = LoraConfig(
    r=CONFIG["lora_r"],
    lora_alpha=CONFIG["lora_alpha"],
    target_modules=CONFIG["target_modules"],
    lora_dropout=CONFIG["lora_dropout"],
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)

print("\n" + "=" * 50)
print("TRAINABLE PARAMETERS")
print("=" * 50)
model.print_trainable_parameters()

if torch.cuda.is_available():
    print(f"\nVRAM used after LoRA setup: {torch.cuda.memory_allocated(0) / 1024**3:.2f} GB")

Preparing model for LoRA training...

TRAINABLE PARAMETERS
trainable params: 4,587,520 || all params: 600,637,440 || trainable%: 0.7638

VRAM used after LoRA setup: 0.81 GB


## Cell 8: Data Preprocessing

In [8]:
def format_instruction(sample):
    """Format sample into instruction prompt with EOS token."""
    instruction = sample["instruction"]
    input_text = sample.get("input", "")
    output = sample["output"]
    
    if input_text:
        prompt = f"### Instruction:\n{instruction}\n\n### Input:\n{input_text}\n\n### Response:\n{output}{tokenizer.eos_token}"
    else:
        prompt = f"### Instruction:\n{instruction}\n\n### Response:\n{output}{tokenizer.eos_token}"
    
    return prompt

def preprocess_function(examples):
    """Tokenize and prepare data for training."""
    prompts = [format_instruction({"instruction": inst, "input": inp, "output": out}) 
               for inst, inp, out in zip(examples["instruction"], 
                                          examples["input"], 
                                          examples["output"])]
    
    tokenized = tokenizer(
        prompts,
        truncation=True,
        max_length=CONFIG["max_seq_length"],
        padding="max_length",
        return_tensors=None
    )
    
    tokenized["labels"] = tokenized["input_ids"].copy()
    
    return tokenized

print("üìù Sample formatted prompt:")
print("-" * 50)
print(format_instruction(train_dataset[0]))
print("-" * 50)

print("\nTokenizing datasets...")
tokenized_train = train_dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=train_dataset.column_names,
    desc="Tokenizing train"
)

tokenized_eval = eval_dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=eval_dataset.column_names,
    desc="Tokenizing eval"
)

print(f"‚úÖ Tokenized train: {len(tokenized_train)} samples")
print(f"‚úÖ Tokenized eval: {len(tokenized_eval)} samples")

üìù Sample formatted prompt:
--------------------------------------------------
### Instruction:
Find all .css files in temp

### Input:
[LINUX]

### Response:
find temp -name '*.css'<|im_end|>
--------------------------------------------------

Tokenizing datasets...


Tokenizing train:   0%|          | 0/9780 [00:00<?, ? examples/s]

Tokenizing eval:   0%|          | 0/1150 [00:00<?, ? examples/s]

‚úÖ Tokenized train: 9780 samples
‚úÖ Tokenized eval: 1150 samples


## Cell 9: Training Arguments and Trainer

In [9]:
Path(CONFIG["output_dir"]).mkdir(parents=True, exist_ok=True)
Path(CONFIG["adapter_save_path"]).mkdir(parents=True, exist_ok=True)
Path(CONFIG["merged_model_path"]).mkdir(parents=True, exist_ok=True)

training_args = TrainingArguments(
    output_dir=CONFIG["output_dir"],
    num_train_epochs=CONFIG["num_epochs"],
    per_device_train_batch_size=CONFIG["per_device_train_batch_size"],
    per_device_eval_batch_size=CONFIG["per_device_eval_batch_size"],
    gradient_accumulation_steps=CONFIG["gradient_accumulation_steps"],
    learning_rate=CONFIG["learning_rate"],
    lr_scheduler_type=CONFIG["lr_scheduler_type"],
    warmup_ratio=CONFIG["warmup_ratio"],
    gradient_checkpointing=CONFIG["gradient_checkpointing"],
    fp16=CONFIG["fp16"],
    bf16=CONFIG["bf16"],
    optim=CONFIG["optim"],
    eval_strategy="steps",
    eval_steps=CONFIG["eval_steps"],
    save_strategy="steps",
    save_steps=CONFIG["save_steps"],
    save_total_limit=CONFIG["save_total_limit"],
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    logging_dir=f"{CONFIG['output_dir']}/logs",
    logging_steps=CONFIG["logging_steps"],
    report_to="none",
    remove_unused_columns=False,
    dataloader_pin_memory=True,
    dataloader_num_workers=0,
)

data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    padding=True,
    return_tensors="pt"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    data_collator=data_collator,
)

print("‚úÖ Trainer initialized")
print(f"   Effective batch size: {CONFIG['per_device_train_batch_size'] * CONFIG['gradient_accumulation_steps']}")

‚úÖ Trainer initialized
   Effective batch size: 16


## Cell 10: Train Model

In [10]:
print("=" * 50)
print("üöÄ STARTING TRAINING")
print("=" * 50)
print(f"Start time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

if torch.cuda.is_available():
    torch.cuda.empty_cache()
    gc.collect()

try:
    train_result = trainer.train()
    
    print("\n" + "=" * 50)
    print("‚úÖ TRAINING COMPLETED!")
    print("=" * 50)
    print(f"End time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Training time: {train_result.metrics['train_runtime'] / 60:.2f} minutes")
    print(f"Training loss: {train_result.metrics['train_loss']:.4f}")
    
except Exception as e:
    print(f"‚ùå Training failed: {e}")
    raise

üöÄ STARTING TRAINING
Start time: 2025-12-29 22:37:31


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss
100,0.3108,0.237161
200,0.1446,0.131519
300,0.1131,0.107887
400,0.0912,0.092697
500,0.0884,0.080882
600,0.0759,0.07085
700,0.0622,0.064213
800,0.0583,0.059128
900,0.0534,0.054755
1000,0.0528,0.050887



‚úÖ TRAINING COMPLETED!
End time: 2025-12-30 02:26:42
Training time: 229.17 minutes
Training loss: 0.3007


## Cell 11: Evaluate Model In-Memory

In [11]:
def generate_command(instruction, input_text="", max_new_tokens=150):
    """Generate command from instruction."""
    if input_text:
        prompt = f"### Instruction:\n{instruction}\n\n### Input:\n{input_text}\n\n### Response:\n"
    else:
        prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
    
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=200).to(model.device)
    
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id
        )
    
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    if "### Response:" in response:
        response = response.split("### Response:")[-1].strip()
    response = response.split("### ")[0].strip()
    
    return response

def exact_match(pred, gold):
    return pred.strip() == gold.strip()

def fuzzy_match(pred, gold):
    pred_norm = ' '.join(pred.lower().split())
    gold_norm = ' '.join(gold.lower().split())
    return pred_norm == gold_norm or gold_norm in pred_norm or pred_norm in gold_norm

# Load test data
with open(CONFIG["test_data"], 'r', encoding='utf-8') as f:
    test_data = json.load(f)

single_os_tests = [t for t in test_data if t["input"] in ["[LINUX]", "[WINDOWS]", "[MAC]", ""]]
json_tests = [t for t in test_data if "JSON" in t["input"].upper()]

print("=" * 50)
print("üîç EVALUATING MODEL IN-MEMORY")
print("=" * 50)

# Evaluate single OS commands
print("\nüìä Evaluating Single OS Commands...")
single_results = {"total": 0, "exact_match": 0, "fuzzy_match": 0}

sample_size = min(100, len(single_os_tests))  # Sample for speed
for sample in tqdm(single_os_tests[:sample_size], desc="Evaluating"):
    pred = generate_command(sample["instruction"], sample["input"])
    gold = sample["output"]
    
    single_results["total"] += 1
    if exact_match(pred, gold):
        single_results["exact_match"] += 1
    if fuzzy_match(pred, gold):
        single_results["fuzzy_match"] += 1

print("\n" + "=" * 50)
print("üìä IN-MEMORY EVALUATION RESULTS")
print("=" * 50)
print(f"Total evaluated: {single_results['total']}")
print(f"Exact match: {single_results['exact_match']} ({100*single_results['exact_match']/single_results['total']:.1f}%)")
print(f"Fuzzy match: {single_results['fuzzy_match']} ({100*single_results['fuzzy_match']/single_results['total']:.1f}%)")
print("=" * 50)

üîç EVALUATING MODEL IN-MEMORY

üìä Evaluating Single OS Commands...


Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Evaluating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [02:27<00:00,  1.47s/it]


üìä IN-MEMORY EVALUATION RESULTS
Total evaluated: 100
Exact match: 93 (93.0%)
Fuzzy match: 94 (94.0%)





## Cell 12: Save LoRA Adapters Locally

In [12]:
print("=" * 50)
print("üíæ SAVING LORA ADAPTERS LOCALLY")
print("=" * 50)

adapter_path = CONFIG["adapter_save_path"]

# Save LoRA adapters
model.save_pretrained(adapter_path)
print(f"‚úÖ LoRA adapters saved to: {adapter_path}")

# Save tokenizer
tokenizer.save_pretrained(adapter_path)
print(f"‚úÖ Tokenizer saved to: {adapter_path}")

# Save training config
config_save_path = f"{adapter_path}/training_config.json"
with open(config_save_path, 'w') as f:
    json.dump(CONFIG, f, indent=2)
print(f"‚úÖ Config saved to: {config_save_path}")

print("\nüìÅ Saved adapter files:")
for f in Path(adapter_path).iterdir():
    size_mb = f.stat().st_size / 1024 / 1024
    print(f"   {f.name}: {size_mb:.2f} MB")

üíæ SAVING LORA ADAPTERS LOCALLY
‚úÖ LoRA adapters saved to: ../outputs/lora_adapters
‚úÖ Tokenizer saved to: ../outputs/lora_adapters
‚úÖ Config saved to: ../outputs/lora_adapters/training_config.json

üìÅ Saved adapter files:
   adapter_config.json: 0.00 MB
   adapter_model.safetensors: 17.53 MB
   added_tokens.json: 0.00 MB
   chat_template.jinja: 0.00 MB
   merges.txt: 1.59 MB
   README.md: 0.00 MB
   special_tokens_map.json: 0.00 MB
   tokenizer.json: 10.89 MB
   tokenizer_config.json: 0.01 MB
   training_config.json: 0.00 MB
   vocab.json: 2.65 MB


## Cell 13: Publish LoRA Adapters to HuggingFace

In [13]:
print("=" * 50)
print("üöÄ PUBLISHING LORA ADAPTERS TO HUGGINGFACE")
print("=" * 50)
print(f"Repository: {HF_REPO_ADAPTERS}")

try:
    # Push adapters to HuggingFace
    model.push_to_hub(
        HF_REPO_ADAPTERS,
        commit_message="Upload LoRA adapters for terminal command generation",
        private=False  # Set to True if you want private repo
    )
    print("‚úÖ LoRA adapters pushed!")
    
    # Push tokenizer
    tokenizer.push_to_hub(
        HF_REPO_ADAPTERS,
        commit_message="Upload tokenizer"
    )
    print("‚úÖ Tokenizer pushed!")
    
    print(f"\nüîó View at: https://huggingface.co/{HF_REPO_ADAPTERS}")
    
except Exception as e:
    print(f"‚ùå Failed to push adapters: {e}")

üöÄ PUBLISHING LORA ADAPTERS TO HUGGINGFACE
Repository: Eng-Elias/qwen3-0.6b-terminal-instruct-lora


README.md: 0.00B [00:00, ?B/s]

Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

‚úÖ LoRA adapters pushed!


README.md: 0.00B [00:00, ?B/s]

Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

No files have been modified since last commit. Skipping to prevent empty commit.


‚úÖ Tokenizer pushed!

üîó View at: https://huggingface.co/Eng-Elias/qwen3-0.6b-terminal-instruct-lora


## Cell 14: Create and Save Merged Model

In [14]:
print("=" * 50)
print("üîÄ PREPARING MERGED MODEL")
print("=" * 50)

# ============================================
# IMPORTANT: Merge directly from trained model in memory
# ============================================
# The trained PeftModel (model) is still in memory from training.
# We merge directly from it - do NOT reload the base model separately.
# This preserves the exact training state and maintains accuracy.
# ============================================

print("Step 1: Verifying trained model is still in memory...")
print(f"   Model type: {type(model).__name__}")

# Verify accuracy before merge (quick check)
print("\nStep 2: Quick accuracy verification before merge...")
print("-" * 50)

verify_results = {"total": 0, "exact_match": 0}
verification_size = min(50, len(single_os_tests))

for sample in tqdm(single_os_tests[:verification_size], desc="Verifying"):
    pred = generate_command(sample["instruction"], sample["input"])
    gold = sample["output"]
    verify_results["total"] += 1
    if pred.strip() == gold.strip():
        verify_results["exact_match"] += 1

pre_merge_accuracy = 100 * verify_results["exact_match"] / verify_results["total"]
print(f"\n‚úÖ Pre-merge accuracy (trained PeftModel): {pre_merge_accuracy:.1f}%")

if pre_merge_accuracy < 90:
    print("   ‚ö†Ô∏è Warning: Accuracy seems low, but proceeding with merge...")

# Step 3: Merge adapters into base model
print("\nStep 3: Merging LoRA adapters into base model...")
print("   Calling model.merge_and_unload() on trained model...")

merged_model = model.merge_and_unload()
print("‚úÖ Adapters merged successfully!")

# Step 4: Save merged model locally
print("\nStep 4: Saving merged model locally...")
merged_model_path = CONFIG["merged_model_path"]
merged_model.save_pretrained(merged_model_path)
tokenizer.save_pretrained(merged_model_path)
print(f"‚úÖ Merged model saved to: {merged_model_path}")

print("\nüìÅ Merged model files:")
for f in Path(merged_model_path).iterdir():
    size_mb = f.stat().st_size / 1024 / 1024
    print(f"   {f.name}: {size_mb:.2f} MB")

üîÄ PREPARING MERGED MODEL
Step 1: Verifying trained model is still in memory...
   Model type: PeftModelForCausalLM

Step 2: Quick accuracy verification before merge...
--------------------------------------------------


Verifying: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [01:13<00:00,  1.47s/it]



‚úÖ Pre-merge accuracy (trained PeftModel): 90.0%

Step 3: Merging LoRA adapters into base model...
   Calling model.merge_and_unload() on trained model...
‚úÖ Adapters merged successfully!

Step 4: Saving merged model locally...
‚úÖ Merged model saved to: ../outputs/merged_model

üìÅ Merged model files:
   added_tokens.json: 0.00 MB
   chat_template.jinja: 0.00 MB
   config.json: 0.00 MB
   generation_config.json: 0.00 MB
   merges.txt: 1.59 MB
   model.safetensors: 810.80 MB
   special_tokens_map.json: 0.00 MB
   tokenizer.json: 10.89 MB
   tokenizer_config.json: 0.01 MB
   vocab.json: 2.65 MB


## Cell 15: Publish Merged Model to HuggingFace

In [15]:
print("=" * 50)
print("üöÄ PUBLISHING MERGED MODEL TO HUGGINGFACE")
print("=" * 50)
print(f"Repository: {HF_REPO_MERGED}")
print("(LoRA adapters already pushed in Cell 13)")

try:
    # Push merged model
    print(f"\nüì§ Pushing merged model...")
    merged_model.push_to_hub(
        HF_REPO_MERGED,
        commit_message="Upload merged terminal command model",
        private=False
    )
    print(f"‚úÖ Merged model pushed!")
    
    # Push tokenizer
    print("\nüì§ Pushing tokenizer...")
    tokenizer.push_to_hub(HF_REPO_MERGED)
    print("‚úÖ Tokenizer pushed!")
    
    print(f"\nüîó View at: https://huggingface.co/{HF_REPO_MERGED}")
    
except Exception as e:
    print(f"\n‚ùå Error pushing to HuggingFace: {e}")
    import traceback
    traceback.print_exc()
    print("\nMake sure you're logged in with: huggingface-cli login")

üöÄ PUBLISHING MERGED MODEL TO HUGGINGFACE
Repository: Eng-Elias/qwen3-0.6b-terminal-instruct
(LoRA adapters already pushed in Cell 13)

üì§ Pushing merged model...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

‚úÖ Merged model pushed!

üì§ Pushing tokenizer...


README.md: 0.00B [00:00, ?B/s]

Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

No files have been modified since last commit. Skipping to prevent empty commit.


‚úÖ Tokenizer pushed!

üîó View at: https://huggingface.co/Eng-Elias/qwen3-0.6b-terminal-instruct


## Cell 16: Summary

In [16]:
print("\n" + "=" * 60)
print("‚úÖ ALL TASKS COMPLETED!")
print("=" * 60)

print("\nüì¶ LOCAL SAVES:")
print(f"   LoRA Adapters: {CONFIG['adapter_save_path']}")
print(f"   Merged Model:  {CONFIG['merged_model_path']}")

print("\nüåê HUGGINGFACE REPOS:")
print(f"   Adapters: https://huggingface.co/{HF_REPO_ADAPTERS}")
print(f"   Merged:   https://huggingface.co/{HF_REPO_MERGED}")

print("\nüìä ACCURACY RESULTS:")
print(f"   In-Memory (after training): {100*single_results['exact_match']/single_results['total']:.1f}%")
print(f"   Pre-Merge Verification:     {pre_merge_accuracy:.1f}%")

print("\n" + "=" * 60)
print("üìã HOW TO LOAD THE MODELS")
print("=" * 60)
print("""
# Option 1: Load merged model directly (simplest)
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("{}")
tokenizer = AutoTokenizer.from_pretrained("{}")

# Option 2: Load base model + LoRA adapters  
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B")
model = PeftModel.from_pretrained(base, "{}")
tokenizer = AutoTokenizer.from_pretrained("{}")
""".format(HF_REPO_MERGED, HF_REPO_MERGED, HF_REPO_ADAPTERS, HF_REPO_ADAPTERS))

print("üìå NEXT STEPS:")
print("   1. Run 02_evaluate_all_sources.ipynb to verify accuracy from all sources")
print("   2. Run 03_load_and_test_all.ipynb for interactive testing")
print("   3. Update model cards on HuggingFace")

print("\n" + "=" * 60)


‚úÖ ALL TASKS COMPLETED!

üì¶ LOCAL SAVES:
   LoRA Adapters: ../outputs/lora_adapters
   Merged Model:  ../outputs/merged_model

üåê HUGGINGFACE REPOS:
   Adapters: https://huggingface.co/Eng-Elias/qwen3-0.6b-terminal-instruct-lora
   Merged:   https://huggingface.co/Eng-Elias/qwen3-0.6b-terminal-instruct

üìä ACCURACY RESULTS:
   In-Memory (after training): 93.0%
   Pre-Merge Verification:     90.0%

üìã HOW TO LOAD THE MODELS

# Option 1: Load merged model directly (simplest)
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("Eng-Elias/qwen3-0.6b-terminal-instruct")
tokenizer = AutoTokenizer.from_pretrained("Eng-Elias/qwen3-0.6b-terminal-instruct")

# Option 2: Load base model + LoRA adapters  
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B")
model = PeftModel.from_pretrained(base, "Eng-Elias/qwen3-0.6b-terminal-instruct-