In [1]:
!pip install -q transformers datasets peft accelerate bitsandbytes evaluate rouge-score

In [10]:
# ✅ Step 2: Load dataset
from datasets import load_dataset
import torch
import os


dataset = load_dataset("json", data_files={
    "train": "data/alpaca/train_alpaca.json",
    "test": "data/alpaca/test_alpaca.json"
})

In [11]:
# Check GPU availability
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")


CUDA available: True
GPU: NVIDIA GeForce GTX 1650 Ti
GPU Memory: 4.3 GB


In [None]:


import torch
import os
from datasets import load_dataset
from transformers import (
    AutoTokenizer, 
    AutoModelForCausalLM, 
    TrainingArguments, 
    Trainer, 
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model, TaskType

# Check GPU
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    
# Clear cache
torch.cuda.empty_cache()

# ✅ Step 1: Load dataset
dataset = load_dataset("json", data_files={
    "train": "data/alpaca/train_alpaca.json",
    "test": "data/alpaca/test_alpaca.json"
})

print(f"Train samples: {len(dataset['train'])}")
print(f"Test samples: {len(dataset['test'])}")

# ✅ Step 2: Load model (NO QUANTIZATION - simpler approach)
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_id)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True
)

# Move to GPU manually
if torch.cuda.is_available():
    model = model.cuda()

print(f"Model loaded. Parameters: {model.num_parameters():,}")

# ✅ Step 3: LoRA config (aggressive settings for speed)
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=4,  # Smaller rank for speed
    lora_alpha=8,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],  # Just q and v for speed
    bias="none"
)

model = get_peft_model(model, peft_config)
print(f"Trainable params: {model.get_nb_trainable_parameters()}")

# ✅ Step 4: Data preprocessing (SIMPLIFIED)
def preprocess_function(examples):
    texts = [
        f"### Instruction:\n{inst}\n\n### Response:\n{resp}{tokenizer.eos_token}"
        for inst, resp in zip(examples["instruction"], examples["output"])
    ]

    model_inputs = tokenizer(
        texts,
        truncation=True,
        padding="max_length",     # ✅ force padding for batching
        max_length=256,
        return_tensors="pt"
    )

    # Set labels = input_ids (cloned as tensor)
    model_inputs["labels"] = model_inputs["input_ids"].clone()

    return model_inputs


# Process datasets and remove original columns
print("Tokenizing datasets...")
train_dataset = dataset["train"].map(
    preprocess_function,
    batched=True,
    remove_columns=dataset["train"].column_names
)

test_dataset = dataset["test"].map(
    preprocess_function, 
    batched=True,
    remove_columns=dataset["test"].column_names  # Remove ALL original columns
)


training_args = TrainingArguments(
    output_dir="./tinyllama-cli-lora",
    per_device_train_batch_size=2, 
    gradient_accumulation_steps=4,
    learning_rate=3e-4,
    num_train_epochs=1,
    warmup_steps=50,
    logging_steps=10,
    save_strategy="epoch",
    save_total_limit=1,
    fp16=True,
    dataloader_drop_last=True,
    remove_unused_columns=False,
    report_to=None,
    optim="adamw_torch",
    max_grad_norm=1.0,
    lr_scheduler_type="cosine"
)

# Data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
    pad_to_multiple_of=8
)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer
)

print("Starting training...")
print(f"Total steps: {len(train_dataset) // (training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps)}")

# Train the model
trainer.train()

# Save
print("Saving model...")
model.save_pretrained("./tinyllama-cli-lora")
tokenizer.save_pretrained("./tinyllama-cli-lora")

print("✅ Training completed!")

# ✅ Step 7: Quick test
print("\n" + "="*50)
print("QUICK TEST")
print("="*50)

# Test the fine-tuned model
model.eval()

test_prompts = [
    "How do I create a new Git repository?",
    "Show me how to list files in a directory using ls command",
    "How to compress a file with gzip?"
]

for prompt in test_prompts:
    formatted_prompt = f"### Instruction:\n{prompt}\n\n### Response:\n"
    
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=80,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
    
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = response.split("### Response:\n")[-1].strip()
    
    print(f"\nPrompt: {prompt}")
    print(f"Response: {response}")
    print("-" * 40)

print("\n✅ Done! Model saved to ./tinyllama-cli-lora")
print("Next: Create agent.py using this model")

# Memory cleanup
torch.cuda.empty_cache()

CUDA available: True
GPU: NVIDIA GeForce GTX 1650 Ti
Train samples: 295
Test samples: 34


  0%|          | 0/36 [07:21<?, ?it/s]


Model loaded. Parameters: 1,100,048,384
Trainable params: (563200, 1100611584)
Tokenizing datasets...


Map: 100%|██████████| 295/295 [00:00<00:00, 1287.53 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 1031.43 examples/s]


Starting training...
Total steps: 36


 28%|██▊       | 10/36 [04:23<11:26, 26.39s/it]
 28%|██▊       | 10/36 [04:23<11:26, 26.39s/it]

{'loss': 2.3312, 'grad_norm': 0.41855359077453613, 'learning_rate': 5.9999999999999995e-05, 'epoch': 0.27}


 56%|█████▌    | 20/36 [08:47<07:01, 26.35s/it]
 56%|█████▌    | 20/36 [08:47<07:01, 26.35s/it]

{'loss': 2.3112, 'grad_norm': 0.5944775938987732, 'learning_rate': 0.00011999999999999999, 'epoch': 0.54}


 83%|████████▎ | 30/36 [13:10<02:38, 26.34s/it]
 83%|████████▎ | 30/36 [13:10<02:38, 26.34s/it]

{'loss': 2.2465, 'grad_norm': 0.7547395825386047, 'learning_rate': 0.00017999999999999998, 'epoch': 0.82}


100%|██████████| 36/36 [15:48<00:00, 26.28s/it]
100%|██████████| 36/36 [15:50<00:00, 26.40s/it]


{'train_runtime': 950.3937, 'train_samples_per_second': 0.31, 'train_steps_per_second': 0.038, 'train_loss': 2.264140182071262, 'epoch': 0.98}
Saving model...
✅ Training completed!

QUICK TEST

Prompt: How do I create a new Git repository?
Response: The following command creates a new Git repository:

```
git init
```

This command creates a new Git repository and initializes it with a bare repository.

### Example:

```
$ git init
Initialized empty Git repository in /home/username/.git/
```

This command creates a new Git repository with a _bare_ repository.
----------------------------------------

Prompt: Show me how to list files in a directory using ls command
Response: To list files in a directory using the ls command, you can use the -l option. Here's an example:


```
$ ls -l
```


This will list the contents of the current directory and all directories below it, including subdirectories with a file name.


```
$ ls -l /
```


This
----------------------------------------

Prom

In [27]:
from transformers import AutoModelForCausalLM
from peft import PeftModel
import torch

# Load base model
base = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=torch.float16,
    device_map={"": 0}
)

# Load LoRA adapter
ft = PeftModel.from_pretrained(base, "./tinyllama-cli-lora")

# Merge LoRA into base weights
merged = ft.merge_and_unload()

# Save merged model (full)
merged.save_pretrained("./merged-tinyllama")

# Also save tokenizer (once)
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
tokenizer.save_pretrained("./merged-tinyllama")

print("✅ Full model saved to ./merged-tinyllama")


✅ Full model saved to ./merged-tinyllama


In [16]:
import torch
import json
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel

# ✅ Load fine-tuned model
base_model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
model_path = "./tinyllama-cli-lora"

# Load base + adapter
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.float16,
    device_map="auto"
)

model = PeftModel.from_pretrained(base_model, model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Setup inference pipeline
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

# ✅ Load test2.json
with open("data/alpaca/test2_alpaca.json", "r", encoding="utf-8") as f:
    test_data = json.load(f)

# ✅ Run generation
print("Generating responses from fine-tuned model...\n")
for sample in test_data:
    prompt = sample["instruction"].strip()
    formatted = f"### Instruction:\n{prompt}\n\n### Response:\n"

    inputs = tokenizer(formatted, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=128,
        temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = response.split("### Response:")[-1].strip()

    print(f"🔹 Prompt: {prompt}")
    print(f"🧠 Response: {response}")
    print("-" * 60)


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'Mamba2ForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausal

Generating responses from fine-tuned model...

🔹 Prompt: Get `grep` to not output file name

When I use 
grep -o
 to search in multiple files, it outputs each result prefixed with the file name. How can I prevent this prefix? I want the results without the file names.
🧠 Response: I use the command:
------------------------------------------------------------
🔹 Prompt: How to cycle through reverse-i-search in Bash?

In the GNU bash shell, I can type 
Ctrl
 + 
R
 to search for a matching command previously run. E.g., if I type 
Ctrl
 + 
R
 and then "
grep
", it lists my last 
grep
 command, and I can hit 
Enter
 to use it.


This only gives one suggestion though. Is there any way to cycle through other previously typed matching commands?
🧠 Response: Bash's implementation of the "reverse-i-search" (RIS) technique is implemented using the "read" builtin.

To perform the same pattern matching as described above, you can use the following command:

```bash
read -r match
grep -iq -R "$match" 

KeyboardInterrupt: 

In [22]:
import torch
import json
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel
import evaluate
from tqdm import tqdm

# ✅ Load tokenizer and models
base_model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
fine_tuned_path = "./tinyllama-cli-lora"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(fine_tuned_path)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# ✅ Load base model on GPU without disk offloading
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.float16,
    device_map={"": 0}  # ✅ avoid 'disk' mapping issue
)

# ✅ Load LoRA weights into base model
ft_model = PeftModel.from_pretrained(base_model, fine_tuned_path)

# ✅ Set up pipelines
pipe_base = pipeline("text-generation", model=base_model, tokenizer=tokenizer)
pipe_ft = pipeline("text-generation", model=ft_model, tokenizer=tokenizer)

# ✅ Load first 5 test prompts from test_alpaca.json
with open("data/alpaca/test_alpaca.json", "r", encoding="utf-8") as f:
    base_test_data = json.load(f)

test_prompts = base_test_data[:5]

# ✅ Add 2 edge cases manually
test_prompts += [
    {"instruction": "How do I recursively delete all `.tmp` files in current dir?"},
    {"instruction": "What happens if I run `rm -rf /` with root permissions?"}
]

# ✅ Initialize metric evaluators
rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")

refs, base_outputs, ft_outputs = [], [], []

def generate_output(pipe, prompt):
    formatted = f"### Instruction:\n{prompt}\n\n### Response:\n"
    output = pipe(
        formatted,
        max_new_tokens=128*2,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )[0]["generated_text"]
    return output.split("### Response:")[-1].strip()

# ✅ Generate and compare outputs
print("🔍 Generating predictions for base and fine-tuned models...\n")

for ex in test_prompts:
    prompt = ex["instruction"]
    reference = ex.get("output", "")

    base_resp = generate_output(pipe_base, prompt)
    ft_resp = generate_output(pipe_ft, prompt)

    refs.append(reference if reference else ft_resp)  # fallback for edge cases
    base_outputs.append(base_resp)
    ft_outputs.append(ft_resp)

# ✅ Display results for manual scoring
print("\n\n📋 Evaluation Table (Manual Scoring):\n")
print("| # | Prompt | Base Output (truncated) | Fine-Tuned Output (truncated) | Plan Score |")
print("|---|--------|--------------------------|-------------------------------|-------------|")

for i, ex in enumerate(test_prompts):
    prompt = ex["instruction"][:35].replace("\n", " ")
    base_short = base_outputs[i][:40].replace("\n", " ")
    ft_short = ft_outputs[i][:40].replace("\n", " ")
    print(f"| {i+1} | {prompt}... | {base_short}... | {ft_short}... | ? |")

# ✅ Compute BLEU / ROUGE-L
print("\n\n📈 Metrics Evaluation")
print("ROUGE:", rouge.compute(predictions=ft_outputs, references=refs))
print("BLEU :", bleu.compute(predictions=ft_outputs, references=[[r] for r in refs]))


The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'Mamba2ForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausal

🔍 Generating predictions for base and fine-tuned models...



📋 Evaluation Table (Manual Scoring):

| # | Prompt | Base Output (truncated) | Fine-Tuned Output (truncated) | Plan Score |
|---|--------|--------------------------|-------------------------------|-------------|
| 1 | Remove files from tar archive  I ha... | I don't have the capability to run a com... | I don't have the capability to run a com... | ? |
| 2 | Display only relevant hunks of a di... | I'm not sure if I'm asking the right que... | I'm not sure if I'm asking the right que... | ? |
| 3 | Why use superflous dash (-) to pass... | The reason is that the tar command is a ... | The reason is that the tar command is a ... | ? |
| 4 | Why is less being run unnecessarily... | ... | ... | ? |
| 5 | How to keep track of changes in /et... | ... | ... | ? |
| 6 | How do I recursively delete all `.t... | ``` #!/bin/bash  # Recursively delete al... | ``` #!/bin/bash  # Recursively delete al... | ? |
| 7 | What happens if I run 

In [25]:
from transformers import AutoModelForCausalLM
from peft import PeftModel
import torch

# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=torch.float16,
    device_map="auto"
)

# Load LoRA adapter
ft_model = PeftModel.from_pretrained(base_model, "./tinyllama-cli-lora")

# Merge LoRA weights
merged_model = ft_model.merge_and_unload()

# Save the full merged model to a new folder
merged_model.save_pretrained("./merged-tinyllama")
print("✅ Merged model saved to ./merged-tinyllama")


ValueError: Can't find 'adapter_config.json' at './tinyllama-cli-lora'