In [1]:
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q datasets
!pip install bitsandbytes>=0.39.0
!pip install --upgrade accelerate transformers

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for transformers (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for peft (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for accelerate (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# Mount Google Drive at /content/drive. The default `lora_path` of
# load_model_and_lora points below this mount point, so this cell has to run
# before the model is loaded.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel, PeftConfig

In [4]:
def load_model_and_lora(base_model_name="Qwen/Qwen2-0.5B", lora_path="/content/drive/MyDrive/Qwen2-0.5B-lora"):
    """Load a 4-bit quantized causal LM and attach a LoRA adapter when possible.

    Args:
        base_model_name: Identifier or path passed to ``from_pretrained`` for
            both the tokenizer and the base model.
        lora_path: Location of the saved PEFT/LoRA adapter.

    Returns:
        A ``(model, tokenizer)`` tuple. ``model`` is the adapter-wrapped
        ``PeftModel``; if loading the adapter raises ``ValueError`` a message
        is printed and the plain base model is returned instead.
    """
    # 1. Quantization settings: 4-bit weights, float16 compute.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
    )

    # 2. Tokenizer and base model. Reuse EOS as the padding token when the
    #    tokenizer does not define one, and mirror that id into the model config.
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        quantization_config=quantization_config,
        device_map="auto",
        pad_token_id=tokenizer.pad_token_id,
    )

    # 3. Attach the LoRA adapter. PeftModel.from_pretrained reads the adapter
    #    config itself, so no separate PeftConfig load is needed.
    try:
        model = PeftModel.from_pretrained(base_model, lora_path)
    except ValueError as e:
        print(f"Error loading LoRA: {e}")
        print("Falling back to base model only")
        model = base_model

    return model, tokenizer

In [5]:
def generate_response(model, tokenizer, prompt, max_length=100):
    """Generate one Windows CMD command for a natural-language request.

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        prompt: The user's request in plain language.
        max_length: Maximum number of new tokens to generate.

    Returns:
        The first line of the generated continuation with markup/chat
        artifacts removed. Empty string if the model produced no text.
    """
    system_prompt = """You are a Windows CMD command generator. Output only the command, no explanations.

Rules:
1. Use complete paths (C:\\Users\\%USERNAME%\\...)
2. Use quotes for spaces in paths
3. Use proper Windows commands (dir, md, move, copy, del, ren)

Example:
User: "list files in downloads"
Output: dir "C:\\Users\\%USERNAME%\\Downloads"
"""

    # Few-shot style prompt; the model is expected to continue after "Output:".
    formatted_prompt = f"{system_prompt}\nUser: {prompt}\nOutput:"

    # Release cached GPU memory before generating.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Encode the prompt (truncated to 512 tokens) and move it to the model's device.
    inputs = tokenizer(
        formatted_prompt,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    ).to(model.device)
    prompt_len = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,  # honour the caller's budget
            do_sample=True,
            temperature=0.2,  # Keep temperature low
            top_p=0.9,
            repetition_penalty=1.3,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the continuation. Decoding the whole sequence and splitting on
    # the last "Output:" would pick up a later, model-invented "User:/Output:"
    # turn instead of the answer to the actual request.
    generated = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    # Keep the first line of the continuation only.
    lines = generated.strip().splitlines()
    response = lines[0] if lines else ""

    # Remove any UI artifacts
    for artifact in ('<', '>', 'COMMAND:', 'Human:', 'AI:', 'User:'):
        response = response.replace(artifact, '')

    return response.strip()

In [7]:
if __name__ == "__main__":
    # Build the quantized model (with the LoRA adapter when it loads) and its tokenizer.
    model, tokenizer = load_model_and_lora()

    # Natural-language requests used as a smoke test. The commented entries are
    # additional prompts that are currently switched off.
    test_questions = [
        "I want to see the contents of my current folder, what to do?",
        "How do I create a new directory?",
        "move the file report.pdf from Downloads to Documents",
        "move all jpg files from Pictures to Backup folder",
        # "move 'My Summer Vacation.mp4' from Videos to Holiday Memories folder",
        # "copy config.json from project folder to backup",
        # "move thesis.docx from Desktop/University/Draft to Documents/Final Thesis",
        # "create a new folder called Projects in Documents",
        # "delete temp.txt from Downloads folder",
        # "rename old_report.pdf to final_report.pdf in Documents folder",
        # "move presentation.pptx and notes.docx from Desktop to Meeting_Materials folder",
        # "move all python files from code/src to projects/python/backup"
    ]

    # Print one generated command per request.
    for question in test_questions:
        response = generate_response(model, tokenizer, question)
        print(f"▶️{response}\n")

model.safetensors:  42%|####2     | 419M/988M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

▶️cd C:\Users\%USERNAME%\downloads

▶️mkdir C:/newdirectory

▶️rename report.pdf C:\Documents and Settings\User\Reports\FileReport.txt

▶️rename *.jpg *Backup/*Pictures/*.jpg

