In [2]:
import numpy as np
import pandas as pd

In [3]:
import torch
from sentence_transformers import SentenceTransformer, util

In [4]:
df = pd.read_csv('commands.csv')
c = pd.read_csv('c.csv')

In [5]:
swap_dict = {
    # --- File & Directory Management ---
    'pwd': 'cd',
    'ls': 'dir',
    'ls -la': 'dir /a',
    'ls -lh': 'dir',
    'ls -l': 'dir',
    'ls -R': 'dir /s',
    'cd ..': 'cd ..',
    'cd ~': 'cd %HOMEPATH%',
    'mkdir newdir': 'mkdir newdir',
    'mkdir -p /path/to/newdir': 'mkdir "C:\\path\\to\\newdir"',
    'rmdir empty_dir': 'rmdir empty_dir',
    'rm file.txt': 'del file.txt',
    'rm -r dir1': 'rmdir /s /q dir1',
    'rm -rf directory': 'rmdir /s /q directory',
    'cp file.txt /backup': 'copy file.txt C:\\backup\\',
    'cp -r dir1 dir2': 'xcopy dir1 dir2 /e /i',
    'mv oldname.txt newname.txt': 'rename oldname.txt newname.txt',
    'mv dir1 /archive': 'move dir1 C:\\archive\\',
    'touch newfile.txt': 'type nul > newfile.txt',
    'cat file.txt': 'type file.txt',
    'cat file1.txt file2.txt': 'type file1.txt file2.txt > combined.txt',
    'head -n 5 access.log': 'powershell -Command "Get-Content access.log -TotalCount 5"',
    'tail -n 10 error.log': 'powershell -Command "Get-Content error.log -Tail 10"',
    'tail -f error.log': 'powershell -Command "Get-Content error.log -Wait"',
    'stat file.txt': 'dir file.txt',
    'echo "text" > file.txt': 'echo text > file.txt',

    # --- Permissions & Ownership (limited in Windows) ---
    'chmod 755 script.sh': 'icacls script.sh /grant Everyone:F',
    'chmod +x script.sh': 'icacls script.sh /grant Everyone:F',
    'chown user:group file.txt': 'icacls file.txt /setowner user',
    'chown user file.txt': 'icacls file.txt /setowner user',

    # --- System Info ---
    'uname -a': 'systeminfo',
    'uname -r': 'ver',
    'uname -m': 'echo %PROCESSOR_ARCHITECTURE%',
    'df -h': 'wmic logicaldisk get size,freespace,caption',
    'free -m': 'wmic os get FreePhysicalMemory,TotalVisibleMemorySize /Format:List',
    'uptime': 'net stats srv',
    'top': 'tasklist',
    'htop': 'taskmgr',

    # --- Process Management ---
    'ps aux': 'tasklist',
    'ps -ef': 'tasklist',
    'kill -9 1234': 'taskkill /PID 1234 /F',
    'kill 1234': 'taskkill /PID 1234',
    'pkill nginx': 'taskkill /IM nginx.exe /F',

    # --- User Management ---
    'whoami': 'whoami',
    'id': 'whoami /user',
    'adduser newuser': 'net user newuser /add',
    'userdel -r testuser': 'net user testuser /delete',
    'passwd newuser': 'net user newuser *',
    'netstat -tulpn': 'netstat -ano',
    'ifconfig': 'ipconfig /all',
    'ping -c 4 google.com': 'ping google.com',
    'ping 8.8.8.8': 'ping 8.8.8.8',
    'traceroute google.com': 'tracert google.com',

    # --- Networking ---
    'ip addr show': 'ipconfig',
    'ss -tulwn': 'netstat -ano',
    'netstat -an': 'netstat -an',
    'ip link show': 'netsh interface show interface',
    'route -n': 'route print',

    # --- Disk & Hardware ---
    'lsblk': 'wmic logicaldisk get name,size,description',
    'lscpu': 'wmic cpu get name,numberofcores,numberoflogicalprocessors',
    'df /': 'wmic logicaldisk get size,freespace,caption',
    'du -sh /var/log': 'powershell -Command "Get-ChildItem C:\\Windows\\Logs -Recurse | Measure-Object -Sum Length"',

    # --- Package Management (APT/YUM/DNF → Chocolatey or Winget) ---
    'apt update': 'choco upgrade all -y',
    'apt install nginx': 'choco install nginx -y',
    'apt remove nginx': 'choco uninstall nginx -y',
    'yum install httpd': 'choco install apache-httpd -y',
    'dnf install python3': 'choco install python -y',

    # --- Miscellaneous ---
    'clear': 'cls',
    'history': 'doskey /history',
    'date': 'date /t',
    'time': 'time /t',
    'echo $PATH': 'echo %PATH%',
    'exit': 'exit'
}

df["windows"] = df["command"].map(swap_dict).fillna(df["command"])


In [6]:
queries_list = df['user_query'].tolist()
commands_list = df['windows'].tolist()

# Load semantic model
model = SentenceTransformer('multi-qa-mpnet-base-dot-v1')

# Encode all queries
query_embeddings = model.encode(queries_list, convert_to_tensor=True)


In [7]:
import re

def extract_files_or_paths(text):
    # Match common filename patterns
    patterns = re.findall(r'\b[\w\-.]+\.\w+\b', text)  # e.g., file.txt, config.conf
    paths = re.findall(r'(?:/[\w\-.]+)+', text)        # e.g., /etc/config
    return list(set(patterns + paths))


In [8]:
def get_best_command(user_input, top_k=3):
    input_emb = model.encode(user_input, convert_to_tensor=True)
    scores = util.cos_sim(input_emb, query_embeddings)[0]
    top_results = torch.topk(scores, k=len(scores))

    seen_commands = set()
    matches = []
    extracted_files = extract_files_or_paths(user_input)

    for score, idx in zip(top_results[0], top_results[1]):
        cmd = commands_list[idx]
        if cmd not in seen_commands:
            # Dynamic substitution logic
            if extracted_files:
                example_match = re.search(r'\b[\w\-.]+\.\w+\b', cmd)
                if example_match:
                    example_file = example_match.group(0)
                    cmd = cmd.replace(example_file, extracted_files[0])

            matches.append({
                "user_query": queries_list[idx],
                "command": cmd,
                "score": float(score)
            })
            seen_commands.add(cmd)
        if len(matches) >= top_k:
            break

    return matches


In [13]:
test_queries = [
    "open mynotes.txt using vim in read only mode",
    "copy config.conf to /etc/backup/",
    "view log.txt content",
    'copy new.txt to backup/'
]

for q in test_queries:
    print(f"\nQuery: {q}")
    results = get_best_command(q)
    for r in results:
        print(f"  → {r['command']} (matched '{r['user_query']}') score={r['score']:.4f}")



Query: open mynotes.txt using vim in read only mode
  → vim -R mynotes.txt (matched 'Open file.txt in Vim in read-only mode') score=0.8573
  → vim -R mynotes.txt (matched 'Please open file.txt using Vim in read-only mode') score=0.8497
  → vim -R mynotes.txt (matched 'How do I open `file.txt` with Vim in read-only mode?') score=0.8479

Query: copy config.conf to /etc/backup/
  → nano -B /etc/backup (matched 'Open `config.conf` using Nano, and have it automatically generate backups') score=0.7514
  → nano -B /etc/backup (matched 'How can I use Nano to edit `config.conf` so that it creates a backup on save?') score=0.7180
  → nano -B /etc/backup (matched 'What's the best way to open `config.conf` in Nano and get it to create a backup?') score=0.7097

Query: view log.txt content
  → cat log.txt (matched 'Display the contents of `logfile.txt`') score=0.8566
  → cat log.txt (matched 'How do I view `logfile.txt`?') score=0.8319
  → cat log.txt (matched 'Get the contents of `logfile.txt`') s