In [14]:
import numpy as np
import pandas as pd

In [15]:
# Read both input tables from the working directory.
c = pd.read_csv("c.csv")
df = pd.read_csv("commands.csv")

In [16]:
# Preview the first rows of the table loaded from commands.csv.
df.head()

Unnamed: 0,user_query,command,description
0,How do I run `htop` so it refreshes every 0.5 ...,htop -d 5,Runs htop with a 0.5-second refresh interval.
1,What's the command to start `htop` with a half...,htop -d 5,Runs htop with a 0.5-second refresh interval.
2,Could you give me the command to launch `htop`...,htop -d 5,Runs htop with a 0.5-second refresh interval.
3,I need to execute `htop` with a 0.5-second del...,htop -d 5,Runs htop with a 0.5-second refresh interval.
4,How can I get `htop` to refresh twice a second?,htop -d 5,Runs htop with a 0.5-second refresh interval.


In [18]:
# Exact-match lookup table: Linux command string -> closest Windows equivalent.
# Keys must match a dataset command character for character; anything that is
# not listed here has no translation.
swap_dict = {
    # --- File & Directory Management ---
    'pwd': 'cd',
    'ls': 'dir',
    'ls -la': 'dir /a',
    'ls -lh': 'dir',
    'ls -l': 'dir',
    'ls -R': 'dir /s',
    'cd ..': 'cd ..',
    'cd ~': 'cd %HOMEPATH%',
    'mkdir newdir': 'mkdir newdir',
    'mkdir -p /path/to/newdir': 'mkdir "C:\\path\\to\\newdir"',
    'rmdir empty_dir': 'rmdir empty_dir',
    'rm file.txt': 'del file.txt',
    'rm -r dir1': 'rmdir /s /q dir1',
    'rm -rf directory': 'rmdir /s /q directory',
    'cp file.txt /backup': 'copy file.txt C:\\backup\\',
    'cp -r dir1 dir2': 'xcopy dir1 dir2 /e /i',
    'mv oldname.txt newname.txt': 'rename oldname.txt newname.txt',
    'mv dir1 /archive': 'move dir1 C:\\archive\\',
    'touch newfile.txt': 'type nul > newfile.txt',
    'cat file.txt': 'type file.txt',
    # `cat a b` only prints both files; it must not create an output file,
    # so the Windows side has no redirect either.
    'cat file1.txt file2.txt': 'type file1.txt file2.txt',
    'head -n 5 access.log': 'powershell -Command "Get-Content access.log -TotalCount 5"',
    'tail -n 10 error.log': 'powershell -Command "Get-Content error.log -Tail 10"',
    'tail -f error.log': 'powershell -Command "Get-Content error.log -Wait"',
    'stat file.txt': 'dir file.txt',
    'echo "text" > file.txt': 'echo text > file.txt',

    # --- Permissions & Ownership (limited in Windows) ---
    # Read+execute for Everyone mirrors the r-x that 755 / +x hand to other
    # users; granting F (full control) would also allow write, delete and
    # permission changes.
    'chmod 755 script.sh': 'icacls script.sh /grant Everyone:RX',
    'chmod +x script.sh': 'icacls script.sh /grant Everyone:RX',
    'chown user:group file.txt': 'icacls file.txt /setowner user',
    'chown user file.txt': 'icacls file.txt /setowner user',

    # --- System Info ---
    'uname -a': 'systeminfo',
    'uname -r': 'ver',
    'uname -m': 'echo %PROCESSOR_ARCHITECTURE%',
    'df -h': 'wmic logicaldisk get size,freespace,caption',
    'free -m': 'wmic os get FreePhysicalMemory,TotalVisibleMemorySize /Format:List',
    'uptime': 'net stats srv',
    'top': 'tasklist',
    'htop': 'taskmgr',

    # --- Process Management ---
    'ps aux': 'tasklist',
    'ps -ef': 'tasklist',
    'kill -9 1234': 'taskkill /PID 1234 /F',
    'kill 1234': 'taskkill /PID 1234',
    'pkill nginx': 'taskkill /IM nginx.exe /F',

    # --- User Management ---
    'whoami': 'whoami',
    'id': 'whoami /user',
    'adduser newuser': 'net user newuser /add',
    'userdel -r testuser': 'net user testuser /delete',
    'passwd newuser': 'net user newuser *',

    # --- Networking ---
    'netstat -tulpn': 'netstat -ano',
    'ifconfig': 'ipconfig /all',
    'ping -c 4 google.com': 'ping google.com',
    'ping 8.8.8.8': 'ping 8.8.8.8',
    'traceroute google.com': 'tracert google.com',
    'ip addr show': 'ipconfig',
    'ss -tulwn': 'netstat -ano',
    'netstat -an': 'netstat -an',
    'ip link show': 'netsh interface show interface',
    'route -n': 'route print',

    # --- Disk & Hardware ---
    'lsblk': 'wmic logicaldisk get name,size,description',
    'lscpu': 'wmic cpu get name,numberofcores,numberoflogicalprocessors',
    'df /': 'wmic logicaldisk get size,freespace,caption',
    'du -sh /var/log': 'powershell -Command "Get-ChildItem C:\\Windows\\Logs -Recurse | Measure-Object -Sum Length"',

    # --- Package Management (APT/YUM/DNF → Chocolatey or Winget) ---
    # NOTE(review): `apt update` only refreshes the package index, whereas
    # `choco upgrade all` installs upgrades — confirm this pairing is intended.
    'apt update': 'choco upgrade all -y',
    'apt install nginx': 'choco install nginx -y',
    'apt remove nginx': 'choco uninstall nginx -y',
    'yum install httpd': 'choco install apache-httpd -y',
    'dnf install python3': 'choco install python -y',

    # --- Miscellaneous ---
    'clear': 'cls',
    'history': 'doskey /history',
    'date': 'date /t',
    'time': 'time /t',
    'echo $PATH': 'echo %PATH%',
    'exit': 'exit'
}


In [19]:
# Translate each command through swap_dict; a command with no exact entry in
# the table is carried over unchanged into the "windows" column.
df["windows"] = df["command"].map(lambda cmd: swap_dict.get(cmd, cmd))
print(df)

                                             user_query  \
0     How do I run `htop` so it refreshes every 0.5 ...   
1     What's the command to start `htop` with a half...   
2     Could you give me the command to launch `htop`...   
3     I need to execute `htop` with a 0.5-second del...   
4       How can I get `htop` to refresh twice a second?   
...                                                 ...   
7387               Initiate two pings to the IP address   
7388   Show me the command to send two ping requests to   
7389  I want to confirm if 192.168.1.1 is reachable....   
7390  Perform a `ping` to 192.168.1.1, limit it to t...   
7391                Go ahead and ping 192.168.1.1 twice   

                    command  \
0                 htop -d 5   
1                 htop -d 5   
2                 htop -d 5   
3                 htop -d 5   
4                 htop -d 5   
...                     ...   
7387  ping -c 2 192.168.1.1   
7388  ping -c 2 192.168.1.1   
7389  ping -

In [21]:
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer, util

# `df` is reused from the earlier cells rather than re-read from disk: it
# already carries the "windows" column that a fresh read would not have.

# Two parallel lists: position i in each comes from the same row of `df`.
queries_list = df["user_query"].to_list()
commands_list = df["windows"].to_list()

# Sentence-embedding model shared by the stored queries and incoming requests.
# NOTE(review): the checkpoint name says "dot", while get_best_command ranks
# with cosine similarity — confirm that scoring choice is intended.
model = SentenceTransformer("multi-qa-mpnet-base-dot-v1")

# Embed every stored query once, keeping the result as a tensor.
query_embeddings = model.encode(queries_list, convert_to_tensor=True)

def get_best_command(user_input, top_k=3):
    """Return the best-matching commands for a natural-language request.

    The request is embedded with the module-level ``model`` and compared by
    cosine similarity against the pre-computed ``query_embeddings``. Stored
    queries are then walked from most to least similar, keeping only the
    first hit for each distinct command so no command is listed twice.

    Args:
        user_input: Free-text description of what the user wants to do.
        top_k: Maximum number of distinct commands to return. Zero or a
            negative value yields an empty list.

    Returns:
        A list of at most ``top_k`` dicts in descending score order. Each dict
        has ``"user_query"`` (the stored query that matched), ``"command"``
        (the entry of ``commands_list`` at the same position) and ``"score"``
        (the cosine similarity as a Python float).
    """
    # Without this guard the loop below would still emit one match, because
    # the limit is only checked after a match has been appended.
    if top_k <= 0:
        return []

    input_emb = model.encode(user_input, convert_to_tensor=True)
    scores = util.cos_sim(input_emb, query_embeddings)[0]

    # Every stored query has to be ranked, not just the first top_k: several
    # of the closest queries may share one command and get de-duplicated.
    ranked_scores, ranked_indices = torch.sort(scores, descending=True)

    seen_commands = set()
    matches = []

    # .tolist() converts both tensors to plain Python numbers in one step
    # instead of unwrapping a tensor element on every iteration.
    for score, idx in zip(ranked_scores.tolist(), ranked_indices.tolist()):
        cmd = commands_list[idx]
        if cmd in seen_commands:
            continue
        seen_commands.add(cmd)
        matches.append({
            "user_query": queries_list[idx],
            "command": cmd,
            "score": score
        })
        if len(matches) >= top_k:
            break

    return matches

# Smoke test: a few free-form requests to eyeball the ranking by hand.
test_queries = [
    "show me all files in this folder with details",
    "open file.txt in read only mode using vim",
    "how to refresh htop every half second",
    "remove unused dependencies in fedora",
]

for query in test_queries:
    print(f"\nQuery: {query}")
    results = get_best_command(query)
    for hit in results:
        print(f"  → {hit['command']} (matched '{hit['user_query']}') score={hit['score']:.4f}")



Query: show me all files in this folder with details
  → ls -d */ (matched 'Show me only the directories in this folder') score=0.7997
  → dir /s (matched 'Show me all files and directories, including those nested within others') score=0.7784
  → dir /a (matched 'I need to see everything in this folder, including dot files, with their full attributes') score=0.7776

Query: open file.txt in read only mode using vim
  → vim -R file.txt (matched 'Open file.txt in Vim in read-only mode') score=0.9893
  → vim file.txt (matched 'Please open file.txt using Vim') score=0.8068
  → vim -u NONE file.txt (matched 'Run Vim on `file.txt` in a clean mode, ignoring `.vimrc`') score=0.7464

Query: how to refresh htop every half second
  → htop -d 5 (matched 'How do I run `htop` so it refreshes every 0.5 seconds?') score=0.9170
  → taskmgr (matched 'Execute the htop command') score=0.6853
  → htop -u user (matched 'Monitor processes for 'user' using `htop`') score=0.6008

Query: remove unused dependenc

In [22]:
# Broader spot check covering files, text search, archives, networking,
# packages and system administration requests.
test_queries = [
    # listing and system state
    "list all files in the current directory with sizes and permissions",
    "show hidden files too when listing directory contents",
    "display the disk usage of each folder in the current directory",
    "check how much free memory I have right now",
    "see running processes sorted by CPU usage",
    "find out my current working directory",
    "show the current system date and time",
    # reading and searching files
    "display the first 10 lines of a file named log.txt",
    "count how many lines are in file.txt",
    "search for the word 'error' in system.log file",
    "show only the lines that contain 'python' in a file",
    # file and directory manipulation
    "copy file.txt to the backup folder",
    "move all .log files to the logs directory",
    "remove a directory named temp along with its contents",
    "create a new directory called projects",
    "display the last 20 lines of messages.log file",
    "find all .py files in the current folder and subfolders",
    "change file permissions to make script.sh executable",
    # archives
    "compress the backup folder into backup.tar.gz",
    "extract files from archive.tar.gz into current directory",
    # networking
    "show network configuration details of my system",
    "display the IP address of my device",
    "ping google.com five times to check connectivity",
    # package management
    "install nginx web server using apt",
    "update all installed packages on my ubuntu system",
    "remove apache2 from my system completely",
    "clean up unused packages and dependencies",
    # sessions and administration
    "display all active network connections",
    "show currently logged in users",
    "reboot the system immediately",
]

for query in test_queries:
    print(f"\nQuery: {query}")
    results = get_best_command(query)
    for hit in results:
        print(f"  → {hit['command']} (matched '{hit['user_query']}') score={hit['score']:.4f}")


Query: list all files in the current directory with sizes and permissions
  → dir (matched 'List files with their permissions and human-readable sizes') score=0.8393
  → dir /a (matched 'Could you run the command to list all files with their sizes, permissions, and modification dates?') score=0.8242
  → ls -S (matched 'List files by size') score=0.7761

Query: show hidden files too when listing directory contents
  → dir /a (matched 'Display the contents of the current directory, showing hidden files and detailed information') score=0.8109
  → dir /s (matched 'How do I list files in subdirectories as well?') score=0.6548
  → ls -d */ (matched 'Show me only the directories in this folder') score=0.6546

Query: display the disk usage of each folder in the current directory
  → wmic logicaldisk get size,freespace,caption (matched 'Show me the disk usage for the root filesystem') score=0.7868
  → findmnt -D (matched 'I need to see the disk usage for all currently mounted filesystems') sco

In [23]:
# Write the model, the stored-query embeddings and the aligned command list
# to disk so they can be loaded again later.
model.save("saved_model_2")
torch.save(obj=query_embeddings, f="query_embeddings_2.pt")
torch.save(obj=commands_list, f="commands_list_2.pt")

In [27]:
# Focused spot check on copy / move / rename / delete style requests.
test_queries = [
    "copy file.txt to the backup folder",
    "move all .log files to logs directory",
    "rename oldfile.txt to newfile.txt",
    "delete temp.txt permanently",
    "remove directory named temp along with its contents",
    "create a new folder called projects",
    "duplicate all files in folder1 to folder2",
    "replace all occurrences of 'foo' with 'bar' in file.txt",
]

for query in test_queries:
    print(f"\nQuery: {query}")
    results = get_best_command(query)
    for hit in results:
        print(f"  → {hit['command']} (matched '{hit['user_query']}') score={hit['score']:.4f}")


Query: copy file.txt to the backup folder
  → copy file.txt C:\backup\ (matched 'Copy `file.txt` to `/backup`') score=0.9340
  → cp -v file.txt /backup (matched 'Execute the command to copy file.txt to the /backup directory, verbosely') score=0.8657
  → cp -u file.txt /backup (matched 'Can you help me copy `file.txt` to `/backup` without overwriting an existing file unless it's older?') score=0.8485

Query: move all .log files to logs directory
  → cd /var/log (matched 'Move into the `/var/log` directory') score=0.7880
  → pushd /var/log (matched 'Could you move me to `/var/log` and push my current working directory?') score=0.7147
  → find /var -name '*.log' (matched 'I need to track down all .log files residing in /var') score=0.7036

Query: rename oldfile.txt to newfile.txt
  → rename oldname.txt newname.txt (matched 'What command should I use to rename `oldname.txt`'s filename to `newname.txt`?') score=0.8807
  → ln file1.txt file2.txt (matched 'Please show me how to hard link `fi

In [36]:
# Distinct values of the "windows" column, in order of first appearance.
df["windows"].unique()

array(['htop -d 5', 'vim -R file.txt', 'nano -c config.conf', 'cd /etc',
       'ls -1', 'cp -u file.txt /backup', 'mv -n file.txt /tmp',
       'rm -v file.txt', 'cat << EOF > script.sh', 'less -i logfile.txt',
       'head -n -5 access.log', 'tail -q -n 3 file1.txt file2.txt',
       'uname -n', 'df -i', 'chmod -R 750 /data', 'chown :group file.txt',
       'apt clean', 'yum check-update', 'dnf autoremove',
       'ping -c 2 192.168.1.1', 'ifconfig eth0 down', 'netstat -r', 'w',
       'usermod -aG sudo newuser', 'groupadd newgroup', 'ps -u user',
       'pkill -u user', 'htop -t', "vim -c 'set number' file.txt",
       'nano -w config.conf', 'cd /usr/local/bin', 'ls -S',
       'cp -v file.txt /backup', 'mv -u dir1 /archive',
       'touch -a file.txt', 'cat -s file.txt', 'less -M logfile.txt',
       'head -q -n 2 file1.txt file2.txt', 'tail -v -n 2 error.log',
       'free -s 2', 'chmod u+x script.sh', 'chown -R user:group /var/www',
       'apt list --installed', 'yum history', '