In [None]:
#| default_exp core

In [None]:
#| export
import subprocess,json,shutil,os
from fastcore.utils import *
from fastcore.xdg import xdg_config_home
from configparser import ConfigParser

from safecmd.bashxtract import *

# Core API
> Core API for safecmd

## Introduction

`safecmd.core` provides a safe execution layer for shell commands. It's designed for situations where you need to run bash commands from untrusted sources—such as LLM-generated commands—while ensuring they can't modify your system in dangerous ways.

The module builds on top of `safecmd.bashxtract` (which parses bash into an AST and extracts commands) to validate commands against an allowlist before execution. The key insight is that rather than trying to blacklist dangerous commands (which is error-prone), we whitelist a generous set of read-only and easily-reverted commands that are safe to run.

The core workflow is:

1. Parse the bash command string using `extract_commands()` from bashxtract
2. Check each extracted command against `ok_cmds` (the allowlist). Commands inside substitutions (`$(...)`), subshells, pipelines, etc are extracted recursively, so nested commands are also validated.
3. Check that output redirects only write to allowed destinations (default: `./` and `/tmp`)
4. If everything passes, execute the command and return the result

This approach handles complex bash syntax correctly—pipelines, command substitutions, subshells, and more—because it uses a proper bash parser rather than regex or string splitting.

The allowlist (`ok_cmds`) uses **prefix matching** on whole words to determine if a command is permitted. A simple entry like `'ls'` matches any command whose first word is `ls`—so `ls`, `ls -la`, and `ls /home/user` are all allowed (but `lsof` is not). A multi-word entry like `'git status'` only matches commands that start with both those words—so `git status` and `git status --short` are allowed, but `git push` is not.

This prefix approach lets you be precise about which subcommands are safe. For instance, you might allow `git log`, `git status`, and `git diff` (all read-only) while blocking `git push` and `git reset` (which modify state).

Some commands are mostly safe but have a few dangerous flags. For example, `find` is useful for searching files, but its `-exec` flag can run arbitrary commands—which defeats our safety guarantees. For these cases, you can specify a **denied list** of flags that will cause the command to be rejected. So we allow `find . -name '*.py'` but block `find . -exec rm {} \;` because `-exec` is in the denied list.

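Output redirects (`>`, `>>`, etc.) are also validated. By default, redirects can only write to the current directory (`./`) or `/tmp`. Destinations are resolved to absolute paths before matching—expanding `~` and environment variables and collapsing `..`—so a bare relative path like `file.txt` is treated as `./file.txt`, while `./../file.txt` is rejected. You can customize allowed destinations via `ok_dests`.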

The first time this module is used a config file (`config.ini`) is created with the default configuration. The file location follows the XDG Base Directory spec via `xdg_config_home()`:

- **Linux**: `~/.config/safecmd/config.ini`
- **macOS**: `~/Library/Application Support/safecmd/config.ini`
- **Windows**: `%LOCALAPPDATA%\safecmd\config.ini` (typically `C:\Users\<username>\AppData\Local\safecmd\config.ini`)

This file can be edited to change configuration.

## How to use

The simplest way to use safecmd is to call `safe_run()` with a bash command string. This function validates the command against the built-in allowlist and executes it if safe, returning the combined stdout/stderr output as a string. If the command fails, it raises an `IOError`. If the command or destinations aren't allowed, it raises either `DisallowedCmd` or `DisallowedDest`.

For example: `safe_run('ls -la | grep py')` will execute and return the filtered directory listing, while `safe_run('rm -rf /')` will raise a `DisallowedCmd` exception before anything dangerous happens.

The module comes with a predefined set of safe commands. This includes common read-only utilities like `cat`, `grep`, `ls`, `diff`, builtins like `cd`, `export`, `[`, and `true`, as well as safe git subcommands like `git log`, `git status`, and `git diff`. The `find` command is included with a denied list that blocks dangerous flags like `-exec` and `-delete`.

Output redirects are allowed but only to permitted destinations. By default, commands can write to the current directory (`./`) and `/tmp`. You can customize this by passing `dests` (to replace the defaults) or `add_dests` (to extend them) to `safe_run()`. For example, `safe_run(cmd, add_dests='~/')` would also allow writing to the home directory.

## API

### Helpers

In [None]:
#| export
def run(
    cmd,  # Command line handed to the shell as-is (`shell=True`); no safety checking happens here
    ignore_ex=False,  # If True, return `(returncode, output)` instead of raising on a non-zero exit
):
    "Run `cmd` in shell; return stripped stdout (+ stripped stderr on a new line, if any); raise IOError on failure"
    res = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    # Strip each stream *before* deciding whether to join it, so whitespace-only stderr
    # (e.g. a lone newline) does not leave a dangling '\n' at the end of the output
    streams = (res.stdout.strip(), res.stderr.strip())
    out = '\n'.join(s for s in streams if s)
    if ignore_ex: return (res.returncode, out)
    # A non-zero exit status is reported as IOError carrying the combined output
    if res.returncode: raise IOError(out)
    return out

Executes a shell command and returns its combined stdout/stderr output. If `ignore_ex=True`, returns a tuple of `(returncode, output)` instead of raising on failure. This is the low-level execution function—it doesn't do any safety checking.

In [None]:
from fastcore.test import test_fail,test_eq

In [None]:
# stdout is returned with surrounding whitespace stripped
test_eq(run('echo hello'), 'hello')
# stdout comes first, then stderr on its own line
test_eq(run('echo out; echo err >&2'), 'out\nerr')
# ignore_ex=True returns (returncode, output) instead of raising
test_eq(run('exit 1', ignore_ex=True), (1, ''))
test_eq(run('echo fail >&2; exit 1', ignore_ex=True), (1,'fail'))
# Without ignore_ex, a non-zero exit status raises
test_fail(lambda: run('exit 1'))

### Command Specifications

In [None]:
#| export
class CmdSpec(BasicRepr):
    "An allowed command prefix plus an optional set of flags that cause a matching command to be rejected"
    def __init__(self,
        name,  # the command (str, will be split into tuple)
        denied=None):  # if set, these flags blocked
        self.name = tuple(name.split())
        self.denied = set(denied or [])

    @classmethod
    def from_str(cls, s):
        "Create from 'cmd:-flag1|-flag2' format (denied flags are pipe-separated; the ':...' part is optional)"
        name, _, denied = s.partition(':')
        # Strip each flag and drop empties, so ' -exec' or a trailing '|' cannot yield a flag that never matches
        flags = [f.strip() for f in denied.split('|') if f.strip()]
        return cls(name, flags or None)

    # Identity is the command name only: a set of specs holds at most one entry per command,
    # regardless of its deny-list
    def __hash__(self): return hash(self.name)
    def __eq__(self, b):
        # Defer to Python's default handling for foreign types instead of raising AttributeError
        if not isinstance(b, CmdSpec): return NotImplemented
        return self.name==b.name

    def __repr__(self):
        s = ' '.join(self.name)
        if self.denied: s += f' !{self.denied}'
        return s

    def __call__(self, toks):
        "Returns True if allowed, False if no match or denied flag found"
        # An empty name would prefix-match every command; treat it as matching nothing
        if not self.name: return False
        if tuple(toks[:len(self.name)]) != self.name: return False
        if not self.denied: return True
        for d in self.denied:
            if d in toks: return False  # exact match
            # Long flag with =value: --flag=value matches --flag
            # NOTE: abbreviated long options (e.g. a unique prefix of --flag) are not detected here
            if d.startswith('--'):
                if any(tok.startswith(d + '=') for tok in toks): return False
            # Single-letter short flag: check if letter appears in any combined -xyz arg.
            # This is deliberately conservative: any single-dash token containing the letter is rejected.
            elif len(d) == 2 and d[0] == '-':
                for tok in toks:
                    if tok.startswith('-') and not tok.startswith('--') and d[1] in tok[1:]: return False
        return True

`CmdSpec` represents an allowed command with optional denied flags. The `name` is stored as a tuple for prefix matching—so `CmdSpec('git log')` matches `git log`, `git log --oneline`, etc. The `denied` set contains flags that will cause the command to be rejected even if the prefix matches.

In [None]:
# `find` is allowed unless one of the denied flags appears in the command
find = CmdSpec('find', denied=['-exec', '-delete'])
# Display the spec via its custom __repr__ (name followed by `!{denied flags}`)
find

find !{'-delete', '-exec'}

In [None]:
# Prefix matches and no denied flag present -> allowed
assert find(['find', '.', '-name', '*.py'])
# Denied flag present -> rejected
assert not find(['find', '.', '-exec', 'rm'])
# Different command -> the spec does not match at all
assert not find(['ls', '-la'])

# Combined short flags should be caught
tar = CmdSpec('tar', denied=['-I', '--to-command'])
assert tar(['tar', '-xvf', 'file.tar'])      # allowed
assert not tar(['tar', '-I', 'zstd'])        # exact match blocked
assert not tar(['tar', '-xvfI', 'zstd'])     # combined flag blocked
assert not tar(['tar', '--to-command=cat'])  # long flag still works

The `from_str` classmethod provides a compact string syntax for creating `CmdSpec` objects. The format is `command:-flag1|-flag2` where the colon separates the command name from the pipe-separated (`|`) denied flags.

For example, `CmdSpec.from_str('find:-exec|-delete')` creates a spec that allows `find` but blocks `-exec` and `-delete` flags. If no denied flags are needed, just pass the command name: `CmdSpec.from_str('cat')`.

In [None]:
# NB: CmdSpec equality compares the command name only, so these checks do not compare deny-lists
# No ':' part -> plain spec
test_eq(CmdSpec.from_str('cat'), CmdSpec('cat'))
# Denied flags after ':' are separated by '|'
test_eq(CmdSpec.from_str('find:-exec|-delete'), CmdSpec('find', denied=['-exec', '-delete']))
# Multi-word names are kept as a multi-token prefix
test_eq(CmdSpec.from_str('git log'), CmdSpec('git log'))

### Default Allowlists

In the default configuration, `ok_dests` specifies where output redirects can write (default: `./, /tmp`). `ok_cmds` contains a generous set of read-only commands plus some safe git operations. Note that `find` blocks dangerous flags like `-exec`:

In [None]:
#| exports
# Default configuration written to config.ini on first use.
# Each command must appear only once in `ok_cmds`: specs are identified by command name, so a bare
# `rg` listed alongside `rg:--pre` would shadow the deny-list entry. `rg` is therefore listed only
# under "Deny-lists".
default_cfg = '''[DEFAULT]
ok_dests = ./, /tmp

ok_cmds = cat, head, tail, less, more, bat
    # Directory listing
    ls, tree, locate
    # Search
    grep, ag, ack, fgrep, egrep
    # Text processing
    cut, sort, uniq, wc, tr, column
    # File info
    file, stat, du, df, which, whereis, type
    # Comparison
    diff, cmp, comm
    # Archives
    unzip, gunzip, bunzip2, unrar
    # Network
    ping, dig, nslookup, host
    # System info
    date, cal, uptime, whoami, hostname, uname, printenv
    # Utilities
    echo, printf, yes, seq, basename, dirname, realpath
    # Git (read-only)
    git log, git show, git diff, git status, git branch, git tag, git remote,
    git stash list, git blame, git shortlog, git describe, git rev-parse,
    git ls-files, git ls-tree, git cat-file, git config --get, git config --list
    # Git (workspace)
    git fetch, git add, git commit, git switch, git checkout
    # Builtins
    cd, pwd, export, test, [, true, false
    # Deny-lists (a command listed here must not also appear as a bare entry above)
    find:-exec|-execdir|-delete|-ok|-okdir
    rg:--pre
    tar:--to-command|--use-compress-program|-I|--transform|--checkpoint-action|--info-script|--new-volume-script
    curl:-o|--output|-O|--remote-name
'''

In [None]:
# cfg_path.unlink()

In [None]:
#| export
# Location of the user's config file, under the XDG config directory
cfg_path = xdg_config_home() / 'safecmd' / 'config.ini'
# Module-level side effect: on first use, seed the file with the default configuration
if not cfg_path.exists(): cfg_path.mk_write(default_cfg)

If `config.ini` doesn't exist, it's created with the default configuration, in the file location following the XDG Base Directory spec.

In [None]:
#| export
def _split_set(s):
    "Split comma-separated string into set of stripped strings"
    return {o.strip() for o in s.split(',')} if s else set()

def _split_specs(s):
    "Split comma-separated string into set of CmdSpecs, merging the deny-lists of entries naming the same command"
    # CmdSpec hashes/compares by command name only, so building a set directly keeps just the
    # first of e.g. `rg` and `rg:--pre` and silently drops the deny-list. Merge duplicates instead,
    # so a deny-list given anywhere for a command always applies.
    by_name = {}
    for c in (s or '').split(','):
        c = c.strip()
        if not c: continue
        spec = CmdSpec.from_str(c)
        if spec.name in by_name: by_name[spec.name].denied |= spec.denied
        else: by_name[spec.name] = spec
    return set(by_name.values())

def parse_cfg(cfg_str):
    "Parse config string, return (ok_dests set, ok_cmds set of CmdSpecs)"
    # interpolation=None: treat values literally, so a `%` in an entry (e.g. a `%TEMP%`-style
    # destination) is not parsed as ConfigParser interpolation syntax and rejected on lookup
    cp = ConfigParser(interpolation=None)
    cp.read_string(cfg_str)
    cfg = cp['DEFAULT']
    ok_dests = _split_set(cfg.get('ok_dests', './, /tmp'))
    # `ok_cmds` is a multi-line value; join the lines with commas so each line contributes entries
    splitcmds = ','.join(cfg['ok_cmds'].splitlines())
    ok_cmds = _split_specs(splitcmds)
    return ok_dests, ok_cmds

In [None]:
#| export
# Module-level allowlists, loaded from the user's config file; these are the defaults used
# by the validation functions and are mutated in place by the add_/rm_allowed_* helpers
ok_dests,ok_cmds = parse_cfg(cfg_path.read_text())

The config is parsed into `ok_dests` and `ok_cmds`.

In [None]:
# Show the parsed destinations and a sample of the parsed command specs (set order is arbitrary)
print(ok_dests)
list(ok_cmds)[:7]

### Safe Execution

In [None]:
#| export
def validate_cmd(toks, cmds=None):
    "Return True if any spec in `cmds` (default: `ok_cmds`) accepts the token list `toks`, else False"
    specs = ok_cmds if cmds is None else cmds
    # A spec rejects both non-matching commands and matching commands that carry a denied flag
    for spec in specs:
        if spec(toks): return True
    return False

`validate_cmd` checks whether a tokenized command matches any entry in the allowlist by calling each `CmdSpec` until one returns `True`.

In [None]:
# Commands whose prefix is in the default allowlist pass
assert validate_cmd(['ls', '-la'])
assert validate_cmd(['git', 'status'])
assert validate_cmd(['find', '.', '-name', '*.py'])
# A denied flag rejects an otherwise-allowed command
assert not validate_cmd(['find', '.', '-exec', 'rm'])
# Commands and subcommands that are not in the allowlist are rejected
assert not validate_cmd(['rm', '-rf', '/'])
assert not validate_cmd(['git', 'push'])

In [None]:
#| export
class DisallowedError(PermissionError):
    "Base class for allowlist violations; its repr is `<ClassName>(<message repr>)`"
    def __repr__(self):
        cls_name = type(self).__name__
        message = self.args[0]
        return f"{cls_name}({message!r})"

class DisallowedCmd(DisallowedError):
    "Raised for a command that is not allowed; the message is its tokens joined by spaces"
    def __init__(self, cmd):
        joined = ' '.join(cmd)
        super().__init__(joined)

class DisallowedDest(DisallowedError):
    "Raised for a redirect destination that is not allowed; the message is the destination"
    def __init__(self, dest):
        super().__init__(dest)

In [None]:
#| export
def normalize_dest(dest):
    "Normalize destination to absolute path, expanding ~ and env vars"
    # NOTE: resolution uses this process's cwd and environment; a `cd` or `export` earlier in the
    # same command line is not taken into account
    dest = os.path.expanduser(dest)
    dest = os.path.expandvars(dest)
    return os.path.normpath(os.path.abspath(dest))

def validate_dest(dest, dests=None):
    "Check if dest (resolved to absolute) is an allowed destination or lies inside one"
    if dests is None: dests = ok_dests
    abs_dest = normalize_dest(dest)
    for pattern in dests:
        abs_pattern = normalize_dest(pattern)
        if abs_dest == abs_pattern: return True
        # Match on a path-component boundary: a plain string prefix would let '/tmp' match
        # '/tmpfoo/x', or the cwd match a sibling directory that shares its name prefix.
        # rstrip keeps the root pattern '/' working (it would otherwise become '//').
        if abs_dest.startswith(abs_pattern.rstrip(os.sep) + os.sep): return True
    return False

`normalize_dest` resolves paths to absolute, expanding `~` and environment variables (like `$HOME`) and normalizing `..` components. This prevents path traversal attacks where `./..` or `./subdir/../../escape` would otherwise match the `./` pattern. `validate_dest` checks if a resolved absolute path starts with any allowed pattern (also resolved to absolute).

In [None]:
# Reference locations used to build the expected absolute paths below
cwd = os.getcwd()
home = os.path.expanduser('~')
parent = os.path.dirname(cwd)

# normalize_dest now returns absolute paths
test_eq(normalize_dest('file.txt'), f'{cwd}/file.txt')
test_eq(normalize_dest('./file.txt'), f'{cwd}/file.txt')
test_eq(normalize_dest('/tmp/file'), '/tmp/file')
test_eq(normalize_dest('../up.txt'), f'{parent}/up.txt')
# `~` and environment variables are expanded
test_eq(normalize_dest('~/home.txt'), f'{home}/home.txt')
test_eq(normalize_dest('$HOME/file'), f'{home}/file')

# With default ok_dests = {'./', '/tmp'}
assert validate_dest('file.txt')       # /cwd/file.txt matches /cwd/
assert validate_dest('./subdir/f.txt') # /cwd/subdir/f.txt matches /cwd/
assert validate_dest('/tmp/test')      # matches /tmp
assert not validate_dest('/etc/passwd')  # no match
assert not validate_dest('../up.txt')    # resolves outside cwd - blocked!
assert not validate_dest('~/file')       # ~/ not in defaults

`validate` checks a bash command string against the allowlists without executing it. This is useful for pre-validation (e.g., in hooks or UI) where you want to know if a command *would* be allowed before actually running it. It raises `DisallowedCmd` or `DisallowedDest` if validation fails.

In [None]:
#| export
def validate(
    cmd:str,  # Bash command string to validate
    cmds=None,  # Allowed commands set; defaults to ok_cmds
    dests=None,  # Allowed destinations set; defaults to ok_dests
):
    "Check every command and redirect target extracted from `cmd`; raise DisallowedCmd or DisallowedDest at the first violation"
    allowed_cmds = ok_cmds if cmds is None else cmds
    allowed_dests = ok_dests if dests is None else dests
    # The operators returned by the parser are not needed for validation
    commands, _ops, redirects = extract_commands(cmd)
    # All commands are checked before any redirect target
    for toks in commands:
        if validate_cmd(toks, allowed_cmds): continue
        raise DisallowedCmd(toks)
    for _op, target in redirects:
        if validate_dest(target, allowed_dests): continue
        raise DisallowedDest(target)

In [None]:
# Safe commands pass validation silently
validate('ls -la | grep py')
validate('git status && echo done')
validate('echo hi > file.txt')  # allowed - writes to ./file.txt
validate('cat data > /tmp/out')  # allowed - /tmp is ok

# Unsafe commands raise exceptions
test_fail(lambda: validate('rm -rf /'), exc=DisallowedCmd)
test_fail(lambda: validate('echo hi > /etc/badplace'), exc=DisallowedDest)
test_fail(lambda: validate('ls $(rm -rf /)'), exc=DisallowedCmd)  # nested command caught
test_fail(lambda: validate('echo > ../escape.txt'), exc=DisallowedDest)  # parent dir not allowed

# Path traversal attacks - must be blocked
test_fail(lambda: validate('echo hi > ./..'), exc=DisallowedDest)  # escapes via ./..
test_fail(lambda: validate('echo hi > ./../escape.txt'), exc=DisallowedDest)  # escapes via ./../
test_fail(lambda: validate('echo hi > ./subdir/../../escape.txt'), exc=DisallowedDest)  # nested escape
test_fail(lambda: validate('echo hi > /tmp/../bad.txt'), exc=DisallowedDest)  # escape via /tmp/../

# Resolved paths that stay within allowed dirs should work
validate('echo hi > ./subdir/../file.txt')  # resolves to ./file.txt, still in cwd

In [None]:
#| export
def safe_run(
    cmd:str,  # Bash command string to execute
    cmds:str=None,  # Allowed commands (comma-separated, config format); defaults to ok_cmds
    dests:str=None,  # Allowed destinations (comma-separated); defaults to ok_dests
    add_cmds:str=None,  # Temp add these commands
    add_dests:str=None,  # Temp add these destinations
    rm_cmds:str=None,  # Temp remove these commands
    rm_dests:str=None,  # Temp remove these destinations
    ignore_ex:bool=False,  # If True, return (returncode, output) instead of raising on error
) -> str:  # Combined stdout/stderr output
    "Validate `cmd` against the effective allowlists, then run it in a shell; raise if anything is disallowed"
    # Start from the explicit override if one is given, otherwise from a copy of the module
    # defaults so the temporary add/remove adjustments never touch the globals
    if dests: allowed_dests = _split_set(dests)
    else: allowed_dests = ok_dests.copy()
    if cmds: allowed_cmds = _split_specs(cmds)
    else: allowed_cmds = ok_cmds.copy()

    # Additions are applied before removals, so a removal wins over an addition of the same item
    allowed_dests.update(_split_set(add_dests))
    allowed_dests.difference_update(_split_set(rm_dests))
    allowed_cmds.update(_split_specs(add_cmds))
    # Removal is by command name, since that is what CmdSpec equality compares
    allowed_cmds.difference_update({CmdSpec(c) for c in _split_set(rm_cmds)})

    validate(cmd, allowed_cmds, allowed_dests)
    return run(cmd, ignore_ex=ignore_ex)

`safe_run` is the main entry point. It parses the bash command, validates all extracted commands and redirect destinations against the allowlists, and only executes if everything passes. `DisallowedCmd` and `DisallowedDest` are raised for violations, giving clear error messages about what was blocked.

In [None]:
# Allowed commands behave exactly like the unchecked `run`
test_eq(safe_run('ls'), run('ls'))
test_eq(safe_run('echo hello | cat'), 'hello')
test_eq(safe_run('[ -f /etc/passwd ] && echo exists'), 'exists')
# Disallowed commands, destinations and denied flags raise before anything is executed
test_fail(lambda: safe_run('env rm -rf asdfff'), exc=DisallowedCmd)
test_fail(lambda: safe_run('echo hi > /badpath/file'), exc=DisallowedDest)
test_fail(lambda: safe_run('find . -exec rm'), exc=DisallowedCmd)
# Redirects to allowed destinations work
# NOTE(review): these two calls really create the files and nothing here removes them
safe_run('echo test > /tmp/safecmd_test_xyz')
safe_run('echo test > test_file_xyz.txt')

Pass `ignore_ex=True` to return a tuple of `return_code,result` instead of raising on error.

In [None]:
# A failing command returns (returncode, output) rather than raising when ignore_ex=True
safe_run('cat /nonexistent_xyz123 2>&1', ignore_ex=True)

(1, 'cat: /nonexistent_xyz123: No such file or directory')

## Bash tool

In Solveit, any function with types and a docstring can be used as a tool. Instead of raising an exception, it's best to return a success/error dict. The functions in this section wrap `safe_run` in this way, and provide documentation suitable for an LLM.

In [None]:
#| export
def bash(
    cmd:str,  # Bash command string to execute - all shell features like pipes and subcommands are supported
    rm_cmds:str=None,  # Temp remove these commands from allow list
    rm_dests:str=None  # Temp remove these destinations from allow list
): # dict with 'success' or 'error' key; value is stdout+stderr for success, or error message otherwise
    """Run a bash shell command line safely and return the concatenated stdout and stderr.
    `cmd` is parsed and all calls are checked against an allow-list.
    If the command is not allowed, STOP and inform the user of the command run and error details; so they can decide whether to whitelist
    it or run it themselves.
    The default allow-list includes most standard unix commands and git subcommands that do not change state or are easily reverted.
    All operators are supported. Output redirects are validated against allowed destinations (default: ./ and /tmp).
    rm_ params are comma-separated strs."""
    # Only PermissionError (which the Disallowed* errors subclass) becomes an 'error' result;
    # any other exception from safe_run propagates to the caller
    try: output = safe_run(cmd, rm_cmds=rm_cmds, rm_dests=rm_dests)
    except PermissionError as e: return {'error': e}
    return {'success': output}

`bash` does not surface any parameters that could allow the LLM to add or change the allowed tool list.

In [None]:
# An allowed pipeline returns its output under the 'success' key
bash('ls | head -2')

{'success': '_quarto.yml\n00_bashxtract.ipynb'}

In [None]:
# Temporarily removing `head` from the allowlist makes the same pipeline fail validation
bash('ls | head -2', rm_cmds='head')

{'error': DisallowedCmd('head -2')}

In [None]:
# Wrapping an allowed command in one that is not allowlisted is rejected as a whole
bash('sudo ls')

{'error': DisallowedCmd('sudo ls')}

In [None]:
#| export
def unsafe_bash(
    cmd:str,  # Bash command string to execute - all shell features like pipes and subcommands are supported
    cmds:str=None,  # Allowed commands; defaults to ok_cmds; DO NOT USE without upfront user permission
    dests:str=None,  # Allowed destinations; defaults to ok_dests; DO NOT USE without upfront user permission
    add_cmds:str=None,  # Temp add these commands to allow list; DO NOT USE without upfront user permission
    add_dests:str=None,  # Temp add these destinations to allow list; DO NOT USE without upfront user permission
    rm_cmds:str=None,  # Temp remove these commands from allow list
    rm_dests:str=None,  # Temp remove these destinations from allow list
): # dict with 'success' or 'error' key; value is stdout+stderr for success, or error message otherwise
    """Run a bash shell command line safely and return the output. `cmd` is parsed and all calls are checked against an allow-list.
    If the command is not allowed, STOP and inform the user of the command run and error details; so they can decide whether to whitelist
    it or run it themselves.
    The default allow-list includes most standard unix commands and git subcommands that do not change state or are easily reverted.
    All operators are supported. Output redirects are validated against allowed destinations.
    cmds/dests and add_/rm_ params are comma-separated strs."""
    # Unlike `bash`, this forwards the parameters that replace or extend the allowlists
    try:
        output = safe_run(cmd, cmds=cmds, dests=dests, add_cmds=add_cmds, add_dests=add_dests,
                          rm_cmds=rm_cmds, rm_dests=rm_dests)
    except PermissionError as e: return {'error': e}
    return {'success': output}

In [None]:
#| export
def add_allowed_cmds(cmds):
    "Add comma-separated `cmds` to the allow list; (this can not be used as an LLM tool)"
    # Mutates the module-level `ok_cmds` in place, so the change persists for later calls
    new_specs = _split_specs(cmds)
    ok_cmds.update(new_specs)

def add_allowed_dests(dests):
    "Add comma-separated `dests` to the allow list; (this can not be used as an LLM tool)"
    # Mutates the module-level `ok_dests` in place, so the change persists for later calls
    new_dests = _split_set(dests)
    ok_dests.update(new_dests)

def rm_allowed_cmds(cmds:str):
    "Remove comma-separated `cmds` from the allow list"
    # Specs compare by command name, so a bare CmdSpec removes the entry whatever its deny-list
    doomed = set(map(CmdSpec, _split_set(cmds)))
    ok_cmds.difference_update(doomed)

def rm_allowed_dests(dests):
    "Remove comma-separated `dests` from the allow list"
    # Removal is by exact string as stored in `ok_dests` (e.g. './'), not by resolved path
    doomed = _split_set(dests)
    ok_dests.difference_update(doomed)

In [None]:
# Removes `ls` from the module-level allowlist; this affects every later call in this session
rm_allowed_cmds('ls')

In [None]:
# `ls` is now rejected because it was removed from the allowlist above
bash('ls -l')

{'error': DisallowedCmd('ls -l')}

## CLI

In [None]:
#| export
import argparse,sys

In [None]:
#| export
def main():
    "CLI entry point: validate and run the given command, mirroring its output and exit status"
    p = argparse.ArgumentParser(description='Run a command (kinda) safely')
    p.add_argument('cmd', nargs=argparse.REMAINDER, help='Command and arguments')
    args = p.parse_args()
    if not args.cmd: p.print_help(); sys.exit(1)
    # ignore_ex=True: a command that exits non-zero is reported via its own output and exit
    # status rather than surfacing as an uncaught IOError traceback
    try: code, out = safe_run(' '.join(args.cmd), ignore_ex=True)
    except DisallowedError as e: print(f"Command not allowed: {e}", file=sys.stderr); sys.exit(1)
    if code:
        if out: print(out, file=sys.stderr)
        sys.exit(code)
    print(out)

The CLI provides a simple command-line interface to `safe_run`.

Usage:
```sh
safecmd ls -la
```

If you have pipes etc, you'll need to quote the whole command:

```sh
safecmd 'ls -la | grep py'
```

The command and all its arguments are joined back into a single string and passed to `safe_run`, which validates against the allowlist before execution. If the command isn't allowed, it prints an error and exits with code `1`.

This lets you use safecmd as a drop-in replacement for running untrusted commands from scripts or other tools: anything not in the allowlist is blocked before execution.