Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
* [Configuring VectorCode](#configuring-vectorcode)
* [Vectorising Your Code](#vectorising-your-code)
* [File Specs](#file-specs)
* [Git Hooks](#git-hooks)
* [Making a Query](#making-a-query)
* [Listing All Collections](#listing-all-collections)
* [Removing a Collection](#removing-a-collection)
Expand Down Expand Up @@ -353,6 +354,35 @@ on certain conditions. See
[the wiki](https://github.com/Davidyz/VectorCode/wiki/Tips-and-Tricks#git-hooks)
for an example of how to use it with git hooks.

#### Git Hooks

To keep the embeddings up-to-date, you may find it useful to set up some git
hooks. The CLI provides a subcommand, `vectorcode hooks`, that helps you manage
hooks when working with a git repository. You can put some custom hooks in
`~/.config/vectorcode/hooks/` and the `vectorcode hooks` command will pick them
up and append them to your existing hooks, or create new hook scripts if they
don't exist yet. The hook files should be named the same as they would be under
the `.git/hooks` directory. For example, a pre-commit hook would be named
`~/.config/vectorcode/hooks/pre-commit`. By default, there are 2 pre-defined
hooks:
```bash
# pre-commit hook that vectorises changed files before you commit.
diff_files=$(git diff --cached --name-only)
[ -z "$diff_files" ] || vectorcode vectorise $diff_files
```
```bash
# post-checkout hook that vectorises changed files when you check out a
# different branch/tag/commit
files=$(git diff --name-only "$1" "$2")
[ -z "$files" ] || vectorcode vectorise $files
```
When you run `vectorcode hooks` in a git repo, these 2 hooks will be added to
your `.git/hooks/`. Hooks that are managed by VectorCode will be wrapped by
`# VECTORCODE_HOOK_START` and `# VECTORCODE_HOOK_END` comment lines. They help
VectorCode determine whether hooks have been added, so don't delete the markers
unless you know what you're doing. To remove the hooks, simply delete the lines
wrapped by these 2 comment strings.

### Making a Query

To retrieve a list of documents from the database, you can use the following command:
Expand Down
13 changes: 13 additions & 0 deletions src/vectorcode/cli_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class CliAction(Enum):
clean = "clean"
prompts = "prompts"
chunks = "chunks"
hooks = "hooks"


@dataclass
Expand Down Expand Up @@ -283,6 +284,16 @@ def get_cli_parser():
)

subparsers.add_parser("drop", parents=[shared_parser], help="Remove a collection.")
hooks_parser = subparsers.add_parser(
"hooks", parents=[shared_parser], help="Inject git hooks."
)
hooks_parser.add_argument(
"--force",
"-f",
action="store_true",
default=False,
help="Override existing VectorCode hooks.",
)

init_parser = subparsers.add_parser(
"init",
Expand Down Expand Up @@ -379,6 +390,8 @@ async def parse_cli_args(args: Optional[Sequence[str]] = None):
configs_items["chunk_size"] = main_args.chunk_size
configs_items["overlap_ratio"] = main_args.overlap
configs_items["encoding"] = main_args.encoding
case "hooks":
configs_items["force"] = main_args.force
return Config(**configs_items)


Expand Down
4 changes: 4 additions & 0 deletions src/vectorcode/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ async def async_main():
from vectorcode.subcommands import chunks

return_val = await chunks(final_configs)
case CliAction.hooks:
from vectorcode.subcommands import hooks

return await hooks(cli_args)

from vectorcode.common import start_server, try_server

Expand Down
2 changes: 2 additions & 0 deletions src/vectorcode/subcommands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from vectorcode.subcommands.chunks import chunks
from vectorcode.subcommands.clean import clean
from vectorcode.subcommands.drop import drop
from vectorcode.subcommands.hooks import hooks
from vectorcode.subcommands.init import init
from vectorcode.subcommands.ls import ls
from vectorcode.subcommands.prompt import prompts
Expand All @@ -14,6 +15,7 @@
"chunks",
"clean",
"drop",
"hooks",
"init",
"ls",
"prompts",
Expand Down
105 changes: 105 additions & 0 deletions src/vectorcode/subcommands/hooks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import glob
import logging
import os
import platform
import re
import stat
from pathlib import Path
from typing import Optional

from vectorcode.cli_utils import GLOBAL_CONFIG_PATH, Config, find_project_root

# Module-level logger, named after this module.
logger = logging.getLogger(name=__name__)
# Directory that `load_hooks` scans for user-defined hook files: the `hooks`
# folder located next to the global config file.
__GLOBAL_HOOKS_PATH = Path(GLOBAL_CONFIG_PATH).parent / "hooks"


# Built-in hooks installed by the `hooks` subcommand. `load_hooks` may add
# entries to, or replace entries in, this table at runtime.
# Keys: name of the hook, e.g. `pre-commit`.
# Values: lines of the hook (shell commands, one list item per line).
__HOOK_CONTENTS: dict[str, list[str]] = {
"pre-commit": [
"diff_files=$(git diff --cached --name-only)",
'[ -z "$diff_files" ] || vectorcode vectorise $diff_files',
],
"post-checkout": [
'files=$(git diff --name-only "$1" "$2")',
'[ -z "$files" ] || vectorcode vectorise $files',
],
}


def __lines_are_empty(lines: list[str]) -> bool:
    """Tell whether ``lines`` carries no content at all.

    Returns ``True`` for an empty list and for a list in which every entry
    consists solely of whitespace; ``False`` as soon as one entry has any
    other character.
    """
    blank = re.compile(r"^\s*$")
    for text in lines:
        if blank.match(text) is None:
            # Found a line with real content; no need to look further.
            return False
    return True


def load_hooks():
    """Merge user-defined hooks from the global hooks directory into the table.

    Every regular file directly under ``__GLOBAL_HOOKS_PATH`` is read. The
    file's stem (file name without its last suffix) is used as the hook name,
    and its lines replace or extend the matching entry in ``__HOOK_CONTENTS``.
    Files that are empty or contain only whitespace are ignored, so they never
    override a built-in hook.
    """
    for file in glob.glob(str(__GLOBAL_HOOKS_PATH / "*")):
        if not os.path.isfile(file):
            # The wildcard also matches sub-directories; calling open() on one
            # would raise and abort the whole command, so skip non-files.
            continue
        hook_name = Path(file).stem
        with open(file) as fin:
            lines = fin.readlines()
        if not __lines_are_empty(lines):
            # Item assignment mutates the module-level dict in place, so no
            # `global` declaration is needed.
            __HOOK_CONTENTS[hook_name] = lines


class HookFile:
prefix = "# VECTORCODE_HOOK_START"
suffix = "# VECTORCODE_HOOK_END"
prefix_pattern = re.compile(r"^\s*#\s*VECTORCODE_HOOK_START\s*")
suffix_pattern = re.compile(r"^\s*#\s*VECTORCODE_HOOK_END\s*")

def __init__(self, path: str | Path, git_dir: Optional[str | Path] = None):
self.path = path
self.lines: list[str] = []
if os.path.isfile(self.path):
with open(self.path) as fin:
self.lines.extend(fin.readlines())

def has_vectorcode_hooks(self, force: bool = False) -> bool:
for start, start_line in enumerate(self.lines):
if self.prefix_pattern.match(start_line) is None:
continue

for end in range(start + 1, len(self.lines)):
if self.suffix_pattern.match(self.lines[end]) is not None:
if force:
logger.debug("`force` cleaning existing VectorCode hooks...")
new_lines = self.lines[:start] + self.lines[end + 1 :]
self.lines[:] = new_lines
return False
logger.debug(
f"Found vectorcode hook block between line {start} and {end} in {self.path}:\n{''.join(self.lines[start + 1 : end])}"
)
return True

return False

def inject_hook(self, content: list[str], force: bool = False):
if len(self.lines) == 0 or not self.has_vectorcode_hooks(force):
self.lines.append(self.prefix + "\n")
self.lines.extend(i if i.endswith("\n") else i + "\n" for i in content)
self.lines.append(self.suffix + "\n")
with open(self.path, "w") as fin:
fin.writelines(self.lines)
if platform.system() != "Windows":
# for unix systems, set the executable bit.
curr_mode = os.stat(self.path).st_mode
os.chmod(self.path, mode=curr_mode | stat.S_IXUSR)


async def hooks(configs: Config) -> int:
    """Install every known hook into the git repository of the project.

    The repository is located by searching for a ``.git`` entry starting from
    ``configs.project_root`` (or the current directory when that is unset).
    User-defined hooks are loaded first, then each hook in the table is
    injected into ``<repo>/.git/hooks/<name>``.

    Args:
        configs: Parsed CLI configuration; ``project_root`` and ``force`` are
            the fields read here.

    Returns:
        ``0`` on success, ``1`` when no enclosing git repository is found.
    """
    project_root = configs.project_root or "."
    repo_root = find_project_root(project_root, ".git")
    if repo_root is None:
        logger.error(f"{project_root} is not inside a git repo directory!")
        return 1

    # Pick up user-provided hooks before walking the table.
    load_hooks()
    for hook, hook_lines in __HOOK_CONTENTS.items():
        hook_file_path = os.path.join(repo_root, ".git", "hooks", hook)
        logger.info(f"Writing {hook} hook into {hook_file_path}.")
        print(f"Processing {hook} hook...")
        HookFile(hook_file_path, git_dir=repo_root).inject_hook(
            hook_lines, configs.force
        )
    return 0
Loading