Skip to content

Commit

Permalink
Remove unused function split_file from file_operations.py (#4658)
Browse files (browse the repository at this point in the history)
  • Loading branch information
Pwuts committed Jun 12, 2023
1 parent ff46c16 commit a9d177e
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 69 deletions.
32 changes: 0 additions & 32 deletions autogpt/commands/file_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,38 +119,6 @@ def log_operation(
)


def split_file(
    content: str, max_length: int = 4000, overlap: int = 0
) -> Generator[str, None, None]:
    """
    Split text into chunks of a specified maximum length with a specified overlap
    between chunks.

    Consecutive chunks start ``max_length - overlap`` characters apart, so every
    chunk after the first begins with the last ``overlap`` characters of the
    previous full-length chunk. No chunk is ever longer than ``max_length``.

    :param content: The input text to be split into chunks
    :param max_length: The maximum length of each chunk,
        default is 4000 (about 1k tokens)
    :param overlap: The number of overlapping characters between chunks,
        default is no overlap
    :return: A generator yielding chunks of text
    :raises ValueError: If ``max_length`` is not positive, or if ``overlap`` is
        negative or not smaller than ``max_length``. Because this is a
        generator, the check runs when iteration starts, not at call time.
    """
    if max_length <= 0:
        raise ValueError(f"max_length must be positive, got {max_length}")
    if not 0 <= overlap < max_length:
        # overlap >= max_length would make the window stand still or move
        # backwards, so the loop would never terminate.
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < max_length, got {overlap}"
        )

    step = max_length - overlap
    for start in range(0, len(content), step):
        chunk = content[start : start + max_length]

        # A trailing chunk that is no longer than the overlap lies entirely
        # inside the previous chunk, so it has already been consumed. The
        # first chunk is always emitted so short content is never dropped.
        if start > 0 and len(chunk) <= overlap:
            break

        yield chunk


@command("read_file", "Read a file", '"filename": "<filename>"')
def read_file(filename: str, agent: Agent) -> str:
"""Read a file and return the contents
Expand Down
37 changes: 0 additions & 37 deletions tests/unit/test_file_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,43 +188,6 @@ def test_log_operation_with_checksum(agent: Agent):
assert f"log_test: path/to/test #ABCDEF\n" in content


@pytest.mark.parametrize(
    "max_length, overlap, content, expected",
    [
        (4, 1, "abcdefghij", ["abcd", "defg", "ghij"]),
        (4, 0, "abcdefghijkl", ["abcd", "efgh", "ijkl"]),
        (4, 0, "abcdefghijklm", ["abcd", "efgh", "ijkl", "m"]),
        (4, 0, "abcdefghijk", ["abcd", "efgh", "ijk"]),
    ],
)
def test_split_file(max_length, overlap, content, expected):
    """Test splitting a file into chunks.

    Each case supplies the chunk size, the overlap between chunks, the text to
    split, and the exact list of chunks expected back.
    """
    chunk_iter = file_ops.split_file(content, max_length=max_length, overlap=overlap)
    assert list(chunk_iter) == expected


def test_read_file(
mock_MemoryItem_from_text,
test_file_with_content_path: Path,
Expand Down

0 comments on commit a9d177e

Please sign in to comment.