Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Split markdown files when larger than max issue body size #265

Merged
merged 5 commits into from
May 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/linters/.flake8
@@ -1,5 +1,5 @@
[flake8]
exclude = venv,.venv,.git,__pycache__
extend-ignore = C901
extend-ignore = C901, E203
max-line-length = 150
statistics = True
2 changes: 1 addition & 1 deletion .gitignore
@@ -1,5 +1,5 @@
# Output files
issue_metrics.md
issue_metrics*.md
issue_metrics.json

# Byte-compiled / optimized / DLL files
Expand Down
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -161,6 +161,7 @@ This action can be configured to authenticate with GitHub App Installation or Pe
- [Configuring the `SEARCH_QUERY`](./docs/search-query.md)
- [Local usage without Docker](./docs/local-usage-without-docker.md)
- [Authenticating with GitHub App Installation](./docs/authenticating-with-github-app-installation.md)
- [Dealing with large issue_metrics.md files](./docs/dealing-with-large-issue-metrics.md)

## Contributions

Expand Down
17 changes: 17 additions & 0 deletions docs/dealing-with-large-issue-metrics.md
@@ -0,0 +1,17 @@
# Dealing with large issue metrics Markdown files

When working with lots of issues/pull requests/discussion results, the resulting issue_metrics.md file can become very large. This can cause the GitHub API to return an error when trying to create an issue with the contents of the file.

```shell
Pull request creation failed. Validation failed: Body is too long (maximum is 65536 characters)
```

To work around this limitation, the issue-metrics action detects when the file is too large and splits issue_metrics.md into smaller files.
So that existing workflows do not fail, the first split file keeps the usual name (issue_metrics.md) and each subsequent split file has a number appended to its name (issue_metrics_1.md, issue_metrics_2.md, etc.). The complete, unsplit report is saved as issue_metrics_full.md.

You can choose one of the following strategies to deal with the split files:
- Create multiple issues, each using the next split file in the sequence.
- Upload the full file as an artifact and link to it in the issue body.
- Create an issue and put the content of the split files as issue comments.

JSON output files are not split since it's not anticipated that you will use them as issue body content.
15 changes: 15 additions & 0 deletions issue_metrics.py
Expand Up @@ -20,6 +20,7 @@
main(): Run the issue-metrics script.
"""

import shutil
import sys
from typing import List, Union

Expand All @@ -30,6 +31,7 @@
from discussions import get_discussions
from json_writer import write_to_json
from labels import get_label_metrics, get_stats_time_in_labels
from markdown_helpers import markdown_too_large_for_issue_body, split_markdown_file
from markdown_writer import write_to_markdown
from most_active_mentors import count_comments_per_user, get_mentor_count
from time_to_answer import get_stats_time_to_answer, measure_time_to_answer
Expand Down Expand Up @@ -364,6 +366,7 @@ def main():
num_mentor_count,
search_query,
)

write_to_markdown(
issues_with_metrics,
stats_time_to_first_response,
Expand All @@ -377,6 +380,18 @@ def main():
search_query,
)

max_char_count = 65535
if markdown_too_large_for_issue_body("issue_metrics.md", max_char_count):
split_markdown_file("issue_metrics.md", max_char_count)
shutil.move("issue_metrics.md", "issue_metrics_full.md")
shutil.move("issue_metrics_0.md", "issue_metrics.md")
print(
"Issue metrics markdown file is too large for GitHub issue body and has been \
split into multiple files. ie. issue_metrics.md, issue_metrics_1.md, etc. \
The full file is saved as issue_metrics_full.md\n\
See https://github.com/github/issue-metrics/blob/main/docs/dealing-with-large-issue-metrics.md"
)


# Run the issue-metrics script only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
38 changes: 38 additions & 0 deletions markdown_helpers.py
@@ -0,0 +1,38 @@
""" Helper functions for working with markdown files. """


def markdown_too_large_for_issue_body(file_path: str, max_char_count: int) -> bool:
    """
    Report whether a markdown file exceeds the size allowed for a github issue body.

    The file is read as UTF-8 text and its length is measured in characters
    (not bytes). A file whose length equals max_char_count is not too large.

    Inputs:
        file_path: str - the path to the markdown file to check
        max_char_count: int - the maximum number of characters allowed in a github issue body

    Returns:
        bool - True if the file has more than max_char_count characters, False otherwise

    """
    with open(file_path, "r", encoding="utf-8") as markdown_file:
        char_count = len(markdown_file.read())
    return char_count > max_char_count


def split_markdown_file(file_path: str, max_char_count: int) -> None:
    """
    Split the markdown file into smaller files.

    The contents are cut into consecutive chunks of at most max_char_count
    characters. Chunk number i is written next to the original file as
    "<name>_<i>.md", where <name> is file_path without its extension. The
    original file is left untouched. Chunks are cut at fixed character
    offsets, so a cut may fall in the middle of a line. An empty input file
    produces no output files.

    Inputs:
        file_path: str - the path to the markdown file to split
        max_char_count: int - the maximum number of characters allowed before splitting markdown file

    Raises:
        ValueError - if max_char_count is less than 1

    """
    # Imported here so this function is self-contained.
    import os  # pylint: disable=import-outside-toplevel

    # A zero step makes range() fail with an unrelated message and a negative
    # step silently yields no chunks, so reject both up front.
    if max_char_count < 1:
        raise ValueError("max_char_count must be a positive integer")

    # Read everything first so the source file is closed before writing.
    with open(file_path, "r", encoding="utf-8") as file:
        file_contents = file.read()

    # Strip the real extension instead of assuming a three character ".md"
    # suffix, which mangles names such as "notes.markdown".
    base_path, _ = os.path.splitext(file_path)

    chunk_starts = range(0, len(file_contents), max_char_count)
    for index, start in enumerate(chunk_starts):
        with open(f"{base_path}_{index}.md", "w", encoding="utf-8") as new_file:
            new_file.write(file_contents[start : start + max_char_count])
75 changes: 75 additions & 0 deletions test_markdown_helpers.py
@@ -0,0 +1,75 @@
""" Unit tests for the markdown_helpers module. """

import os
import tempfile
import unittest

from markdown_helpers import markdown_too_large_for_issue_body, split_markdown_file


class TestMarkdownHelpers(unittest.TestCase):
    """
    Unit tests for the markdown_helpers module.

    Every test writes its files into a temporary directory, so the working
    directory stays clean and the files are removed even when an assertion
    fails.
    """

    def test_markdown_too_large_for_issue_body(self):
        """
        Test the markdown_too_large_for_issue_body function.
        """
        max_char_count = 65535
        # Each repetition is two characters, so the file is twice the limit.
        markdown_content = "a\n" * max_char_count

        with tempfile.TemporaryDirectory() as temp_dir:
            file_path = os.path.join(temp_dir, "temp.md")
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(markdown_content)

            # The file exceeds the limit.
            self.assertTrue(markdown_too_large_for_issue_body(file_path, max_char_count))

            # A file exactly as long as the limit still fits.
            self.assertFalse(
                markdown_too_large_for_issue_body(file_path, len(markdown_content))
            )

    def test_split_markdown_file(self):
        """
        Test the split_markdown_file function.
        """
        # Define a sample markdown file content with 4 times the maximum character count
        multiple_of_max = 4
        max_char_count = 65535
        repeated_content = "a\n"
        markdown_content = repeated_content * (
            (max_char_count * multiple_of_max) // len(repeated_content)
        )

        with tempfile.TemporaryDirectory() as temp_dir:
            file_path = os.path.join(temp_dir, "temp.md")
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(markdown_content)

            split_markdown_file(file_path, max_char_count)

            # Assert that the function creates exactly four split files
            split_contents = []
            for i in range(multiple_of_max):
                split_path = os.path.join(temp_dir, f"temp_{i}.md")
                self.assertTrue(os.path.exists(split_path))
                with open(split_path, "r", encoding="utf-8") as f:
                    split_contents.append(f.read())
            self.assertFalse(
                os.path.exists(os.path.join(temp_dir, f"temp_{multiple_of_max}.md"))
            )

            # Assert that no split file is longer than the maximum
            for content in split_contents:
                self.assertLessEqual(len(content), max_char_count)

            # Assert that nothing was lost or reordered by the split
            self.assertEqual("".join(split_contents), markdown_content)


# Run the unit tests when this file is executed directly.
if __name__ == "__main__":
    unittest.main()