From b5e84715e9cd2874d2069b96a12fd01c4e5f9593 Mon Sep 17 00:00:00 2001 From: miroslavpojer Date: Tue, 30 Sep 2025 09:33:48 +0200 Subject: [PATCH 1/8] Backup before code debug. --- integration_test.py | 22 ++ release_notes_generator/data/__init__.py | 0 release_notes_generator/{ => data}/filter.py | 76 +++--- release_notes_generator/{ => data}/miner.py | 168 +++++++++--- .../data/utils/__init__.py | 0 .../data/utils/bulk_sub_issue_collector.py | 226 ++++++++++++++++ release_notes_generator/generator.py | 8 +- release_notes_generator/model/mined_data.py | 10 +- release_notes_generator/model/record.py | 2 + .../record/factory/default_record_factory.py | 69 ++--- .../factory/issue_hierarchy_record_factory.py | 248 +++++++++--------- release_notes_generator/utils/record_utils.py | 52 ++++ tests/release_notes/data/__init__.py | 0 tests/{ => release_notes/data}/test_filter.py | 2 +- tests/{ => release_notes/data}/test_miner.py | 2 +- .../record/factory/utils/__init__.py | 0 16 files changed, 638 insertions(+), 247 deletions(-) create mode 100644 integration_test.py create mode 100644 release_notes_generator/data/__init__.py rename release_notes_generator/{ => data}/filter.py (68%) rename release_notes_generator/{ => data}/miner.py (60%) create mode 100644 release_notes_generator/data/utils/__init__.py create mode 100644 release_notes_generator/data/utils/bulk_sub_issue_collector.py create mode 100644 release_notes_generator/utils/record_utils.py create mode 100644 tests/release_notes/data/__init__.py rename tests/{ => release_notes/data}/test_filter.py (98%) rename tests/{ => release_notes/data}/test_miner.py (99%) create mode 100644 tests/release_notes/record/factory/utils/__init__.py diff --git a/integration_test.py b/integration_test.py new file mode 100644 index 00000000..dce07478 --- /dev/null +++ b/integration_test.py @@ -0,0 +1,22 @@ +import urllib3 +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + +import os + +from release_notes_generator.data.utils.bulk_sub_issue_collector import CollectorConfig, BulkSubIssueCollector + +token = os.getenv("GITHUB_TOKEN") + +# If you need to disable TLS verification (to mirror your example): +cfg = CollectorConfig(verify_tls=False) + +collector = BulkSubIssueCollector(token, cfg=cfg) + +new_parents = [ + "absa-group/AUL#2960", +] + +while new_parents: + new_parents = collector.scan_sub_issues_for_parents(new_parents) + print("New parents found:", new_parents) + print("Collected sub-issues so far:", collector.parents_sub_issues) diff --git a/release_notes_generator/data/__init__.py b/release_notes_generator/data/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/release_notes_generator/filter.py b/release_notes_generator/data/filter.py similarity index 68% rename from release_notes_generator/filter.py rename to release_notes_generator/data/filter.py index b3dd5778..9f733c60 100644 --- a/release_notes_generator/filter.py +++ b/release_notes_generator/data/filter.py @@ -20,6 +20,10 @@ from copy import deepcopy from typing import Optional +from github.Issue import Issue +from github.PullRequest import PullRequest +from github.Repository import Repository + from release_notes_generator.action_inputs import ActionInputs from release_notes_generator.model.mined_data import MinedData @@ -68,39 +72,43 @@ def filter(self, data: MinedData) -> MinedData: if data.release is not None: logger.info("Starting issue, prs and commit reduction by the latest release since time.") - issues_list = self._filter_issues(data) - 
logger.debug("Count of issues reduced from %d to %d", len(data.issues), len(issues_list)) + issues_dict = self._filter_issues(data) + logger.debug("Count of issues reduced from %d to %d", len(data.issues), len(issues_dict)) # filter out merged PRs and commits before the date pulls_seen: set[int] = set() - pulls_list: list = [] - for pull in data.pull_requests: + pulls_dict: dict[PullRequest, Repository] = {} + for pull, repo in data.pull_requests.items(): if (pull.merged_at is not None and pull.merged_at >= data.since) or ( pull.closed_at is not None and pull.closed_at >= data.since ): if pull.number not in pulls_seen: pulls_seen.add(pull.number) - pulls_list.append(pull) - logger.debug("Count of pulls reduced from %d to %d", len(data.pull_requests), len(pulls_list)) + pulls_dict[pull] = repo + logger.debug("Count of pulls reduced from %d to %d", len(data.pull_requests.items()), len(pulls_dict.items())) - commits_list = list(filter(lambda commit: commit.commit.author.date > data.since, data.commits)) - logger.debug("Count of commits reduced from %d to %d", len(data.commits), len(commits_list)) + commits_dict = { + commit: repo + for commit, repo in data.commits.items() + if commit.commit.author.date > data.since + } + logger.debug("Count of commits reduced from %d to %d", len(data.commits.items()), len(commits_dict.items())) - md.issues = issues_list - md.pull_requests = pulls_list - md.commits = commits_list + md.issues = issues_dict + md.pull_requests = pulls_dict + md.commits = commits_dict logger.debug( "Input data. Issues: %d, Pull Requests: %d, Commits: %d", - len(data.issues), - len(data.pull_requests), - len(data.commits), + len(data.issues.items()), + len(data.pull_requests.items()), + len(data.commits.items()), ) logger.debug( "Filtered data. Issues: %d, Pull Requests: %d, Commits: %d", - len(md.issues), - len(md.pull_requests), - len(md.commits), + len(md.issues.items()), + len(md.pull_requests.items()), + len(md.commits.items()), ) else: md.issues = deepcopy(data.issues) @@ -109,12 +117,15 @@ def filter(self, data: MinedData) -> MinedData: return md - def _filter_issues(self, data: MinedData) -> list: + def _filter_issues(self, data: MinedData) -> dict[Issue, Repository]: """ Filter issues based on the selected filtering type - default or hierarchy. - @param data: The mined data containing issues. - @return: The filtered list of issues. + Parameters: + data (MinedData): The mined data to filter. + + Returns: + dict[Issue, Repository]: The filtered issues. """ if ActionInputs.get_hierarchy(): logger.debug("Used hierarchy issue filtering logic.") @@ -124,7 +135,7 @@ def _filter_issues(self, data: MinedData) -> list: return self._filter_issues_default(data) @staticmethod - def _filter_issues_default(data: MinedData) -> list: + def _filter_issues_default(data: MinedData) -> dict[Issue, Repository]: """ Default filtering for issues: filter out closed issues before the release date. @@ -132,12 +143,12 @@ def _filter_issues_default(data: MinedData) -> list: data (MinedData): The mined data containing issues and release information. Returns: - list: The filtered list of issues. + dict[Issue, Repository]: The filtered issues. 
""" - return [issue for issue in data.issues if (issue.closed_at is None) or (issue.closed_at >= data.since)] + return {issue: repo for issue, repo in data.issues.items() if (issue.closed_at is None) or (issue.closed_at >= data.since)} @staticmethod - def _filter_issues_issue_hierarchy(data: MinedData) -> list: + def _filter_issues_issue_hierarchy(data: MinedData) -> dict[Issue, Repository]: """ Hierarchy filtering for issues: include issues closed since the release date or still open at generation time. @@ -146,14 +157,13 @@ def _filter_issues_issue_hierarchy(data: MinedData) -> list: data (MinedData): The mined data containing issues and release information. Returns: - list: The filtered list of issues. + dict[Issue, Repository]: The filtered issues. """ - return list( - filter( - lambda issue: ( - (issue.closed_at is not None and issue.closed_at >= data.since) # closed after the release - or (issue.state == "open") # still open - ), - data.issues, + return { + issue: repo + for issue, repo in data.issues.items() + if ( + (issue.closed_at is not None and issue.closed_at >= data.since) + or (issue.state == "open") ) - ) + } diff --git a/release_notes_generator/miner.py b/release_notes_generator/data/miner.py similarity index 60% rename from release_notes_generator/miner.py rename to release_notes_generator/data/miner.py index 8e34434d..fa55d924 100644 --- a/release_notes_generator/miner.py +++ b/release_notes_generator/data/miner.py @@ -21,19 +21,24 @@ import logging import sys import traceback +from builtins import list from typing import Optional import semver from github import Github from github.GitRelease import GitRelease +from github.Issue import Issue +from github.PullRequest import PullRequest from github.Repository import Repository from release_notes_generator.action_inputs import ActionInputs +from release_notes_generator.data.utils.bulk_sub_issue_collector import CollectorConfig, BulkSubIssueCollector from release_notes_generator.model.issue_record import IssueRecord from release_notes_generator.model.mined_data import MinedData from release_notes_generator.model.pull_request_record import PullRequestRecord from release_notes_generator.utils.decorators import safe_call_decorator from release_notes_generator.utils.github_rate_limiter import GithubRateLimiter +from release_notes_generator.utils.record_utils import get_id, parse_issue_id logger = logging.getLogger(__name__) @@ -47,6 +52,118 @@ def __init__(self, github_instance: Github, rate_limiter: GithubRateLimiter): self.github_instance = github_instance self._safe_call = safe_call_decorator(rate_limiter) + def mine_data(self) -> MinedData: + """ + Mines data from GitHub, including repository information, issues, pull requests, commits, and releases. 
+ """ + logger.info("Starting data mining from GitHub...") + repo: Optional[Repository] = self.get_repository(ActionInputs.get_github_repository()) + if repo is None: + raise ValueError("Repository not found") + + data = MinedData(repo) + data.release = self.get_latest_release(repo) + + self._get_issues(data) + + # pulls and commits, and then reduce them by the latest release since time + pull_requests = list(self._safe_call(repo.get_pulls)(state=PullRequestRecord.PR_STATE_CLOSED, base=repo.default_branch)) + open_pull_requests = list(self._safe_call(repo.get_pulls)(state=PullRequestRecord.PR_STATE_OPEN, base=repo.default_branch)) + data.pull_requests = {pr: data.home_repository for pr in pull_requests} + if data.since: + commits = list(self._safe_call(repo.get_commits)(since=data.since)) + else: + commits = list(self._safe_call(repo.get_commits)()) + data.commits = {c: data.home_repository for c in commits} + + logger.info("Initial data mining from GitHub completed.") + + logger.info("Filtering duplicated issues from the list of issues...") + de_duplicated_data = self.__filter_duplicated_issues(data, open_pull_requests) + logger.info("Filtering duplicated issues from the list of issues finished.") + + return de_duplicated_data + + def mine_missing_sub_issues(self, data: MinedData) -> dict[Issue, Repository]: + logger.debug("Mapping sub-issues...") + data.parents_sub_issues = self._scan_sub_issues_for_parents([get_id(i, r) for i, r in data.issues.items()]) + + logger.debug("Fetching missing issues...") + return self._fetch_missing_issues_and_prs(data) + + def _scan_sub_issues_for_parents(self, parents_to_check: list[str]) -> dict[str, list[str]]: + """ + Scan sub-issues for parents. + + Parameters: + parents_to_check (list[str]): List of parent issue IDs to check. + Returns: + dict[str, list[str]]: A dictionary mapping parent issue IDs to their sub-issue IDs. + """ + new_parent_ids: list[str] = parents_to_check + cfg = CollectorConfig(verify_tls=False) + bulk_sub_issue_collector = BulkSubIssueCollector(ActionInputs.get_github_token(), cfg) + parents_sub_issues: dict[str, list[str]] = {} + + # run in cycle to get all levels of hierarchy + while new_parent_ids: + logger.debug("Scanning sub-issues with parent ids: %s", new_parent_ids) + new_parent_ids = bulk_sub_issue_collector.scan_sub_issues_for_parents(parents_to_check) + parents_sub_issues.update(bulk_sub_issue_collector.parents_sub_issues) + + return parents_sub_issues + + def _fetch_missing_issues_and_prs(self, data: MinedData) -> dict[Issue, Repository]: + """ + Fetch missing issues. + + Parameters: + data (MinedData): The mined data containing origin sets of issues and pull requests. + Returns: + list[Issue]: A list of fetched missing issues. 
+ """ + fetched_issues: dict[Issue, Repository] = {} + + origin_issue_ids = {get_id(i, r) for i, r in data.issues.items()} + for parent_id in data.parents_sub_issues.keys(): + if parent_id in origin_issue_ids: + continue + + # fetch issue by id + org, repo, num = parse_issue_id(parent_id) + + if not self.get_repository(f"{org}/{repo}"): + new_repo = self._get_repository(f"{org}/{repo}") + if new_repo is not None: + # cache for subsequent lookups + data.add_repository(new_repo) + + issue = self._safe_call(self.get_repository(f"{org}/{repo}").get_issue)(int(parent_id)) + if issue is None: + logger.error("Issue not found: %s", parent_id) + continue + else: + logger.debug("Fetching missing issue: %s", parent_id) + + # add to issues list + fetched_issues[issue] = data.get_repository(f"{org}/{repo}") + + logger.debug("Fetched %d missing issues.", len(fetched_issues)) + return fetched_issues + + def _get_repository(self, full_name: str) -> Optional[Repository]: + """ + Retrieves the specified GitHub repository. + + Returns: + Optional[Repository]: The GitHub repository if found, None otherwise. + """ + repo: Optional[Repository] = self._safe_call(self.github_instance.get_repo)(full_name) + if repo is None: + logger.error("Repository not found: %s", full_name) + return None + return repo + def check_repository_exists(self) -> bool: """ Checks if the specified GitHub repository exists. @@ -73,32 +190,6 @@ def get_repository(self, full_name: str) -> Optional[Repository]: return None return repo - def mine_data(self) -> MinedData: - """ - Mines data from GitHub, including repository information, issues, pull requests, commits, and releases. - """ - logger.info("Starting data mining from GitHub...") - repo: Optional[Repository] = self.get_repository(ActionInputs.get_github_repository()) - if repo is None: - raise ValueError("Repository not found") - - data = MinedData(repo) - data.release = self.get_latest_release(repo) - - self._get_issues(data) - - # pulls and commits, and then reduce them by the latest release since time - data.pull_requests = list(self._safe_call(repo.get_pulls)(state=PullRequestRecord.PR_STATE_CLOSED)) - data.commits = list(self._safe_call(repo.get_commits)()) - - logger.info("Data mining from GitHub completed.") - - logger.info("Filtering duplicated issues from the list of issues...") - de_duplicated_data = self.__filter_duplicated_issues(data) - logger.info("Filtering duplicated issues from the list of issues finished.") - - return de_duplicated_data - def get_latest_release(self, repository: Repository) -> Optional[GitRelease]: """ Get the latest release of the repository. @@ -139,7 +230,7 @@ def get_latest_release(self, repository: Repository) -> Optional[GitRelease]: return rls - def _get_issues(self, data: MinedData): + def _get_issues(self, data: MinedData) -> None: """ Populate data.issues. 
@@ -152,8 +243,10 @@ def _get_issues(self, data: MinedData):
         logger.info("Fetching issues from repository...")
         if data.release is None:
-            data.issues = list(self._safe_call(data.home_repository.get_issues)(state=IssueRecord.ISSUE_STATE_ALL))
-            logger.info("Fetched %d issues", len(data.issues))
+            issues = list(self._safe_call(data.home_repository.get_issues)(state=IssueRecord.ISSUE_STATE_ALL))
+            data.issues = {i: data.home_repository for i in issues}
+
+            logger.info("Fetched %d issues", len(data.issues.items()))
             return
 
         # Derive 'since' from release
@@ -184,7 +277,7 @@
             if num is not None and num not in by_number:
                 by_number[num] = issue
 
-        data.issues = list(by_number.values())
+        data.issues = {i: data.home_repository for i in list(by_number.values())}
         logger.info("Fetched %d issues (deduplicated).", len(data.issues))
 
     @staticmethod
@@ -213,21 +306,24 @@ def __get_latest_semantic_release(releases) -> Optional[GitRelease]:
         return rls
 
     @staticmethod
-    def __filter_duplicated_issues(data: MinedData) -> "MinedData":
+    def __filter_duplicated_issues(data: MinedData, open_pull_requests: list[PullRequest]) -> "MinedData":
         """
         Filters out duplicated issues from the list of issues.
         This method addresses a problem in the GitHub API output, where the issues list also contains PR values.
 
         Parameters:
-        - data (MinedData): The mined data containing issues and pull requests.
+            data (MinedData): The mined data containing issues and pull requests.
+            open_pull_requests (list[PullRequest]): List of currently open pull requests.
 
         Returns:
-        - MinedData: The mined data with duplicated issues removed.
+            MinedData: The mined data with duplicated issues removed.
         """
-        pr_numbers = {pr.number for pr in data.pull_requests}
-        filtered_issues = [issue for issue in data.issues if issue.number not in pr_numbers]
+        pr_numbers = {pr.number for pr in data.pull_requests.keys()}
+        open_pr_numbers = [pr.number for pr in open_pull_requests]
+
+        filtered_issues = {issue: repo for issue, repo in data.issues.items() if issue.number not in pr_numbers and issue.number not in open_pr_numbers}
 
-        logger.debug("Duplicated issues removed: %s", len(data.issues) - len(filtered_issues))
+        logger.debug("Duplicated issues removed: %s", len(data.issues.items()) - len(filtered_issues.items()))
 
         data.issues = filtered_issues
 
diff --git a/release_notes_generator/data/utils/__init__.py b/release_notes_generator/data/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/release_notes_generator/data/utils/bulk_sub_issue_collector.py b/release_notes_generator/data/utils/bulk_sub_issue_collector.py
new file mode 100644
index 00000000..3e359f93
--- /dev/null
+++ b/release_notes_generator/data/utils/bulk_sub_issue_collector.py
@@ -0,0 +1,226 @@
+from __future__ import annotations
+
+import json
+import logging
+import time
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Set, Tuple
+
+import requests
+
+from release_notes_generator.utils.record_utils import parse_issue_id, format_issue_id
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass(frozen=True)
+class CollectorConfig:
+    """
+    Configuration options for BulkSubIssueCollector.
+    Override defaults when instantiating if you need custom behavior.
+ """ + + api_url: str = "https://api.github.com/graphql" + timeout: float = 12.0 + verify_tls: bool = True + + # Retry/backoff + max_retries: int = 3 + base_backoff: float = 1.0 + + # Pagination and batching + per_page: int = 100 # Max allowed by GitHub for subIssues + max_parents_per_repo: int = 2 # Max issue aliases per repository(...) block + max_repos_per_request: int = 8 # Max repository blocks per query + + # Pacing + gentle_pacing_seconds: float = 0.05 + + +class BulkSubIssueCollector: + """ + Collect sub-issues for received parent issues in bulk via GitHub GraphQL API. + Prepare list of new parents build from found sub-issues. + """ + + def __init__( + self, + token: str, + cfg: Optional[CollectorConfig] = None, + session: Optional[requests.Session] = None, + ): + self._cfg = cfg or CollectorConfig() + self._session = session or requests.Session() + self._headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + } + + # Parent -> list of its direct sub-issues ("org/repo#n") + self.parents_sub_issues: dict[str, list[str]] = {} + + def scan_sub_issues_for_parents(self, parents_to_check: list[str]) -> list[str]: + """ + Input: ["org/repo#123", "org2/repo2#77", ...] + Output: list of *new parent* IDs in the same format (unique, sorted). + Side-effect: self.parents_sub_issues[parent_id] = [child_ids...] + """ + if not parents_to_check: + return [] + + new_parents_to_check: Set[str] = set() + self.parents_sub_issues = {} + + by_repo: dict[tuple[str, str], list[int]] = {} + originals: set[str] = set() + + for raw in parents_to_check: + org, repo, num = parse_issue_id(raw) + by_repo.setdefault((org, repo), []).append(num) + originals.add(raw) + + # Outer chunk by repositories to keep queries within safe length. + repo_items = list(by_repo.items()) + for i in range(0, len(repo_items), self._cfg.max_repos_per_request): + repo_chunk = repo_items[i : i + self._cfg.max_repos_per_request] + + # Maintain cursors per (org, repo, issue). + cursors: Dict[Tuple[str, str, int], Optional[str]] = {} + remaining_by_repo: Dict[Tuple[str, str], Set[int]] = { + k: set(v) for k, v in repo_chunk + } + for (org, repo), nums in remaining_by_repo.items(): + for n in nums: + cursors[(org, repo, n)] = None + + # Continue until all parents in this chunk are fully paginated. + while any(remaining_by_repo.values()): + # Build one GraphQL query with up to max_repos_per_request repos, + # each with up to max_parents_per_repo parent issues that still have pages. 
+ repo_blocks: List[str] = [] + alias_maps: Dict[str, Tuple[str, str, int]] = {} # alias -> (org, repo, parent_num) + + for r_idx, ((org, repo), parents_rem) in enumerate(remaining_by_repo.items()): + if not parents_rem: + continue + current_parents = list(parents_rem)[: self._cfg.max_parents_per_repo] + issue_blocks: List[str] = [] + for p_idx, parent_num in enumerate(current_parents): + alias = f"i{r_idx}_{p_idx}" + alias_maps[alias] = (org, repo, parent_num) + after = cursors[(org, repo, parent_num)] + after_part = f', after: "{after}"' if after else "" + issue_blocks.append( + f"""{alias}: issue(number: {parent_num}) {{ + number + subIssues(first: {self._cfg.per_page}{after_part}) {{ + nodes {{ + number + repository {{ owner {{ login }} name }} + # only count to decide if child is also a parent + subIssues(first: 0) {{ totalCount }} + }} + pageInfo {{ hasNextPage endCursor }} + }} + }}""" + ) + if issue_blocks: + repo_alias = f"r{r_idx}" + repo_blocks.append( + f"""{repo_alias}: repository(owner: "{org}", name: "{repo}") {{ + {' '.join(issue_blocks)} + }}""" + ) + + if not repo_blocks: + break + + query = f"query Bulk {{ {' '.join(repo_blocks)} }}" + data = self._post_graphql({"query": query}) + + # Parse results: top-level 'data' contains our repo aliases + d_repo = data.get("data", {}) + + for alias, (org, repo, parent_num) in alias_maps.items(): + issue_node = self._find_alias_node(d_repo, alias) + parent_id = format_issue_id(org, repo, parent_num) + + if issue_node is None: + # Parent not found / no access — mark as complete + remaining_by_repo[(org, repo)].discard(parent_num) + # Ensure map key exists (empty list) + self.parents_sub_issues.setdefault(parent_id, []) + logger.info("No sub-issues found for parent %s.", parent_id) + continue + + conn = issue_node["subIssues"] + child_ids: list[str] = self.parents_sub_issues.setdefault(parent_id, []) + + for child in conn.get("nodes", []): + child_num = child["number"] + child_org = child["repository"]["owner"]["login"] + child_repo = child["repository"]["name"] + child_id = format_issue_id(child_org, child_repo, child_num) + # Save every direct child in the mapping (no duplicates) + if child_id not in child_ids: + child_ids.append(child_id) + + # If the child has children, it's a "new parent" + if child["subIssues"]["totalCount"] > 0 and child_id not in originals: + new_parents_to_check.add(child_id) + + logger.debug("Sub-issues found for parent %s: %s", parent_id, child_ids) + + page = conn["pageInfo"] + if page["hasNextPage"]: + cursors[(org, repo, parent_num)] = page["endCursor"] + else: + remaining_by_repo[(org, repo)].discard(parent_num) + + # Gentle pacing to avoid secondary limits + time.sleep(0.05) + + # Deterministic order + return sorted(new_parents_to_check, key=lambda s: (lambda o, r, n: (o, r, n))(*parse_issue_id(s))) + + # ---------- internals ---------- + + def _post_graphql(self, payload: dict) -> dict: + logger.info("Posting graphql payload: %s", payload) + last_exc: Optional[Exception] = None + for attempt in range(1, self._cfg.max_retries + 1): + try: + logger.debug("Posting graphql query") + resp = self._session.post( + self._cfg.api_url, + headers=self._headers, + data=json.dumps(payload), + verify=self._cfg.verify_tls, + timeout=self._cfg.timeout, + ) + resp.raise_for_status() + data = resp.json() + if "errors" in data and data["errors"]: + logger.error("GraphQL errors: %s", data["errors"]) + raise RuntimeError(f"GitHub GraphQL errors: {data['errors']}") + logger.debug("Posted graphql query") + return data + 
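+            # Transport failures, HTTP error statuses, and GraphQL-level errors
+            # all land below; retry with linear backoff (base_backoff * attempt)
+            # and re-raise once max_retries is exhausted.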
except Exception as e: + logger.exception("GraphQL query failed") + last_exc = e + if attempt == self._cfg.max_retries: + raise + time.sleep(self._cfg.base_backoff * attempt) + if last_exc: + raise last_exc + raise RuntimeError("GraphQL POST failed without exception.") + + def _find_alias_node(self, repo_block: dict, alias: str) -> Optional[dict]: + """ + Given top-level 'data' (mapping of repo aliases -> repo object), + return the 'issue' object under whichever repo alias contains our issue alias. + """ + for _, repo_obj in repo_block.items(): + if isinstance(repo_obj, dict) and alias in repo_obj: + return repo_obj[alias] + return None diff --git a/release_notes_generator/generator.py b/release_notes_generator/generator.py index 07ced620..f43c4ff0 100644 --- a/release_notes_generator/generator.py +++ b/release_notes_generator/generator.py @@ -26,8 +26,8 @@ from github import Github from github.Repository import Repository -from release_notes_generator.filter import FilterByRelease -from release_notes_generator.miner import DataMiner +from release_notes_generator.data.filter import FilterByRelease +from release_notes_generator.data.miner import DataMiner from release_notes_generator.action_inputs import ActionInputs from release_notes_generator.builder.builder import ReleaseNotesBuilder from release_notes_generator.chapters.custom_chapters import CustomChapters @@ -90,6 +90,10 @@ def generate(self) -> Optional[str]: filterer = FilterByRelease() data_filtered_by_release = filterer.filter(data=data) + # data expansion when hierarchy is enabled + if ActionInputs.get_hierarchy(): + data.issues.update(miner.mine_missing_sub_issues(data)) + changelog_url: str = get_change_url( tag_name=ActionInputs.get_tag_name(), repository=data_filtered_by_release.home_repository, diff --git a/release_notes_generator/model/mined_data.py b/release_notes_generator/model/mined_data.py index f9d4c083..204dec06 100644 --- a/release_notes_generator/model/mined_data.py +++ b/release_notes_generator/model/mined_data.py @@ -42,11 +42,13 @@ def __init__(self, repository: Repository): self._home_repository_full_name: str = repository.full_name self._repositories: dict[str, Repository] = {repository.full_name: repository} self.release: Optional[GitRelease] = None - self.issues: list[Issue] = [] - self.pull_requests: list[PullRequest] = [] - self.commits: list[Commit] = [] + self.issues: dict[Issue, Repository] = {} + self.pull_requests: dict[PullRequest, Repository] = {} + self.commits: dict[Commit, Repository] = {} self.since = datetime(1970, 1, 1) # Default to epoch start + self.parents_sub_issues: dict[str, list[str]] = {} # parent issue id -> list of its sub-issues ids + @property def home_repository(self) -> Repository: """Get the home repository.""" @@ -71,4 +73,4 @@ def is_empty(self): Returns: bool: True if empty, False otherwise. 
""" - return self.issues == [] and self.pull_requests == [] and self.commits == [] + return not (self.issues or self.pull_requests or self.commits) diff --git a/release_notes_generator/model/record.py b/release_notes_generator/model/record.py index e9a8cfda..b8ec8bae 100644 --- a/release_notes_generator/model/record.py +++ b/release_notes_generator/model/record.py @@ -22,6 +22,8 @@ from abc import ABCMeta, abstractmethod from typing import Optional +from github.Repository import Repository + logger = logging.getLogger(__name__) diff --git a/release_notes_generator/record/factory/default_record_factory.py b/release_notes_generator/record/factory/default_record_factory.py index 0eb020b9..9c16d541 100644 --- a/release_notes_generator/record/factory/default_record_factory.py +++ b/release_notes_generator/record/factory/default_record_factory.py @@ -19,7 +19,6 @@ """ import logging -from functools import singledispatchmethod, lru_cache from typing import cast, Optional from github import Github @@ -39,10 +38,10 @@ from release_notes_generator.utils.decorators import safe_call_decorator from release_notes_generator.utils.github_rate_limiter import GithubRateLimiter from release_notes_generator.utils.pull_request_utils import get_issues_for_pr, extract_issue_numbers_from_body +from release_notes_generator.utils.record_utils import get_id logger = logging.getLogger(__name__) - class DefaultRecordFactory(RecordFactory): """ A class used to generate records for release notes. @@ -56,49 +55,19 @@ def __init__(self, github: Github, home_repository: Repository) -> None: self._records: dict[str, Record] = {} - @singledispatchmethod # pylint: disable=abstract-method - def get_id(self, obj) -> str: - """ - Get the ID of an object. - - Parameters: - obj: The object to get the ID of. - - Returns: - str: The ID of the object. - """ - raise NotImplementedError(f"Unsupported type: {type(obj)}") - - @get_id.register - def _(self, issue: Issue) -> str: - # delegate to a cached, hashable-only helper - return self._issue_id(issue.repository.full_name, issue.number) - - @staticmethod - @lru_cache(maxsize=2048) - def _issue_id(repo_full_name: str, number: int) -> str: - return f"{repo_full_name}#{number}" - - @get_id.register - def _(self, pull_request: PullRequest) -> str: - return f"{self._home_repository.full_name}#{pull_request.number}" - - @get_id.register - def _(self, commit: Commit) -> str: - return f"{commit.sha}" - - def get_repository(self, full_name: str) -> Optional[Repository]: - """ - Retrieves the specified GitHub repository. - - Returns: - Optional[Repository]: The GitHub repository if found, None otherwise. - """ - repo: Optional[Repository] = self._safe_call(self._github.get_repo)(full_name) - if repo is None: - logger.error("Repository not found: %s", full_name) - return None - return repo + # TODO - this should not be needed now - delete + # def get_repository(self, full_name: str) -> Optional[Repository]: + # """ + # Retrieves the specified GitHub repository. + # + # Returns: + # Optional[Repository]: The GitHub repository if found, None otherwise. 
+ # """ + # repo: Optional[Repository] = self._safe_call(self._github.get_repo)(full_name) + # if repo is None: + # logger.error("Repository not found: %s", full_name) + # return None + # return repo def generate(self, data: MinedData) -> dict[str, Record]: """ @@ -110,7 +79,7 @@ def generate(self, data: MinedData) -> dict[str, Record]: """ def register_pull_request(pr: PullRequest, skip_rec: bool) -> None: - l_pid = self.get_id(pr) + l_pid = get_id(pr) l_pull_labels = [label.name for label in pr.get_labels()] attached_any = False detected_issues = extract_issue_numbers_from_body(pr, repository=data.home_repository) @@ -136,7 +105,7 @@ def register_pull_request(pr: PullRequest, skip_rec: bool) -> None: except ValueError: logger.error("Invalid parent issue id: %s", parent_issue_id) continue - parent_repository = data.get_repository(pi_repo_name) or self.get_repository(pi_repo_name) + parent_repository = data.get_repository(pi_repo_name) if parent_repository is not None: # cache for subsequent lookups if data.get_repository(pi_repo_name) is None: @@ -170,7 +139,7 @@ def register_pull_request(pr: PullRequest, skip_rec: bool) -> None: logger.debug("Registering pull requests to records...") for pull in data.pull_requests: - pid = self.get_id(pull) + pid = get_id(pull) pull_labels = [label.name for label in pull.get_labels()] skip_record: bool = any(item in pull_labels for item in ActionInputs.get_skip_release_notes_labels()) @@ -217,7 +186,7 @@ def register_commit_to_record(self, commit: Commit) -> bool: rec_pr.register_commit(commit) return True - self._records[self.get_id(commit)] = CommitRecord(commit=commit) + self._records[get_id(commit)] = CommitRecord(commit=commit) logger.debug("Created record for direct commit %s: %s", commit.sha, commit.commit.message) return False @@ -236,5 +205,5 @@ def _create_record_for_issue(self, issue: Issue, issue_labels: Optional[list[str if issue_labels is None: issue_labels = [label.name for label in issue.get_labels()] skip_record = any(item in issue_labels for item in ActionInputs.get_skip_release_notes_labels()) - self._records[iid := self.get_id(issue)] = IssueRecord(issue=issue, skip=skip_record, issue_labels=issue_labels) + self._records[iid := get_id(issue)] = IssueRecord(issue=issue, skip=skip_record, issue_labels=issue_labels) logger.debug("Created record for non hierarchy issue '%s': %s", iid, issue.title) diff --git a/release_notes_generator/record/factory/issue_hierarchy_record_factory.py b/release_notes_generator/record/factory/issue_hierarchy_record_factory.py index 360e8934..4c6f9d05 100644 --- a/release_notes_generator/record/factory/issue_hierarchy_record_factory.py +++ b/release_notes_generator/record/factory/issue_hierarchy_record_factory.py @@ -22,7 +22,7 @@ from typing import cast, Optional from github import Github -from github.Issue import Issue, SubIssue +from github.Issue import Issue from github.PullRequest import PullRequest from github.Repository import Repository @@ -37,6 +37,7 @@ from release_notes_generator.record.factory.default_record_factory import DefaultRecordFactory from release_notes_generator.utils.pull_request_utils import get_issues_for_pr, extract_issue_numbers_from_body +from release_notes_generator.utils.record_utils import get_id, parse_issue_id logger = logging.getLogger(__name__) @@ -53,8 +54,8 @@ def __init__(self, github: Github, home_repository: Repository) -> None: self.__sub_issue_parents: dict[str, str] = {} self.__registered_commits: set[str] = set() - self.__external_sub_issues: list[SubIssue] = [] - 
self.__local_sub_issues_checked: list[str] = []
+        # self.__external_sub_issues: list[SubIssue] = []
+        # self.__local_sub_issues_checked: list[str] = []
 
     def generate(self, data: MinedData) -> dict[str, Record]:
         """
@@ -66,48 +67,64 @@ def generate(self, data: MinedData) -> dict[str, Record]:
         Returns:
             dict[str, Record]: A dictionary of records indexed by their IDs.
         """
-        logger.debug("Creation of records started...")
-        # First register all issues with sub-issues
-        issues_expansion: list[SubIssue] = []
+        logger.debug("Creation of records started...")  # NEW: I already have a mapping of which issue is a hierarchy issue, which is a SubIssue, and which is a plain Issue
         for issue in data.issues:
-            if self.get_id(issue) in self.__registered_issues:
-                continue
+            id = get_id(issue)
+            # tmp note - this should no longer be needed - I now have all the info to do it in a single pass
+            # if get_id(issue) in self.__registered_issues:
+            #     continue
 
-            issues_expansion.extend(self._create_issue_record_using_sub_issues_existence(issue, data))
+            if len(data.parents_sub_issues[id]) > 0:
+                # issue has sub-issues - it is either a hierarchy issue or a sub-hierarchy issue
+                self._create_record_for_hierarchy_issue(issue)
 
-        data.issues.extend(issues_expansion)
-
-        # Second register all hierarchy issues from sub-issues
-        registered_before = -1
-        while registered_before < len(self.__registered_issues):
-            registered_before = len(self.__registered_issues)
-            logger.debug("Looking for hierarchical issue among sub-issues...")
+            elif id in self.__sub_issue_parents.values():
+                # issue has no sub-issues - it is a sub-issue
+                self._create_record_for_sub_issue(issue)
 
-            issues_expansion = []
-            for issue in data.issues:
-                iid = self.get_id(issue)
-                if iid in self.__registered_issues:
-                    continue
+            else:
+                # issue is not a sub-issue and has no sub-issues - it is a plain issue
+                self._create_record_for_issue(issue)
 
-                if iid in self.__sub_issue_parents and iid not in self.__local_sub_issues_checked:
-                    issues_expansion.extend(self._create_issue_record_using_sub_issues_existence(issue, data))
-                    self.__local_sub_issues_checked.append(iid)
+        # old code - to be deleted after testing
+        # First register all issues with sub-issues
+        # for issue in data.issues:
+        #     if get_id(issue) in self.__registered_issues:
+        #         continue
+        #
+        #     self._create_issue_record_using_sub_issues_existence(issue, data)
 
-            data.issues.extend(issues_expansion)
+        # Second register all hierarchy issues from sub-issues
+        # registered_before = -1
+        # while registered_before < len(self.__registered_issues):
+        #     registered_before = len(self.__registered_issues)
+        #     logger.debug("Looking for hierarchical issue among sub-issues...")
+        #
+        #     issues_expansion = []
+        #     for issue in data.issues:
+        #         iid = self.get_id(issue)
+        #         if iid in self.__registered_issues:
+        #             continue
+        #
+        #         if iid in self.__sub_issue_parents and iid not in self.__local_sub_issues_checked:
+        #             issues_expansion.extend(self._create_issue_record_using_sub_issues_existence(issue, data))
+        #             self.__local_sub_issues_checked.append(iid)
+        #
+        #     data.issues.extend(issues_expansion)
 
         # Third register all external sub-issues
-        for ext_sub_issue in self.__external_sub_issues:
-            if self.get_id(ext_sub_issue) in self.__registered_issues:
-                continue
-
-            self._create_record_for_sub_issue(ext_sub_issue)
+        # for ext_sub_issue in self.__external_sub_issues:
+        #     if self.get_id(ext_sub_issue) in self.__registered_issues:
+        #         continue
+        #
+        #     self._create_record_for_sub_issue(ext_sub_issue)
 
         # Now register all issues without sub-issues
-        for issue in data.issues:
-            if self.get_id(issue) in
self.__registered_issues: - continue - - self._create_issue_record_using_sub_issues_not_existence(issue) + # for issue in data.issues: + # if self.get_id(issue) in self.__registered_issues: + # continue + # + # self._create_issue_record_using_sub_issues_not_existence(issue) # dev note: Each issue is now in records dict by its issue number - all on same level - no hierarchy # This is useful for population by PRs and commits @@ -119,7 +136,7 @@ def generate(self, data: MinedData) -> dict[str, Record]: logger.debug("Registering direct commits to records...") for commit in data.commits: if commit.sha not in self.__registered_commits: - self._records[self.get_id(commit)] = CommitRecord(commit) + self._records[get_id(commit)] = CommitRecord(commit) # dev note: now we have all PRs and commits registered to issues or as stand-alone records # let build hierarchy @@ -160,22 +177,11 @@ def _register_pull_and_its_commits_to_issue( issue_id, ) # dev note: here we expect that PR links to an issue in the same repository !!! - i_repo_name, i_number_str = issue_id.split("#", 1) - try: - i_number = int(i_number_str) - except ValueError: - logger.error("Invalid issue id: %s", issue_id) - continue - parent_repository = data.get_repository(i_repo_name) or self.get_repository(i_repo_name) - if parent_repository is not None: - if data.get_repository(i_repo_name) is None: - data.add_repository(parent_repository) - parent_issue = self._safe_call(parent_repository.get_issue)(i_number) - else: - parent_issue = None - + org, repo, num = parse_issue_id(issue_id) + repo_full_name = f"{org}/{repo}" + parent_issue = self._safe_call(data.get_repository(repo_full_name).get_issue)(num) if data.get_repository(repo_full_name) is not None else None if parent_issue is not None: - self._create_issue_record_using_sub_issues_existence(parent_issue, data) + self._create_record_for_issue(parent_issue) if issue_id in self._records and isinstance( self._records[issue_id], (SubIssueRecord, HierarchyIssueRecord, IssueRecord) @@ -194,75 +200,77 @@ def _register_pull_and_its_commits_to_issue( pr_rec = PullRequestRecord(pull, pull_labels, skip_record) for c in related_commits: # register commits to the PR record pr_rec.register_commit(c) - pid = self.get_id(pull) + pid = get_id(pull) self._records[pid] = pr_rec logger.debug("Created record for PR %s: %s", pid, pull.title) - def _create_issue_record_using_sub_issues_existence(self, issue: Issue, data: MinedData) -> list[SubIssue]: - # use presence of sub-issues as a hint for hierarchy issue or non hierarchy issue - sub_issues = list(issue.get_sub_issues()) - logger.debug("Found %d sub-issues for %d", len(sub_issues), issue.number) - new_local_issues: list[SubIssue] = [] - - if len(sub_issues) > 0: - self._create_record_for_hierarchy_issue(issue) - for si in sub_issues: - siid = self.get_id(si) - - # check if sub-issue is from current repository - if si.repository.full_name != issue.repository.full_name: - # register sub-issue and its parent for later hierarchy building - # Note: GitHub now allows only 1 parent - self.__sub_issue_parents[siid] = self.get_id(issue) - - self.__external_sub_issues.append(si) - logger.debug( - "Detected sub-issue %d from different repository %s - adding as external sub-issue" - " for later processing", - si.number, - si.repository.full_name, - ) - - else: - use_issue = False - if ( - data.since - and si.state == IssueRecord.ISSUE_STATE_CLOSED - and si.closed_at - and data.since > si.closed_at - ): - logger.debug("Detected sub-issue %d closed in previous 
release.", si.number) - if len(list(si.get_sub_issues())) > 0: - use_issue = True - else: - self.__registered_issues.add(siid) - - elif si.state == IssueRecord.ISSUE_STATE_OPEN: - logger.debug("Detected sub-issue %d is still open.", si.number) - if len(list(si.get_sub_issues())) > 0: - use_issue = True - else: - self.__registered_issues.add(siid) - - elif si.state == IssueRecord.ISSUE_STATE_CLOSED: # issue is valid - use_issue = True - - else: - logger.warning("Detected unexpected sub-issue %d with parent %d", si.number, issue.number) - - if use_issue: - self.__sub_issue_parents[siid] = self.get_id(issue) - if si not in data.issues: - new_local_issues.append(si) - - return new_local_issues - - def _create_issue_record_using_sub_issues_not_existence(self, issue: Issue) -> None: - # Expected to run after all issue with sub-issues are registered - if self.get_id(issue) in self.__sub_issue_parents.keys(): # pylint: disable=consider-iterating-dictionary - self._create_record_for_sub_issue(issue) - else: - self._create_record_for_issue(issue) + # def _create_issue_record_using_sub_issues_existence(self, issue: Issue, data: MinedData) -> list[SubIssue]: + # # use presence of sub-issues as a hint for hierarchy issue or non hierarchy issue + # iid = get_id(issue) + # sub_issues_ids = data.parents_sub_issues[iid] + # logger.debug("Found %d sub-issues for %d", len(sub_issues_ids), issue.number) + # + # if len(sub_issues_ids) > 0: + # self._create_record_for_hierarchy_issue(issue) + # for siid in sub_issues_ids: + # org, repo, num = parse_issue_id(siid) + # + # # check if sub-issue is from current repository + # if f"{org}/{repo}" != issue.repository.full_name: + # # register sub-issue and its parent for later hierarchy building + # # Note: GitHub now allows only 1 parent + # self.__sub_issue_parents[siid] = get_id(issue) + # + # # TODO - fetch + # + # self.__external_sub_issues.append(si) + # logger.debug( + # "Detected sub-issue %d from different repository %s - adding as external sub-issue" + # " for later processing", + # num, + # f"{org}/{repo}", + # ) + # + # else: + # use_issue = False + # if ( + # data.since + # and si.state == IssueRecord.ISSUE_STATE_CLOSED + # and si.closed_at + # and data.since > si.closed_at + # ): + # logger.debug("Detected sub-issue %d closed in previous release.", si.number) + # if len(list(si.get_sub_issues())) > 0: + # use_issue = True + # else: + # self.__registered_issues.add(siid) + # + # elif si.state == IssueRecord.ISSUE_STATE_OPEN: + # logger.debug("Detected sub-issue %d is still open.", si.number) + # if len(list(si.get_sub_issues())) > 0: + # use_issue = True + # else: + # self.__registered_issues.add(siid) + # + # elif si.state == IssueRecord.ISSUE_STATE_CLOSED: # issue is valid + # use_issue = True + # + # else: + # logger.warning("Detected unexpected sub-issue %d with parent %d", si.number, issue.number) + # + # if use_issue: + # self.__sub_issue_parents[siid] = get_id(issue) + # if si not in data.issues: + # new_local_issues.append(si) + # + # return new_local_issues + + # def _create_issue_record_using_sub_issues_not_existence(self, issue: Issue) -> None: + # # Expected to run after all issue with sub-issues are registered + # if get_id(issue) in self.__sub_issue_parents.keys(): # pylint: disable=consider-iterating-dictionary + # self._create_record_for_sub_issue(issue) + # else: + # self._create_record_for_issue(issue) def _create_record_for_hierarchy_issue(self, i: Issue, issue_labels: Optional[list[str]] = None) -> None: """ @@ -276,7 +284,7 @@ def 
_create_record_for_hierarchy_issue(self, i: Issue, issue_labels: Optional[li
             None
         """
         # check for skip labels presence and skip when detected
-        iid = self.get_id(i)
+        iid = get_id(i)
         if issue_labels is None:
             issue_labels = self._get_issue_labels_mix_with_type(i)
         skip_record = any(item in issue_labels for item in ActionInputs.get_skip_release_notes_labels())
@@ -300,13 +308,13 @@ def _create_record_for_issue(self, issue: Issue, issue_labels: Optional[list[str
             issue_labels = self._get_issue_labels_mix_with_type(issue)
 
         super()._create_record_for_issue(issue, issue_labels)
-        self.__registered_issues.add(self.get_id(issue))
+        self.__registered_issues.add(get_id(issue))
 
     def _create_record_for_sub_issue(self, issue: Issue, issue_labels: Optional[list[str]] = None) -> None:
         if issue_labels is None:
             issue_labels = self._get_issue_labels_mix_with_type(issue)
 
-        iid: str = self.get_id(issue)
+        iid: str = get_id(issue)
         skip_record = any(item in issue_labels for item in ActionInputs.get_skip_release_notes_labels())
         logger.debug("Created record for sub issue %s: %s", iid, issue.title)
         self.__registered_issues.add(iid)
diff --git a/release_notes_generator/utils/record_utils.py b/release_notes_generator/utils/record_utils.py
new file mode 100644
index 00000000..2a3818d8
--- /dev/null
+++ b/release_notes_generator/utils/record_utils.py
@@ -0,0 +1,52 @@
+import logging
+import re
+from functools import lru_cache
+from typing import cast
+
+from github.Commit import Commit
+from github.Issue import Issue
+from github.PullRequest import PullRequest
+from github.Repository import Repository
+
+logger = logging.getLogger(__name__)
+
+ISSUE_ID_RE = re.compile(r"^(?P<org>[^/\s]+)/(?P<repo>[^#\s]+)#(?P<num>\d+)$")
+
+class IssueIdParseError(ValueError):
+    pass
+
+def get_id(obj, repository: Repository) -> str:
+    if isinstance(obj, Issue):
+        issue = cast(Issue, obj)
+        return _issue_id(repository.full_name, issue.number)
+    elif isinstance(obj, PullRequest):
+        pr = cast(PullRequest, obj)
+        return _pr_id(repository.full_name, pr.number)
+    elif isinstance(obj, Commit):
+        commit = cast(Commit, obj)
+        return f"{commit.sha}"
+
+    return str(obj)
+
+@lru_cache(maxsize=2048)
+def _issue_id(repo_full_name: str, number: int) -> str:
+    return f"{repo_full_name}#{number}"
+
+@lru_cache(maxsize=2048)
+def _pr_id(repo_full_name: str, number: int) -> str:
+    return f"{repo_full_name}#{number}"
+
+def parse_issue_id(issue_id: str) -> tuple[str, str, int]:
+    """
+    Parse 'org/repo#123' -> (org, repo, 123).
+    Raises IssueIdParseError on malformed input.
+ """ + m = ISSUE_ID_RE.match(issue_id.strip()) + if not m: + raise IssueIdParseError( + f"Invalid issue id '{issue_id}', expected 'org/repo#number'" + ) + return m.group("org"), m.group("repo"), int(m.group("num")) + +def format_issue_id(org: str, repo: str, number: int) -> str: + return f"{org}/{repo}#{number}" diff --git a/tests/release_notes/data/__init__.py b/tests/release_notes/data/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_filter.py b/tests/release_notes/data/test_filter.py similarity index 98% rename from tests/test_filter.py rename to tests/release_notes/data/test_filter.py index 30c58809..9188f09e 100644 --- a/tests/test_filter.py +++ b/tests/release_notes/data/test_filter.py @@ -20,7 +20,7 @@ from github.Repository import Repository -from release_notes_generator.filter import FilterByRelease +from release_notes_generator.data.filter import FilterByRelease from release_notes_generator.model.mined_data import MinedData diff --git a/tests/test_miner.py b/tests/release_notes/data/test_miner.py similarity index 99% rename from tests/test_miner.py rename to tests/release_notes/data/test_miner.py index d741f5bd..79ea6f1a 100644 --- a/tests/test_miner.py +++ b/tests/release_notes/data/test_miner.py @@ -24,7 +24,7 @@ from github.PullRequest import PullRequest from github.Repository import Repository -from release_notes_generator.miner import DataMiner +from release_notes_generator.data.miner import DataMiner from release_notes_generator.model.mined_data import MinedData def decorator_mock(func): diff --git a/tests/release_notes/record/factory/utils/__init__.py b/tests/release_notes/record/factory/utils/__init__.py new file mode 100644 index 00000000..e69de29b From f0f6327abb6171d1d3bb803e76d08dea40062cfd Mon Sep 17 00:00:00 2001 From: miroslavpojer Date: Tue, 30 Sep 2025 12:32:22 +0200 Subject: [PATCH 2/8] Backup before unit test fix. 
--- integration_test.py | 8 ++-- release_notes_generator/data/miner.py | 8 ++-- .../data/utils/bulk_sub_issue_collector.py | 13 ++++--- release_notes_generator/generator.py | 2 +- release_notes_generator/model/mined_data.py | 4 +- .../record/factory/default_record_factory.py | 26 ++++++------- .../factory/issue_hierarchy_record_factory.py | 37 +++++++++---------- 7 files changed, 52 insertions(+), 46 deletions(-) diff --git a/integration_test.py b/integration_test.py index dce07478..a636eab1 100644 --- a/integration_test.py +++ b/integration_test.py @@ -12,9 +12,11 @@ collector = BulkSubIssueCollector(token, cfg=cfg) -new_parents = [ - "absa-group/AUL#2960", -] +# new_parents = [ +# "absa-group/AUL#2960", +# ] + +new_parents = ['absa-group/AUL#3800', 'absa-group/AUL#3799', 'absa-group/AUL#3797', 'absa-group/AUL#3796', 'absa-group/AUL#3794', 'absa-group/AUL#3793', 'absa-group/AUL#3792', 'absa-group/AUL#3791', 'absa-group/AUL#3790', 'absa-group/AUL#3788', 'absa-group/AUL#3787', 'absa-group/AUL#3783', 'absa-group/AUL#3780', 'absa-group/AUL#3779', 'absa-group/AUL#3776', 'absa-group/AUL#3768', 'absa-group/AUL#3767', 'absa-group/AUL#3758', 'absa-group/AUL#3756', 'absa-group/AUL#3744', 'absa-group/AUL#3742', 'absa-group/AUL#3740', 'absa-group/AUL#3737', 'absa-group/AUL#3735', 'absa-group/AUL#3726', 'absa-group/AUL#3725', 'absa-group/AUL#3724', 'absa-group/AUL#3723', 'absa-group/AUL#3722', 'absa-group/AUL#3721', 'absa-group/AUL#3720', 'absa-group/AUL#3718', 'absa-group/AUL#3716', 'absa-group/AUL#3710', 'absa-group/AUL#3709', 'absa-group/AUL#3708', 'absa-group/AUL#3707', 'absa-group/AUL#3706', 'absa-group/AUL#3705', 'absa-group/AUL#3704', 'absa-group/AUL#3700', 'absa-group/AUL#3694', 'absa-group/AUL#3691', 'absa-group/AUL#3689', 'absa-group/AUL#3688', 'absa-group/AUL#3687', 'absa-group/AUL#3686', 'absa-group/AUL#3684', 'absa-group/AUL#3674', 'absa-group/AUL#3672', 'absa-group/AUL#3665', 'absa-group/AUL#3664', 'absa-group/AUL#3659', 'absa-group/AUL#3599', 'absa-group/AUL#3588', 'absa-group/AUL#3585', 'absa-group/AUL#3583', 'absa-group/AUL#3527', 'absa-group/AUL#3516', 'absa-group/AUL#3501', 'absa-group/AUL#3488', 'absa-group/AUL#3487', 'absa-group/AUL#3486', 'absa-group/AUL#3482', 'absa-group/AUL#3301', 'absa-group/AUL#3292', 'absa-group/AUL#3281', 'absa-group/AUL#3226', 'absa-group/AUL#3207', 'absa-group/AUL#3009', 'absa-group/AUL#2960', 'absa-group/AUL#2849', 'absa-group/AUL#2832', 'absa-group/AUL#2824', 'absa-group/AUL#2560', 'absa-group/AUL#2048', 'absa-group/AUL#3749', 'absa-group/AUL#3746', 'absa-group/AUL#3745', 'absa-group/AUL#3738', 'absa-group/AUL#3736', 'absa-group/AUL#3734', 'absa-group/AUL#3712', 'absa-group/AUL#3698', 'absa-group/AUL#3697', 'absa-group/AUL#3695', 'absa-group/AUL#3685', 'absa-group/AUL#3682', 'absa-group/AUL#3679', 'absa-group/AUL#3675', 'absa-group/AUL#3671', 'absa-group/AUL#3669', 'absa-group/AUL#3658', 'absa-group/AUL#3657', 'absa-group/AUL#3656', 'absa-group/AUL#3655', 'absa-group/AUL#3654', 'absa-group/AUL#3653', 'absa-group/AUL#3652', 'absa-group/AUL#3651', 'absa-group/AUL#3650', 'absa-group/AUL#3649', 'absa-group/AUL#3648', 'absa-group/AUL#3642', 'absa-group/AUL#3635', 'absa-group/AUL#3619', 'absa-group/AUL#3613', 'absa-group/AUL#3612', 'absa-group/AUL#3607', 'absa-group/AUL#3603', 'absa-group/AUL#3600', 'absa-group/AUL#3598', 'absa-group/AUL#3596', 'absa-group/AUL#3595', 'absa-group/AUL#3586', 'absa-group/AUL#3582', 'absa-group/AUL#3571', 'absa-group/AUL#3559', 'absa-group/AUL#3556', 'absa-group/AUL#3554', 'absa-group/AUL#3553', 'absa-group/AUL#3550', 
'absa-group/AUL#3548', 'absa-group/AUL#3547', 'absa-group/AUL#3544', 'absa-group/AUL#3539', 'absa-group/AUL#3536', 'absa-group/AUL#3535', 'absa-group/AUL#3534', 'absa-group/AUL#3533', 'absa-group/AUL#3530', 'absa-group/AUL#3529', 'absa-group/AUL#3526', 'absa-group/AUL#3525', 'absa-group/AUL#3522', 'absa-group/AUL#3515', 'absa-group/AUL#3514', 'absa-group/AUL#3498', 'absa-group/AUL#3474', 'absa-group/AUL#3473', 'absa-group/AUL#3465', 'absa-group/AUL#3464', 'absa-group/AUL#3457', 'absa-group/AUL#3453', 'absa-group/AUL#3452', 'absa-group/AUL#3437', 'absa-group/AUL#3405', 'absa-group/AUL#3399', 'absa-group/AUL#3385', 'absa-group/AUL#3380', 'absa-group/AUL#3373', 'absa-group/AUL#3360', 'absa-group/AUL#3328', 'absa-group/AUL#3322', 'absa-group/AUL#3317', 'absa-group/AUL#3299', 'absa-group/AUL#3298', 'absa-group/AUL#3297', 'absa-group/AUL#3295', 'absa-group/AUL#3294', 'absa-group/AUL#3287', 'absa-group/AUL#3286', 'absa-group/AUL#3285', 'absa-group/AUL#3284', 'absa-group/AUL#3275', 'absa-group/AUL#3266', 'absa-group/AUL#3265', 'absa-group/AUL#3264', 'absa-group/AUL#3263', 'absa-group/AUL#3262', 'absa-group/AUL#3251', 'absa-group/AUL#3247', 'absa-group/AUL#3246', 'absa-group/AUL#3237', 'absa-group/AUL#3236', 'absa-group/AUL#3235', 'absa-group/AUL#3234', 'absa-group/AUL#3233', 'absa-group/AUL#3224', 'absa-group/AUL#3219', 'absa-group/AUL#3206', 'absa-group/AUL#3203', 'absa-group/AUL#3201', 'absa-group/AUL#3181', 'absa-group/AUL#3177', 'absa-group/AUL#3173', 'absa-group/AUL#3172', 'absa-group/AUL#3153', 'absa-group/AUL#3141', 'absa-group/AUL#3122', 'absa-group/AUL#3104', 'absa-group/AUL#3091', 'absa-group/AUL#3085', 'absa-group/AUL#3084', 'absa-group/AUL#3080', 'absa-group/AUL#3079', 'absa-group/AUL#3072', 'absa-group/AUL#3052', 'absa-group/AUL#3020', 'absa-group/AUL#3006', 'absa-group/AUL#3000', 'absa-group/AUL#2933', 'absa-group/AUL#2932', 'absa-group/AUL#2920', 'absa-group/AUL#2913', 'absa-group/AUL#2912', 'absa-group/AUL#2887', 'absa-group/AUL#2886', 'absa-group/AUL#2884', 'absa-group/AUL#2883', 'absa-group/AUL#2876', 'absa-group/AUL#2873', 'absa-group/AUL#2847', 'absa-group/AUL#2842', 'absa-group/AUL#2841', 'absa-group/AUL#2835', 'absa-group/AUL#2823', 'absa-group/AUL#2819', 'absa-group/AUL#2810', 'absa-group/AUL#2806', 'absa-group/AUL#2801', 'absa-group/AUL#2781', 'absa-group/AUL#2768', 'absa-group/AUL#2754', 'absa-group/AUL#2742', 'absa-group/AUL#2732', 'absa-group/AUL#2725', 'absa-group/AUL#2724', 'absa-group/AUL#2722', 'absa-group/AUL#2720', 'absa-group/AUL#2712', 'absa-group/AUL#2709', 'absa-group/AUL#2707', 'absa-group/AUL#2706', 'absa-group/AUL#2691', 'absa-group/AUL#2679', 'absa-group/AUL#2678', 'absa-group/AUL#2674', 'absa-group/AUL#2671', 'absa-group/AUL#2668', 'absa-group/AUL#2664', 'absa-group/AUL#2660', 'absa-group/AUL#2649', 'absa-group/AUL#2647', 'absa-group/AUL#2631', 'absa-group/AUL#2619', 'absa-group/AUL#2590', 'absa-group/AUL#2584', 'absa-group/AUL#2583', 'absa-group/AUL#2551', 'absa-group/AUL#2549', 'absa-group/AUL#2530', 'absa-group/AUL#2507', 'absa-group/AUL#2504', 'absa-group/AUL#2503', 'absa-group/AUL#2494', 'absa-group/AUL#2484', 'absa-group/AUL#2477', 'absa-group/AUL#2470', 'absa-group/AUL#2439', 'absa-group/AUL#2432', 'absa-group/AUL#2416', 'absa-group/AUL#2414', 'absa-group/AUL#2412', 'absa-group/AUL#2411', 'absa-group/AUL#2407', 'absa-group/AUL#2405', 'absa-group/AUL#2404', 'absa-group/AUL#2400', 'absa-group/AUL#2394', 'absa-group/AUL#2391', 'absa-group/AUL#2385', 'absa-group/AUL#2374', 'absa-group/AUL#2369', 'absa-group/AUL#2368', 'absa-group/AUL#2367', 
'absa-group/AUL#2366', 'absa-group/AUL#2365', 'absa-group/AUL#2364', 'absa-group/AUL#2344', 'absa-group/AUL#2343', 'absa-group/AUL#2338', 'absa-group/AUL#2335', 'absa-group/AUL#2332', 'absa-group/AUL#2326', 'absa-group/AUL#2320', 'absa-group/AUL#2319', 'absa-group/AUL#2318', 'absa-group/AUL#2313', 'absa-group/AUL#2304', 'absa-group/AUL#2303', 'absa-group/AUL#2302', 'absa-group/AUL#2301', 'absa-group/AUL#2300', 'absa-group/AUL#2299', 'absa-group/AUL#2298', 'absa-group/AUL#2216', 'absa-group/AUL#2200', 'absa-group/AUL#2181', 'absa-group/AUL#2175', 'absa-group/AUL#2174', 'absa-group/AUL#2167', 'absa-group/AUL#2163', 'absa-group/AUL#2147', 'absa-group/AUL#2142', 'absa-group/AUL#2118', 'absa-group/AUL#2114', 'absa-group/AUL#2112', 'absa-group/AUL#2096', 'absa-group/AUL#2061', 'absa-group/AUL#2059', 'absa-group/AUL#1966', 'absa-group/AUL#1965', 'absa-group/AUL#1961', 'absa-group/AUL#1925', 'absa-group/AUL#1924', 'absa-group/AUL#1901', 'absa-group/AUL#1857', 'absa-group/AUL#1796', 'absa-group/AUL#1674', 'absa-group/AUL#1669', 'absa-group/AUL#1668', 'absa-group/AUL#1665', 'absa-group/AUL#1662', 'absa-group/AUL#1660', 'absa-group/AUL#1634', 'absa-group/AUL#1577', 'absa-group/AUL#1571', 'absa-group/AUL#1566', 'absa-group/AUL#1559', 'absa-group/AUL#1555', 'absa-group/AUL#1552', 'absa-group/AUL#1545', 'absa-group/AUL#1520', 'absa-group/AUL#1517', 'absa-group/AUL#1516', 'absa-group/AUL#1507', 'absa-group/AUL#1506', 'absa-group/AUL#1505', 'absa-group/AUL#1504', 'absa-group/AUL#1502', 'absa-group/AUL#1501', 'absa-group/AUL#1497', 'absa-group/AUL#1496', 'absa-group/AUL#1485', 'absa-group/AUL#1483', 'absa-group/AUL#1433', 'absa-group/AUL#1416', 'absa-group/AUL#1407', 'absa-group/AUL#1406', 'absa-group/AUL#1360', 'absa-group/AUL#1356', 'absa-group/AUL#1353', 'absa-group/AUL#1351', 'absa-group/AUL#1347', 'absa-group/AUL#1343', 'absa-group/AUL#1333', 'absa-group/AUL#1331', 'absa-group/AUL#1322', 'absa-group/AUL#1319', 'absa-group/AUL#1298', 'absa-group/AUL#1249', 'absa-group/AUL#1188', 'absa-group/AUL#1024', 'absa-group/AUL#870', 'absa-group/AUL#621', 'absa-group/AUL#584', 'absa-group/AUL#478', 'absa-group/AUL#476', 'absa-group/AUL#413', 'absa-group/AUL#405', 'absa-group/AUL#137'] while new_parents: new_parents = collector.scan_sub_issues_for_parents(new_parents) diff --git a/release_notes_generator/data/miner.py b/release_notes_generator/data/miner.py index fa55d924..2508663d 100644 --- a/release_notes_generator/data/miner.py +++ b/release_notes_generator/data/miner.py @@ -108,7 +108,7 @@ def _scan_sub_issues_for_parents(self, parents_to_check: list[str]) -> dict[str, # run in cycle to get all levels of hierarchy while new_parent_ids: logger.debug("Scanning sub-issues with parent ids: %s", new_parent_ids) - new_parent_ids = bulk_sub_issue_collector.scan_sub_issues_for_parents(parents_to_check) + new_parent_ids = bulk_sub_issue_collector.scan_sub_issues_for_parents(new_parent_ids) parents_sub_issues.update(bulk_sub_issue_collector.parents_sub_issues) return parents_sub_issues @@ -132,13 +132,15 @@ def _fetch_missing_issues_and_prs(self, data: MinedData) -> dict[Issue, Reposito # fetch issue by id org, repo, num = parse_issue_id(parent_id) - if not self.get_repository(f"{org}/{repo}"): + if data.get_repository(f"{org}/{repo}") is None: new_repo = self._get_repository(f"{org}/{repo}") if new_repo is not None: # cache for subsequent lookups data.add_repository(new_repo) - issue = self._safe_call(self.get_repository(f"{org}/{repo}").get_issue)(int(parent_id)) + issue = None + if 
data.get_repository(f"{org}/{repo}") is not None: + issue = self._safe_call(data.get_repository(f"{org}/{repo}").get_issue)(num) if issue is None: logger.error("Issue not found: %s", parent_id) continue diff --git a/release_notes_generator/data/utils/bulk_sub_issue_collector.py b/release_notes_generator/data/utils/bulk_sub_issue_collector.py index 3e359f93..626743f8 100644 --- a/release_notes_generator/data/utils/bulk_sub_issue_collector.py +++ b/release_notes_generator/data/utils/bulk_sub_issue_collector.py @@ -30,8 +30,8 @@ class CollectorConfig: # Pagination and batching per_page: int = 100 # Max allowed by GitHub for subIssues - max_parents_per_repo: int = 2 # Max issue aliases per repository(...) block - max_repos_per_request: int = 8 # Max repository blocks per query + max_parents_per_repo: int = 100 # Max issue aliases per repository(...) block + max_repos_per_request: int = 1 # Max repository blocks per query # Pacing gentle_pacing_seconds: float = 0.05 @@ -166,8 +166,12 @@ def scan_sub_issues_for_parents(self, parents_to_check: list[str]) -> list[str]: child_ids.append(child_id) # If the child has children, it's a "new parent" - if child["subIssues"]["totalCount"] > 0 and child_id not in originals: - new_parents_to_check.add(child_id) + if child_id not in originals: + if child["subIssues"]["totalCount"] > 0: + new_parents_to_check.add(child_id) + else: + # save no sub-issues for non-parents + self.parents_sub_issues.setdefault(child_id, []) logger.debug("Sub-issues found for parent %s: %s", parent_id, child_ids) @@ -186,7 +190,6 @@ def scan_sub_issues_for_parents(self, parents_to_check: list[str]) -> list[str]: # ---------- internals ---------- def _post_graphql(self, payload: dict) -> dict: - logger.info("Posting graphql payload: %s", payload) last_exc: Optional[Exception] = None for attempt in range(1, self._cfg.max_retries + 1): try: diff --git a/release_notes_generator/generator.py b/release_notes_generator/generator.py index f43c4ff0..64ea8aa4 100644 --- a/release_notes_generator/generator.py +++ b/release_notes_generator/generator.py @@ -92,7 +92,7 @@ def generate(self) -> Optional[str]: # data expansion when hierarchy is enabled if ActionInputs.get_hierarchy(): - data.issues.update(miner.mine_missing_sub_issues(data)) + data_filtered_by_release.issues.update(miner.mine_missing_sub_issues(data_filtered_by_release)) changelog_url: str = get_change_url( tag_name=ActionInputs.get_tag_name(), diff --git a/release_notes_generator/model/mined_data.py b/release_notes_generator/model/mined_data.py index 204dec06..646fbcf9 100644 --- a/release_notes_generator/model/mined_data.py +++ b/release_notes_generator/model/mined_data.py @@ -41,11 +41,13 @@ class MinedData: def __init__(self, repository: Repository): self._home_repository_full_name: str = repository.full_name self._repositories: dict[str, Repository] = {repository.full_name: repository} + self.release: Optional[GitRelease] = None + self.since = datetime(1970, 1, 1) # Default to epoch start + self.issues: dict[Issue, Repository] = {} self.pull_requests: dict[PullRequest, Repository] = {} self.commits: dict[Commit, Repository] = {} - self.since = datetime(1970, 1, 1) # Default to epoch start self.parents_sub_issues: dict[str, list[str]] = {} # parent issue id -> list of its sub-issues ids diff --git a/release_notes_generator/record/factory/default_record_factory.py b/release_notes_generator/record/factory/default_record_factory.py index 9c16d541..221cd1e2 100644 --- 
a/release_notes_generator/record/factory/default_record_factory.py +++ b/release_notes_generator/record/factory/default_record_factory.py @@ -78,8 +78,7 @@ def generate(self, data: MinedData) -> dict[str, Record]: dict[str, Record]: A dictionary of records keyed by 'owner/repo#number' (or commit SHA for commits). """ - def register_pull_request(pr: PullRequest, skip_rec: bool) -> None: - l_pid = get_id(pr) + def register_pull_request(pr: PullRequest, l_pid: str, skip_rec: bool) -> None: l_pull_labels = [label.name for label in pr.get_labels()] attached_any = False detected_issues = extract_issue_numbers_from_body(pr, repository=data.home_repository) @@ -115,7 +114,7 @@ def register_pull_request(pr: PullRequest, skip_rec: bool) -> None: parent_issue = None if parent_issue is not None: - self._create_record_for_issue(parent_issue) + self._create_record_for_issue(parent_issue, parent_issue_id) if parent_issue_id in self._records: cast(IssueRecord, self._records[parent_issue_id]).register_pull_request(pr) @@ -134,12 +133,12 @@ def register_pull_request(pr: PullRequest, skip_rec: bool) -> None: logger.debug("Created stand-alone PR record %s: %s (fallback)", l_pid, pr.title) logger.debug("Registering issues to records...") - for issue in data.issues: - self._create_record_for_issue(issue) + for issue, repo in data.issues.items(): + self._create_record_for_issue(issue, get_id(issue, repo)) logger.debug("Registering pull requests to records...") - for pull in data.pull_requests: - pid = get_id(pull) + for pull, repo in data.pull_requests.items(): + pid = get_id(pull, repo) pull_labels = [label.name for label in pull.get_labels()] skip_record: bool = any(item in pull_labels for item in ActionInputs.get_skip_release_notes_labels()) @@ -150,10 +149,10 @@ def register_pull_request(pr: PullRequest, skip_rec: bool) -> None: logger.debug("Created record for PR %s: %s", pid, pull.title) else: logger.debug("Registering pull number: %s, title : %s", pid, pull.title) - register_pull_request(pull, skip_record) + register_pull_request(pull, pid, skip_record) logger.debug("Registering commits to records...") - detected_direct_commits_count = sum(not self.register_commit_to_record(commit) for commit in data.commits) + detected_direct_commits_count = sum(not self.register_commit_to_record(commit, get_id(commit, repo)) for commit, repo in data.commits.items()) logger.info( "Generated %d records from %d issues and %d PRs, with %d commits detected.", @@ -164,7 +163,7 @@ def register_pull_request(pr: PullRequest, skip_rec: bool) -> None: ) return self._records - def register_commit_to_record(self, commit: Commit) -> bool: + def register_commit_to_record(self, commit: Commit, cid: str) -> bool: """ Register a commit to a record. @@ -186,16 +185,17 @@ def register_commit_to_record(self, commit: Commit) -> bool: rec_pr.register_commit(commit) return True - self._records[get_id(commit)] = CommitRecord(commit=commit) + self._records[cid] = CommitRecord(commit=commit) logger.debug("Created record for direct commit %s: %s", commit.sha, commit.commit.message) return False - def _create_record_for_issue(self, issue: Issue, issue_labels: Optional[list[str]] = None) -> None: + def _create_record_for_issue(self, issue: Issue, iid: str, issue_labels: Optional[list[str]] = None) -> None: """ Create a record for an issue. Parameters: issue (Issue): The issue to create a record for. + iid (str): The ID of the issue in the format 'owner/repo#number'. issue_labels (Optional[list[str]]): Optional set of labels for the issue. 
If not provided, labels will be fetched from the issue. Returns: @@ -205,5 +205,5 @@ def _create_record_for_issue(self, issue: Issue, issue_labels: Optional[list[str if issue_labels is None: issue_labels = [label.name for label in issue.get_labels()] skip_record = any(item in issue_labels for item in ActionInputs.get_skip_release_notes_labels()) - self._records[iid := get_id(issue)] = IssueRecord(issue=issue, skip=skip_record, issue_labels=issue_labels) + self._records[iid] = IssueRecord(issue=issue, skip=skip_record, issue_labels=issue_labels) logger.debug("Created record for non hierarchy issue '%s': %s", iid, issue.title) diff --git a/release_notes_generator/record/factory/issue_hierarchy_record_factory.py b/release_notes_generator/record/factory/issue_hierarchy_record_factory.py index 4c6f9d05..a250e7d6 100644 --- a/release_notes_generator/record/factory/issue_hierarchy_record_factory.py +++ b/release_notes_generator/record/factory/issue_hierarchy_record_factory.py @@ -68,23 +68,23 @@ def generate(self, data: MinedData) -> dict[str, Record]: dict[str, Record]: A dictionary of records indexed by their IDs. """ logger.debug("Creation of records started...") # NEW: uz mam mnapovani, kdo je hierarchy, kdo je SubIssue a kdo je Issue - for issue in data.issues: - id = get_id(issue) + for issue, repo in data.issues.items(): + iid = get_id(issue, repo) # tmp note - tohle by nemelo by za potrebi - nove uz mam vsechno info, abych to udelal v 1 prubehu # if get_id(issue) in self.__registered_issues: # continue - if len(data.parents_sub_issues[id]) > 0: + if len(data.parents_sub_issues[iid]) > 0: # issue has sub-issues - it is either hierarchy issue or sub-hierarchy issue - self._create_record_for_hierarchy_issue(issue) + self._create_record_for_hierarchy_issue(issue, iid) elif id in self.__sub_issue_parents.values(): # issue has no sub-issues - it is sub-issue - self._create_record_for_sub_issue(issue) + self._create_record_for_sub_issue(issue, iid) else: # issue is not sub-issue and has no sub-issues - it is issue - self._create_record_for_issue(issue) + self._create_record_for_issue(issue, iid) # old code for delete after testing # First register all issues with sub-issues @@ -130,13 +130,13 @@ def generate(self, data: MinedData) -> dict[str, Record]: # This is useful for population by PRs and commits logger.debug("Registering Commits to Pull Requests and Pull Requests to Issues...") - for pull in data.pull_requests: - self._register_pull_and_its_commits_to_issue(pull, data) + for pull, repo in data.pull_requests.items(): + self._register_pull_and_its_commits_to_issue(pull, get_id(pull, repo), data) logger.debug("Registering direct commits to records...") - for commit in data.commits: + for commit, repo in data.commits.items(): if commit.sha not in self.__registered_commits: - self._records[get_id(commit)] = CommitRecord(commit) + self._records[get_id(commit, repo)] = CommitRecord(commit) # dev note: now we have all PRs and commits registered to issues or as stand-alone records # let build hierarchy @@ -154,7 +154,7 @@ def generate(self, data: MinedData) -> dict[str, Record]: return self._records def _register_pull_and_its_commits_to_issue( - self, pull: PullRequest, data: MinedData, target_repository: Optional[Repository] = None + self, pull: PullRequest, pid: str, data: MinedData, target_repository: Optional[Repository] = None ) -> None: pull_labels = [label.name for label in pull.get_labels()] skip_record: bool = any(item in pull_labels for item in 
ActionInputs.get_skip_release_notes_labels()) @@ -200,7 +200,6 @@ def _register_pull_and_its_commits_to_issue( pr_rec = PullRequestRecord(pull, pull_labels, skip_record) for c in related_commits: # register commits to the PR record pr_rec.register_commit(c) - pid = get_id(pull) self._records[pid] = pr_rec logger.debug("Created record for PR %s: %s", pid, pull.title) @@ -272,7 +271,7 @@ def _register_pull_and_its_commits_to_issue( # else: # self._create_record_for_issue(issue) - def _create_record_for_hierarchy_issue(self, i: Issue, issue_labels: Optional[list[str]] = None) -> None: + def _create_record_for_hierarchy_issue(self, i: Issue, iid: str, issue_labels: Optional[list[str]] = None) -> None: """ Create a hierarchy issue record and register sub-issues. @@ -284,7 +283,6 @@ def _create_record_for_hierarchy_issue(self, i: Issue, issue_labels: Optional[li None """ # check for skip labels presence and skip when detected - iid = get_id(i) if issue_labels is None: issue_labels = self._get_issue_labels_mix_with_type(i) skip_record = any(item in issue_labels for item in ActionInputs.get_skip_release_notes_labels()) @@ -303,20 +301,19 @@ def _get_issue_labels_mix_with_type(self, issue: Issue) -> list[str]: return labels - def _create_record_for_issue(self, issue: Issue, issue_labels: Optional[list[str]] = None) -> None: + def _create_record_for_issue(self, issue: Issue, iid: str, issue_labels: Optional[list[str]] = None) -> None: if issue_labels is None: issue_labels = self._get_issue_labels_mix_with_type(issue) - super()._create_record_for_issue(issue, issue_labels) - self.__registered_issues.add(get_id(issue)) + super()._create_record_for_issue(issue, iid, issue_labels) + self.__registered_issues.add(iid) - def _create_record_for_sub_issue(self, issue: Issue, issue_labels: Optional[list[str]] = None) -> None: + def _create_record_for_sub_issue(self, issue: Issue, iid: str, issue_labels: Optional[list[str]] = None) -> None: if issue_labels is None: issue_labels = self._get_issue_labels_mix_with_type(issue) - iid: str = get_id(issue) skip_record = any(item in issue_labels for item in ActionInputs.get_skip_release_notes_labels()) logger.debug("Created record for sub issue %s: %s", iid, issue.title) self.__registered_issues.add(iid) self._records[iid] = SubIssueRecord(issue, issue_labels, skip_record) From 41b65518904c83bcc9931e7f1634be2cf384160e Mon Sep 17 00:00:00 2001 From: miroslavpojer Date: Tue, 30 Sep 2025 13:51:57 +0200 Subject: [PATCH 3/8] Fixed unit tests except hierarchy building ones with detected error.
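Note: the test updates in this patch track a single model change from the previous patch: MinedData.issues, MinedData.pull_requests and MinedData.commits are now dictionaries mapping each mined entity to its Repository, instead of plain lists, and record keys are derived per entity as 'owner/repo#number'. A minimal sketch of the pattern the fixtures below rely on; the dataclasses and this get_id are illustrative stand-ins for the PyGithub objects and for release_notes_generator.utils.record_utils.get_id, not the real implementations:

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class Repo:
        full_name: str

    @dataclass(frozen=True)
    class Issue:
        number: int

    def get_id(issue: Issue, repo: Repo) -> str:
        # record key format used throughout the factories: 'owner/repo#number'
        return f"{repo.full_name}#{issue.number}"

    repo = Repo("org/repo")
    issues: dict[Issue, Repo] = {Issue(1): repo, Issue(2): repo}

    # iteration now yields (entity, repository) pairs instead of bare entities
    for issue, r in issues.items():
        print(get_id(issue, r))  # -> org/repo#1, org/repo#2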
--- tests/conftest.py | 177 ++++++++++-------- .../builder/test_release_notes_builder.py | 4 + tests/release_notes/data/test_filter.py | 39 ++-- tests/release_notes/data/test_miner.py | 42 ++--- .../factory/test_default_record_factory.py | 42 ++--- tests/test_release_notes_generator.py | 4 +- 6 files changed, 167 insertions(+), 141 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index c6c89ccc..c94787bf 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -37,6 +37,7 @@ from release_notes_generator.chapters.custom_chapters import CustomChapters from release_notes_generator.model.sub_issue_record import SubIssueRecord from release_notes_generator.utils.github_rate_limiter import GithubRateLimiter +from release_notes_generator.utils.record_utils import get_id # Test classes @@ -624,6 +625,7 @@ def mined_data_isolated_record_types_no_labels_no_type_defined( solo_closed_issue = copy.deepcopy(mock_issue_closed) # 121 solo_closed_issue.body += "\nRelease Notes:\n- Solo issue release note" solo_closed_issue.get_labels.return_value = [] + data.parents_sub_issues[get_id(solo_closed_issue, mock_repo)] = [] # single hierarchy issue record - two sub-issues without PRs hi_two_sub_issues_no_prs = copy.deepcopy(mock_open_hierarchy_issue) @@ -632,7 +634,10 @@ def mined_data_isolated_record_types_no_labels_no_type_defined( hi_two_sub_issues_no_prs.body = "I301 open\nRelease Notes:\n- Hierarchy level release note" sub_issue_1 = copy.deepcopy(mock_open_sub_issue) sub_issue_2 = copy.deepcopy(mock_closed_sub_issue) - hi_two_sub_issues_no_prs.get_sub_issues.return_value = [sub_issue_1, sub_issue_2] + # hi_two_sub_issues_no_prs.get_sub_issues.return_value = [sub_issue_1, sub_issue_2] + data.parents_sub_issues[si1 := get_id(sub_issue_1, mock_repo)] = [] + data.parents_sub_issues[si2 := get_id(sub_issue_2, mock_repo)] = [] + data.parents_sub_issues[get_id(hi_two_sub_issues_no_prs, mock_repo)] = [si1, si2] # single hierarchy issue record - two sub-issues with PRs - no commits hi_two_sub_issues_with_prs = copy.deepcopy(mock_open_hierarchy_issue) @@ -653,7 +658,10 @@ def mined_data_isolated_record_types_no_labels_no_type_defined( mock_pr_closed_2.merge_commit_sha = "merge_commit_sha_150" mock_pr_closed_2.get_labels.return_value = [] mock_pr_closed_2.body += "\nCloses #451" - hi_two_sub_issues_with_prs.get_sub_issues.return_value = [sub_issue_3, sub_issue_4] + # hi_two_sub_issues_with_prs.get_sub_issues.return_value = [sub_issue_3, sub_issue_4] + data.parents_sub_issues[si3 := get_id(sub_issue_3, mock_repo)] = [] + data.parents_sub_issues[si4 := get_id(sub_issue_4, mock_repo)] = [] + data.parents_sub_issues[get_id(hi_two_sub_issues_with_prs, mock_repo)] = [si3, si4] # single hierarchy issue record - two sub-issues with PRs - with commits hi_two_sub_issues_with_prs_with_commit = copy.deepcopy(mock_open_hierarchy_issue) @@ -677,7 +685,10 @@ def mined_data_isolated_record_types_no_labels_no_type_defined( mock_commit_1 = copy.deepcopy(mock_commit) mock_commit_1.sha = "merge_commit_sha_151" mock_commit_1.commit.message = "Fixed bug in PR 151" - hi_two_sub_issues_with_prs_with_commit.get_sub_issues.return_value = [sub_issue_5, sub_issue_6] + # hi_two_sub_issues_with_prs_with_commit.get_sub_issues.return_value = [sub_issue_5, sub_issue_6] + data.parents_sub_issues[si5 := get_id(sub_issue_5, mock_repo)] = [] + data.parents_sub_issues[si6 := get_id(sub_issue_6, mock_repo)] = [] + data.parents_sub_issues[get_id(hi_two_sub_issues_with_prs_with_commit, mock_repo)] = [si5, si6] # single hierarchy issue record 
- one sub hierarchy issues - two sub-issues with PRs - with commits hi_one_sub_hierarchy_two_sub_issues_with_prs_with_commit = copy.deepcopy(mock_open_hierarchy_issue) @@ -705,8 +716,12 @@ def mined_data_isolated_record_types_no_labels_no_type_defined( mock_commit_2 = copy.deepcopy(mock_commit) mock_commit_2.sha = "merge_commit_sha_152" mock_commit_2.commit.message = "Fixed bug in PR 152" - sub_hierarchy_issue.get_sub_issues.return_value = [sub_issue_7, sub_issue_8] - hi_one_sub_hierarchy_two_sub_issues_with_prs_with_commit.get_sub_issues.return_value = [sub_hierarchy_issue] + # sub_hierarchy_issue.get_sub_issues.return_value = [sub_issue_7, sub_issue_8] + # hi_one_sub_hierarchy_two_sub_issues_with_prs_with_commit.get_sub_issues.return_value = [sub_hierarchy_issue] + data.parents_sub_issues[si7 := get_id(sub_issue_7, mock_repo)] = [] + data.parents_sub_issues[si8 := get_id(sub_issue_8, mock_repo)] = [] + data.parents_sub_issues[shi := get_id(sub_hierarchy_issue, mock_repo)] = [si7, si8] + data.parents_sub_issues[get_id(hi_one_sub_hierarchy_two_sub_issues_with_prs_with_commit, mock_repo)] = [shi] # single pull request record (closed, merged) mock_pr_closed_1 = copy.deepcopy(mock_pull_closed) # 123 @@ -719,18 +734,20 @@ def mined_data_isolated_record_types_no_labels_no_type_defined( mock_commit_3.sha = "merge_commit_sha_direct" mock_commit_3.commit.message = "Direct commit example" - data.issues = [solo_closed_issue, - hi_two_sub_issues_no_prs, - hi_two_sub_issues_with_prs, - hi_two_sub_issues_with_prs_with_commit, - hi_one_sub_hierarchy_two_sub_issues_with_prs_with_commit, # index 4 - sub_issue_1, sub_issue_2, # index 5,6 - sub_issue_3, sub_issue_4, # index 7,8 - sub_issue_5, sub_issue_6, # index 9,10 - sub_issue_7, sub_issue_8, # index 11,12 - sub_hierarchy_issue] # index 13 - data.pull_requests = [mock_pr_closed_1, mock_pr_merged_1, mock_pr_closed_2, mock_pr_closed_3, mock_pr_closed_4] - data.commits = [mock_commit_1, mock_commit_2, mock_commit_3] + data.issues = {solo_closed_issue: mock_repo, + hi_two_sub_issues_no_prs: mock_repo, + hi_two_sub_issues_with_prs: mock_repo, + hi_two_sub_issues_with_prs_with_commit: mock_repo, + hi_one_sub_hierarchy_two_sub_issues_with_prs_with_commit: mock_repo, # index 4 + sub_issue_1: mock_repo, sub_issue_2: mock_repo, # index 5,6 + sub_issue_3: mock_repo, sub_issue_4: mock_repo, # index 7,8 + sub_issue_5: mock_repo, sub_issue_6: mock_repo, # index 9,10 + sub_issue_7: mock_repo, sub_issue_8: mock_repo, # index 11,12 + sub_hierarchy_issue: mock_repo} # index 13 + data.pull_requests = {mock_pr_closed_1: mock_repo, mock_pr_merged_1: mock_repo, + mock_pr_closed_2: mock_repo, mock_pr_closed_3: mock_repo, + mock_pr_closed_4: mock_repo} + data.commits = {mock_commit_1: mock_repo, mock_commit_2: mock_repo, mock_commit_3: mock_repo} return data @@ -750,26 +767,28 @@ def mined_data_isolated_record_types_with_labels_no_type_defined(mocker, mined_d l_bug = mocker.Mock(spec=MockLabel) l_bug.name = "bug" - data.issues[0].get_labels.return_value = [l_enh] + iks = list(data.issues.keys()) + iks[0].get_labels.return_value = [l_enh] - data.issues[1].get_labels.return_value = [l_epic] # 301 - data.issues[2].get_labels.return_value = [l_epic] # 302 - data.issues[3].get_labels.return_value = [l_epic] # 303 - data.issues[4].get_labels.return_value = [l_epic] # 304 + iks[1].get_labels.return_value = [l_epic] # 301 + iks[2].get_labels.return_value = [l_epic] # 302 + iks[3].get_labels.return_value = [l_epic] # 303 + iks[4].get_labels.return_value = [l_epic] # 304 - 
data.issues[13].get_labels.return_value = [l_feature] # 350 + iks[13].get_labels.return_value = [l_feature] # 350 - data.issues[5].get_labels.return_value = [l_api] - data.issues[6].get_labels.return_value = [l_api] - data.issues[7].get_labels.return_value = [l_api] - data.issues[8].get_labels.return_value = [l_api] - data.issues[9].get_labels.return_value = [l_api] - data.issues[10].get_labels.return_value = [l_api] - data.issues[11].get_labels.return_value = [l_api] - data.issues[12].get_labels.return_value = [l_api] + iks[5].get_labels.return_value = [l_api] + iks[6].get_labels.return_value = [l_api] + iks[7].get_labels.return_value = [l_api] + iks[8].get_labels.return_value = [l_api] + iks[9].get_labels.return_value = [l_api] + iks[10].get_labels.return_value = [l_api] + iks[11].get_labels.return_value = [l_api] + iks[12].get_labels.return_value = [l_api] - data.pull_requests[0].get_labels.return_value = [l_bug] - data.pull_requests[4].get_labels.return_value = [l_bug] + pks = list(data.pull_requests.keys()) + pks[0].get_labels.return_value = [l_bug] + pks[4].get_labels.return_value = [l_bug] return data @@ -794,37 +813,38 @@ def mined_data_isolated_record_types_no_labels_with_type_defined(mocker, mined_d l_task = mocker.Mock(spec=MockLabel) l_task.name = "task" - data.issues[0].type = t_feature - data.issues[0].get_labels.return_value = [l_feature] - - data.issues[1].type = t_epic # 301 - data.issues[1].get_labels.return_value = [l_epic] - data.issues[2].type = t_epic # 302 - data.issues[2].get_labels.return_value = [l_epic] - data.issues[3].type = t_epic # 303 - data.issues[3].get_labels.return_value = [l_epic] - data.issues[4].type = t_epic # 304 - data.issues[4].get_labels.return_value = [l_epic] - - data.issues[13].type = t_feature # 350 - data.issues[13].get_labels.return_value = [l_feature] - - data.issues[5].type = t_task - data.issues[5].get_labels.return_value = [l_task] - data.issues[6].type = t_task - data.issues[6].get_labels.return_value = [l_task] - data.issues[7].type = t_task - data.issues[7].get_labels.return_value = [l_task] - data.issues[8].type = t_task - data.issues[8].get_labels.return_value = [l_task] - data.issues[9].type = t_task - data.issues[9].get_labels.return_value = [l_task] - data.issues[10].type = t_task - data.issues[10].get_labels.return_value = [l_task] - data.issues[11].type = t_task - data.issues[11].get_labels.return_value = [l_task] - data.issues[12].type = t_task - data.issues[12].get_labels.return_value = [l_task] + iks = list(data.issues.keys()) + iks[0].type = t_feature + iks[0].get_labels.return_value = [l_feature] + + iks[1].type = t_epic # 301 + iks[1].get_labels.return_value = [l_epic] + iks[2].type = t_epic # 302 + iks[2].get_labels.return_value = [l_epic] + iks[3].type = t_epic # 303 + iks[3].get_labels.return_value = [l_epic] + iks[4].type = t_epic # 304 + iks[4].get_labels.return_value = [l_epic] + + iks[13].type = t_feature # 350 + iks[13].get_labels.return_value = [l_feature] + + iks[5].type = t_task + iks[5].get_labels.return_value = [l_task] + iks[6].type = t_task + iks[6].get_labels.return_value = [l_task] + iks[7].type = t_task + iks[7].get_labels.return_value = [l_task] + iks[8].type = t_task + iks[8].get_labels.return_value = [l_task] + iks[9].type = t_task + iks[9].get_labels.return_value = [l_task] + iks[10].type = t_task + iks[10].get_labels.return_value = [l_task] + iks[11].type = t_task + iks[11].get_labels.return_value = [l_task] + iks[12].type = t_task + iks[12].get_labels.return_value = [l_task] return data @@ -842,23 
+862,24 @@ def mined_data_isolated_record_types_with_labels_with_type_defined(mocker, mined t_bug = mocker.Mock(spec=IssueType) t_bug.name = "Bug" - data.issues[0].type = t_bug + iks = list(data.issues.keys()) + iks[0].type = t_bug - data.issues[1].type = t_epic # 301 - data.issues[2].type = t_epic # 302 - data.issues[3].type = t_epic # 303 - data.issues[4].type = t_epic # 304 + iks[1].type = t_epic # 301 + iks[2].type = t_epic # 302 + iks[3].type = t_epic # 303 + iks[4].type = t_epic # 304 - data.issues[13].type = t_feature # 350 + iks[13].type = t_feature # 350 - data.issues[5].type = t_task - data.issues[6].type = t_task - data.issues[7].type = t_task - data.issues[8].type = t_task - data.issues[9].type = t_task - data.issues[10].type = t_task - data.issues[11].type = t_task - data.issues[12].type = t_task + iks[5].type = t_task + iks[6].type = t_task + iks[7].type = t_task + iks[8].type = t_task + iks[9].type = t_task + iks[10].type = t_task + iks[11].type = t_task + iks[12].type = t_task return data diff --git a/tests/release_notes/builder/test_release_notes_builder.py b/tests/release_notes/builder/test_release_notes_builder.py index 50f7f127..5e160c8e 100644 --- a/tests/release_notes/builder/test_release_notes_builder.py +++ b/tests/release_notes/builder/test_release_notes_builder.py @@ -1614,6 +1614,10 @@ def test_build_hierarchy_rls_notes_no_labels_no_type( actual_release_notes = builder.build() + print("XXX") + print(actual_release_notes) + print("XXX") + assert expected_release_notes == actual_release_notes diff --git a/tests/release_notes/data/test_filter.py b/tests/release_notes/data/test_filter.py index 9188f09e..d6ffab87 100644 --- a/tests/release_notes/data/test_filter.py +++ b/tests/release_notes/data/test_filter.py @@ -22,11 +22,12 @@ from release_notes_generator.data.filter import FilterByRelease from release_notes_generator.model.mined_data import MinedData +from tests.conftest import mock_repo def test_filter_no_release(mocker): - mock_log_info = mocker.patch("release_notes_generator.filter.logger.info") - mock_log_debug = mocker.patch("release_notes_generator.filter.logger.debug") + mock_log_info = mocker.patch("release_notes_generator.data.filter.logger.info") + mock_log_debug = mocker.patch("release_notes_generator.data.filter.logger.debug") # Mock MinedData data = MagicMock(spec=MinedData) @@ -50,8 +51,8 @@ def test_filter_no_release(mocker): def test_filter_with_release(mocker): - mock_log_info = mocker.patch("release_notes_generator.filter.logger.info") - mock_log_debug = mocker.patch("release_notes_generator.filter.logger.debug") + mock_log_info = mocker.patch("release_notes_generator.data.filter.logger.info") + mock_log_debug = mocker.patch("release_notes_generator.data.filter.logger.debug") # Mock MinedData data = MagicMock(spec=MinedData) @@ -60,18 +61,18 @@ def test_filter_with_release(mocker): data.since = datetime(2023, 1, 1) # Mock issues, pull requests, and commits - data.issues = [ - MagicMock(closed_at=datetime(2023, 1, 2)), - MagicMock(closed_at=datetime(2022, 12, 31)), - ] - data.pull_requests = [ - MagicMock(merged_at=datetime(2023, 2, 3), closed_at=datetime(2022, 12, 31)), - MagicMock(merged_at=datetime(2022, 12, 30), closed_at=datetime(2022, 12, 31)), - ] - data.commits = [ - MagicMock(commit=MagicMock(author=MagicMock(date=datetime(2024, 1, 4)))), - MagicMock(commit=MagicMock(author=MagicMock(date=datetime(2022, 12, 29)))), - ] + data.issues = { + MagicMock(closed_at=datetime(2023, 1, 2)): data.home_repository, + MagicMock(closed_at=datetime(2022, 
12, 31)): data.home_repository, + } + data.pull_requests = { + MagicMock(merged_at=datetime(2023, 2, 3), closed_at=datetime(2022, 12, 31)): data.home_repository, + MagicMock(merged_at=datetime(2022, 12, 30), closed_at=datetime(2022, 12, 31)): data.home_repository, + } + data.commits = { + MagicMock(commit=MagicMock(author=MagicMock(date=datetime(2024, 1, 4)))): data.home_repository, + MagicMock(commit=MagicMock(author=MagicMock(date=datetime(2022, 12, 29)))): data.home_repository, + } # Apply filter filter_instance = FilterByRelease() @@ -81,9 +82,9 @@ def test_filter_with_release(mocker): assert len(filtered_data.issues) == 1 assert len(filtered_data.pull_requests) == 1 assert len(filtered_data.commits) == 1 - assert filtered_data.issues[0].closed_at == datetime(2023, 1, 2) - assert filtered_data.pull_requests[0].merged_at == datetime(2023, 2, 3) - assert filtered_data.commits[0].commit.author.date == datetime(2024, 1, 4) + assert list(filtered_data.issues.keys())[0].closed_at == datetime(2023, 1, 2) + assert list(filtered_data.pull_requests.keys())[0].merged_at == datetime(2023, 2, 3) + assert list(filtered_data.commits.keys())[0].commit.author.date == datetime(2024, 1, 4) assert ('Starting issue, prs and commit reduction by the latest release since time.',) == mock_log_info.call_args_list[0][0] assert ('Count of issues reduced from %d to %d', 2, 1) == mock_log_debug.call_args_list[1][0] assert ('Count of pulls reduced from %d to %d', 2, 1) == mock_log_debug.call_args_list[2][0] diff --git a/tests/release_notes/data/test_miner.py b/tests/release_notes/data/test_miner.py index 79ea6f1a..eda9302a 100644 --- a/tests/release_notes/data/test_miner.py +++ b/tests/release_notes/data/test_miner.py @@ -41,24 +41,24 @@ class MinedDataMock(MinedData): def __init__(self, mocker, rls_mock: Optional[GitRelease], mock_repo: Repository): super().__init__(mock_repo) self.release = rls_mock if rls_mock is not None else mocker.Mock(spec=GitRelease) - self.issues = [ - mocker.Mock(spec=Issue, title="Mock Issue 1", number=1), - mocker.Mock(spec=Issue, title="Mock Issue 2", number=2), - ] - self.pull_requests = [ - mocker.Mock(spec=PullRequest, title="Mock PR 1", number=1), - mocker.Mock(spec=PullRequest, title="Mock PR 2", number=2), - ] - self.commits = [ - mocker.Mock(spec=Commit, sha="abc123", commit={"message": "Mock Commit 1"}), - mocker.Mock(spec=Commit, sha="def456", commit={"message": "Mock Commit 2"}), - ] + self.issues = { + mocker.Mock(spec=Issue, title="Mock Issue 1", number=1): mock_repo, + mocker.Mock(spec=Issue, title="Mock Issue 2", number=2): mock_repo, + } + self.pull_requests = { + mocker.Mock(spec=PullRequest, title="Mock PR 1", number=1): mock_repo, + mocker.Mock(spec=PullRequest, title="Mock PR 2", number=2): mock_repo, + } + self.commits = { + mocker.Mock(spec=Commit, sha="abc123", commit={"message": "Mock Commit 1"}): mock_repo, + mocker.Mock(spec=Commit, sha="def456", commit={"message": "Mock Commit 2"}): mock_repo, + } self.since = datetime.now() def test_get_latest_release_from_tag_name_not_defined_2_releases_type_error(mocker, mock_repo, mock_git_releases): mocker.patch("release_notes_generator.action_inputs.ActionInputs.is_from_tag_name_defined", return_value=False) - mock_log_info = mocker.patch("release_notes_generator.miner.logger.info") - mock_log_error = mocker.patch("release_notes_generator.miner.logger.error") + mock_log_info = mocker.patch("release_notes_generator.data.miner.logger.info") + mock_log_error = mocker.patch("release_notes_generator.data.miner.logger.error") 
github_mock = mocker.Mock(spec=Github) github_mock.get_repo.return_value = mock_repo @@ -84,8 +84,8 @@ def test_get_latest_release_from_tag_name_not_defined_2_releases_type_error(mock def test_get_latest_release_from_tag_name_not_defined_2_releases_value_error(mocker, mock_repo, mock_git_releases): mocker.patch("release_notes_generator.action_inputs.ActionInputs.is_from_tag_name_defined", return_value=False) - mock_log_info = mocker.patch("release_notes_generator.miner.logger.info") - mock_log_error = mocker.patch("release_notes_generator.miner.logger.error") + mock_log_info = mocker.patch("release_notes_generator.data.miner.logger.info") + mock_log_error = mocker.patch("release_notes_generator.data.miner.logger.error") github_mock = mocker.Mock(spec=Github) github_mock.get_repo.return_value = mock_repo @@ -113,7 +113,7 @@ def test_get_latest_release_from_tag_name_not_defined_2_releases_value_error(moc def test_get_latest_release_from_tag_name_not_defined_2_releases(mocker, mock_repo, mock_git_releases): mocker.patch("release_notes_generator.action_inputs.ActionInputs.is_from_tag_name_defined", return_value=False) - mock_log_info = mocker.patch("release_notes_generator.miner.logger.info") + mock_log_info = mocker.patch("release_notes_generator.data.miner.logger.info") github_mock = mocker.Mock(spec=Github) github_mock.get_repo.return_value = mock_repo @@ -135,7 +135,7 @@ def test_get_latest_release_from_tag_name_not_defined_2_releases(mocker, mock_re def test_get_latest_release_from_tag_name_not_defined_no_release(mocker, mock_repo): mocker.patch("release_notes_generator.action_inputs.ActionInputs.is_from_tag_name_defined", return_value=False) - mock_log_info = mocker.patch("release_notes_generator.miner.logger.info") + mock_log_info = mocker.patch("release_notes_generator.data.miner.logger.info") github_mock = mocker.Mock(spec=Github) github_mock.get_repo.return_value = mock_repo @@ -160,7 +160,7 @@ def test_get_latest_release_from_tag_name_not_defined_no_release(mocker, mock_re def test_get_latest_release_from_tag_name_defined_release_exists(mocker, mock_repo): mocker.patch("release_notes_generator.action_inputs.ActionInputs.is_from_tag_name_defined", return_value=True) mock_exit = mocker.patch("sys.exit") - mock_log_info = mocker.patch("release_notes_generator.miner.logger.info") + mock_log_info = mocker.patch("release_notes_generator.data.miner.logger.info") github_mock = mocker.Mock(spec=Github) github_mock.get_repo.return_value = mock_repo @@ -187,8 +187,8 @@ def test_get_latest_release_from_tag_name_defined_release_exists(mocker, mock_re def test_get_latest_release_from_tag_name_defined_no_release(mocker, mock_repo): mocker.patch("release_notes_generator.action_inputs.ActionInputs.is_from_tag_name_defined", return_value=True) mock_exit = mocker.patch("sys.exit") - mock_log_error = mocker.patch("release_notes_generator.miner.logger.error") - mock_log_info = mocker.patch("release_notes_generator.miner.logger.info") + mock_log_error = mocker.patch("release_notes_generator.data.miner.logger.error") + mock_log_info = mocker.patch("release_notes_generator.data.miner.logger.info") github_mock = mocker.Mock(spec=Github) github_mock.get_repo.return_value = mock_repo diff --git a/tests/release_notes/record/factory/test_default_record_factory.py b/tests/release_notes/record/factory/test_default_record_factory.py index 093e239f..c7bc2ca4 100644 --- a/tests/release_notes/record/factory/test_default_record_factory.py +++ b/tests/release_notes/record/factory/test_default_record_factory.py @@ 
-187,12 +187,12 @@ def test_generate_with_issues_and_pulls_and_commits(mocker, mock_repo): mock_github_client.get_rate_limit.return_value = mock_rate_limit data = MinedData(mock_repo) - data.issues = [issue1] - data.pull_requests = [pr1] + data.issues = {issue1: mock_repo} + data.pull_requests = {pr1: mock_repo} commit3 = mocker.Mock(spec=Commit) commit3.sha = "ghi789" commit3.repository = mock_repo - data.commits = [commit1, commit2, commit3] + data.commits = {commit1: mock_repo, commit2: mock_repo, commit3: mock_repo} records = DefaultRecordFactory(mock_github_client, mock_repo).generate(data) @@ -230,9 +230,9 @@ def test_generate_with_issues_and_pulls_and_commits_with_skip_labels(mocker, moc commit3.repository.full_name = "org/repo" data = MinedData(mock_repo) - data.issues = [issue1, issue2] - data.pull_requests = [pr1, pr2] - data.commits = [commit1, commit2, commit3] + data.issues = {issue1: mock_repo, issue2: mock_repo} + data.pull_requests = {pr1: mock_repo, pr2: mock_repo} + data.commits = {commit1: mock_repo, commit2: mock_repo, commit3: mock_repo} records = DefaultRecordFactory(mock_github_client, mock_repo).generate(data) @@ -272,8 +272,8 @@ def test_generate_with_no_commits(mocker, mock_repo): data = MinedData(mock_repo) # pylint: disable=unused-variable issue1, issue2, pr1, pr2, commit1, commit2 = setup_issues_pulls_commits(mocker, mock_repo) - data.issues = [issue1] - data.pull_requests = [pr1] # PR linked to a non-fetched issues (due to since condition) + data.issues = {issue1: mock_repo} + data.pull_requests = {pr1: mock_repo} # PR linked to a non-fetched issues (due to since condition) mock_rate_limit = mocker.Mock() mock_rate_limit.rate.remaining = 10 @@ -281,7 +281,7 @@ def test_generate_with_no_commits(mocker, mock_repo): mock_github_client.get_rate_limit.return_value = mock_rate_limit mock_repo.get_issue.return_value = issue2 - data.commits = [] # No commits + data.commits = {} # No commits mocker.patch("release_notes_generator.record.factory.default_record_factory.get_issues_for_pr", return_value=['org/repo#2']) records = DefaultRecordFactory(mock_github_client, mock_repo).generate(data) @@ -306,8 +306,8 @@ def test_generate_with_no_commits_with_wrong_issue_number_in_pull_body_mention(m # pylint: disable=unused-variable issue1, issue2, pr1, pr2, commit1, commit2 = setup_issues_pulls_commits(mocker, mock_repo) pr1.body = "Closes #2" - data.issues = [issue1] - data.pull_requests = [pr1] # PR linked to a non-fetched issues (due to since condition) + data.issues = {issue1: mock_repo} + data.pull_requests = {pr1: mock_repo} # PR linked to a non-fetched issues (due to since condition) mock_rate_limit = mocker.Mock() mock_rate_limit.rate.remaining = 10 @@ -315,7 +315,7 @@ def test_generate_with_no_commits_with_wrong_issue_number_in_pull_body_mention(m mock_github_client.get_rate_limit.return_value = mock_rate_limit mock_repo.get_issue.return_value = issue2 - data.commits = [] # No commits + data.commits = {} # No commits mocker.patch("release_notes_generator.record.factory.default_record_factory.get_issues_for_pr", return_value=['org/repo#2']) records = DefaultRecordFactory(mock_github_client, mock_repo).generate(data) @@ -350,9 +350,9 @@ def test_generate_with_no_issues(mocker, mock_repo, request): mock_github_client = mocker.Mock(spec=Github) data = MinedData(mock_repo) pr1, pr2, commit1, commit2 = setup_no_issues_pulls_commits(mocker) - data.pull_requests = [pr1, pr2] - data.commits = [commit1, commit2] - data.issues = [] # No issues + data.pull_requests = {pr1: 
mock_repo, pr2: mock_repo} + data.commits = {commit1: mock_repo, commit2: mock_repo} + data.issues = {} # No issues records = DefaultRecordFactory(mock_github_client, mock_repo).generate(data) @@ -390,10 +390,10 @@ def test_generate_with_no_issues_skip_labels(mocker, mock_repo, request): pr1.get_labels.return_value = [mock_label1] pr2.get_labels.return_value = [mock_label2] - data.pull_requests = [pr1, pr2] - data.commits = [commit1, commit2] + data.pull_requests = {pr1: mock_repo, pr2: mock_repo} + data.commits = {commit1: mock_repo, commit2: mock_repo} - data.issues = [] # No issues + data.issues = {} # No issues records = DefaultRecordFactory(mock_github_client, mock_repo).generate(data) @@ -421,9 +421,9 @@ def test_generate_with_no_pulls(mocker, mock_repo): mock_github_client = mocker.Mock(spec=Github) data = MinedData(mock_repo) issue1, issue2 = setup_issues_no_pulls_no_commits(mocker) - data.issues = [issue1, issue2] - data.pull_requests = [] # No pull requests - data.commits = [] # No commits + data.issues = {issue1: mock_repo, issue2: mock_repo} + data.pull_requests = {} # No pull requests + data.commits = {} # No commits records = DefaultRecordFactory(mock_github_client, mock_repo).generate(data) # Verify the record creation diff --git a/tests/test_release_notes_generator.py b/tests/test_release_notes_generator.py index 5b9ae70b..4654e8ec 100644 --- a/tests/test_release_notes_generator.py +++ b/tests/test_release_notes_generator.py @@ -114,7 +114,7 @@ def test_generate_release_notes_latest_release_found_by_created_at( mock_git_release.created_at = mock_repo.created_at + timedelta(days=5) mock_git_release.published_at = mock_repo.created_at + timedelta(days=5) - mocker.patch("release_notes_generator.miner.DataMiner.get_latest_release", return_value=mock_git_release) + mocker.patch("release_notes_generator.data.miner.DataMiner.get_latest_release", return_value=mock_git_release) mock_rate_limit = mocker.Mock() mock_rate_limit.rate.remaining = 1000 @@ -169,7 +169,7 @@ def test_generate_release_notes_latest_release_found_by_published_at( github_mock.get_repo().get_latest_release.return_value = mock_git_release mock_git_release.created_at = mock_repo.created_at + timedelta(days=5) mock_git_release.published_at = mock_repo.created_at + timedelta(days=5) - mocker.patch("release_notes_generator.miner.DataMiner.get_latest_release", return_value=mock_git_release) + mocker.patch("release_notes_generator.data.miner.DataMiner.get_latest_release", return_value=mock_git_release) mock_rate_limit = mocker.Mock() mock_rate_limit.rate.remaining = 1000 From c97bfe168e742e5c8d6a31abbbb166594a5a0e84 Mon Sep 17 00:00:00 2001 From: miroslavpojer Date: Tue, 30 Sep 2025 14:36:55 +0200 Subject: [PATCH 4/8] Fixed rest of unit tests. 
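Note: the hierarchy-building fix below removes the stateful __sub_issue_parents mapping and instead derives a child -> parent map from data.parents_sub_issues, then folds records into their parents bottom-up, stopping when the map is empty or no further progress is possible. A self-contained sketch of that folding under simplified assumptions (nested plain dicts stand in for the SubIssueRecord and HierarchyIssueRecord objects; the ids are illustrative):

    # parent id -> ids of its direct sub-issues, as collected during mining
    parents_sub_issues: dict[str, list[str]] = {
        "org/repo#301": ["org/repo#450"],
        "org/repo#450": ["org/repo#460"],
    }

    # invert to child -> parent (GitHub allows at most one parent per issue)
    sub_issue_parents: dict[str, str] = {
        child: parent
        for parent, children in parents_sub_issues.items()
        for child in children
    }

    # flat record store, keyed like self._records; dicts stand in for records
    records: dict[str, dict] = {
        "org/repo#301": {},
        "org/repo#450": {},
        "org/repo#460": {},
    }

    while sub_issue_parents:
        progressed = False
        for child, parent in list(sub_issue_parents.items()):
            if child in sub_issue_parents.values():
                continue  # child still has pending sub-issues; fold leaves first
            records[parent][child] = records.pop(child)  # re-home under its parent
            del sub_issue_parents[child]
            progressed = True
        if not progressed:
            break  # unresolved mapping (unexpected record type); avoid endless loop

    print(records)  # -> {'org/repo#301': {'org/repo#450': {'org/repo#460': {}}}}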
--- .../factory/issue_hierarchy_record_factory.py | 153 +++--------------- .../builder/test_release_notes_builder.py | 4 - .../test_issue_hierarchy_record_factory.py | 24 +-- 3 files changed, 36 insertions(+), 145 deletions(-) diff --git a/release_notes_generator/record/factory/issue_hierarchy_record_factory.py b/release_notes_generator/record/factory/issue_hierarchy_record_factory.py index a250e7d6..3bfda3f3 100644 --- a/release_notes_generator/record/factory/issue_hierarchy_record_factory.py +++ b/release_notes_generator/record/factory/issue_hierarchy_record_factory.py @@ -19,6 +19,7 @@ """ import logging +from copy import deepcopy from typing import cast, Optional from github import Github @@ -51,12 +52,8 @@ def __init__(self, github: Github, home_repository: Repository) -> None: super().__init__(github, home_repository) self.__registered_issues: set[str] = set() - self.__sub_issue_parents: dict[str, str] = {} self.__registered_commits: set[str] = set() - # self.__external_sub_issues: list[SubIssue] = [] - # self.__local_sub_issues_checked: list[str] = [] - def generate(self, data: MinedData) -> dict[str, Record]: """ Generate records for release notes. @@ -70,15 +67,12 @@ def generate(self, data: MinedData) -> dict[str, Record]: logger.debug("Creation of records started...") # NEW: uz mam mnapovani, kdo je hierarchy, kdo je SubIssue a kdo je Issue for issue, repo in data.issues.items(): iid = get_id(issue, repo) - # tmp note - tohle by nemelo by za potrebi - nove uz mam vsechno info, abych to udelal v 1 prubehu - # if get_id(issue) in self.__registered_issues: - # continue if len(data.parents_sub_issues[iid]) > 0: # issue has sub-issues - it is either hierarchy issue or sub-hierarchy issue self._create_record_for_hierarchy_issue(issue, iid) - elif id in self.__sub_issue_parents.values(): + elif any(iid in sublist for sublist in data.parents_sub_issues.values()): # issue has no sub-issues - it is sub-issue self._create_record_for_sub_issue(issue, iid) @@ -86,46 +80,6 @@ def generate(self, data: MinedData) -> dict[str, Record]: # issue is not sub-issue and has no sub-issues - it is issue self._create_record_for_issue(issue, iid) - # old code for delete after testing - # First register all issues with sub-issues - # for issue in data.issues: - # if get_id(issue) in self.__registered_issues: - # continue - # - # self._create_issue_record_using_sub_issues_existence(issue, data) - - # Second register all hierarchy issues from sub-issues - # registered_before = -1 - # while registered_before < len(self.__registered_issues): - # registered_before = len(self.__registered_issues) - # logger.debug("Looking for hierarchical issue among sub-issues...") - # - # issues_expansion = [] - # for issue in data.issues: - # iid = self.get_id(issue) - # if iid in self.__registered_issues: - # continue - # - # if iid in self.__sub_issue_parents and iid not in self.__local_sub_issues_checked: - # issues_expansion.extend(self._create_issue_record_using_sub_issues_existence(issue, data)) - # self.__local_sub_issues_checked.append(iid) - # - # data.issues.extend(issues_expansion) - - # Third register all external sub-issues - # for ext_sub_issue in self.__external_sub_issues: - # if self.get_id(ext_sub_issue) in self.__registered_issues: - # continue - # - # self._create_record_for_sub_issue(ext_sub_issue) - - # Now register all issues without sub-issues - # for issue in data.issues: - # if self.get_id(issue) in self.__registered_issues: - # continue - # - # 
self._create_issue_record_using_sub_issues_not_existence(issue) - # dev note: Each issue is now in records dict by its issue number - all on same level - no hierarchy # This is useful for population by PRs and commits @@ -141,7 +95,15 @@ def generate(self, data: MinedData) -> dict[str, Record]: # dev note: now we have all PRs and commits registered to issues or as stand-alone records # let build hierarchy logger.debug("Building issues hierarchy...") - self._re_register_hierarchy_issues() + + self._re_register_hierarchy_issues( + sub_issues_ids = list({iid for sublist in data.parents_sub_issues.values() for iid in sublist}), + sub_issue_parents = { + sub_issue: parent + for parent, sublist in data.parents_sub_issues.items() + for sub_issue in sublist + } + ) self.order_hierarchy_levels() logger.info( @@ -181,7 +143,7 @@ def _register_pull_and_its_commits_to_issue( repo_full_name = f"{org}/{repo}" parent_issue = self._safe_call(data.get_repository(repo_full_name).get_issue)(num) if data.get_repository(repo_full_name) is not None else None if parent_issue is not None: - self._create_record_for_issue(parent_issue) + self._create_record_for_issue(parent_issue, get_id(parent_issue, data.get_repository(repo_full_name))) if issue_id in self._records and isinstance( self._records[issue_id], (SubIssueRecord, HierarchyIssueRecord, IssueRecord) @@ -203,74 +165,6 @@ def _register_pull_and_its_commits_to_issue( self._records[pid] = pr_rec logger.debug("Created record for PR %s: %s", pid, pull.title) - # def _create_issue_record_using_sub_issues_existence(self, issue: Issue, data: MinedData) -> list[SubIssue]: - # # use presence of sub-issues as a hint for hierarchy issue or non hierarchy issue - # iid = get_id(issue) - # sub_issues_ids = data.parents_sub_issues[iid] - # logger.debug("Found %d sub-issues for %d", len(sub_issues_ids), issue.number) - # - # if len(sub_issues_ids) > 0: - # self._create_record_for_hierarchy_issue(issue) - # for siid in sub_issues_ids: - # org, repo, num = parse_issue_id(siid) - # - # # check if sub-issue is from current repository - # if f"{org}/{repo}" != issue.repository.full_name: - # # register sub-issue and its parent for later hierarchy building - # # Note: GitHub now allows only 1 parent - # self.__sub_issue_parents[siid] = get_id(issue) - # - # # TODO - fetch - # - # self.__external_sub_issues.append(si) - # logger.debug( - # "Detected sub-issue %d from different repository %s - adding as external sub-issue" - # " for later processing", - # num, - # f"{org}/{repo}", - # ) - # - # else: - # use_issue = False - # if ( - # data.since - # and si.state == IssueRecord.ISSUE_STATE_CLOSED - # and si.closed_at - # and data.since > si.closed_at - # ): - # logger.debug("Detected sub-issue %d closed in previous release.", si.number) - # if len(list(si.get_sub_issues())) > 0: - # use_issue = True - # else: - # self.__registered_issues.add(siid) - # - # elif si.state == IssueRecord.ISSUE_STATE_OPEN: - # logger.debug("Detected sub-issue %d is still open.", si.number) - # if len(list(si.get_sub_issues())) > 0: - # use_issue = True - # else: - # self.__registered_issues.add(siid) - # - # elif si.state == IssueRecord.ISSUE_STATE_CLOSED: # issue is valid - # use_issue = True - # - # else: - # logger.warning("Detected unexpected sub-issue %d with parent %d", si.number, issue.number) - # - # if use_issue: - # self.__sub_issue_parents[siid] = get_id(issue) - # if si not in data.issues: - # new_local_issues.append(si) - # - # return new_local_issues - - # def 
_create_issue_record_using_sub_issues_not_existence(self, issue: Issue) -> None: - # # Expected to run after all issue with sub-issues are registered - # if get_id(issue) in self.__sub_issue_parents.keys(): # pylint: disable=consider-iterating-dictionary - # self._create_record_for_sub_issue(issue) - # else: - # self._create_record_for_issue(issue) - def _create_record_for_hierarchy_issue(self, i: Issue, iid: str, issue_labels: Optional[list[str]] = None) -> None: """ Create a hierarchy issue record and register sub-issues. @@ -322,10 +216,9 @@ def _create_record_for_sub_issue(self, issue: Issue, iid: str, issue_labels: Op self._records[iid].is_cross_repo = True - def _re_register_hierarchy_issues(self): + def _re_register_hierarchy_issues(self, sub_issues_ids: list[str], sub_issue_parents: dict[str, str]): logger.debug("Re-registering hierarchy issues ...") - - sub_issues_ids: list[str] = list(self.__sub_issue_parents.keys()) + reduced_sub_issue_ids: list[str] = deepcopy(sub_issues_ids) made_progress = False for sub_issue_id in sub_issues_ids: @@ -333,24 +226,25 @@ def _re_register_hierarchy_issues(self): # as sub-issue or sub-hierarchy-issue # but do it only for issue where parent issue number is not in _sub_issue_parents keys # Why? We building hierarchy from bottom. Access in records is very easy. - if sub_issue_id in self.__sub_issue_parents.values(): + if sub_issue_id in sub_issue_parents.values(): continue - parent_issue_id: str = self.__sub_issue_parents[sub_issue_id] + parent_issue_id: str = sub_issue_parents[sub_issue_id] parent_rec = cast(HierarchyIssueRecord, self._records[parent_issue_id]) sub_rec = self._records[sub_issue_id] - # TODO - check this localtion - is possiblem that there is saved another type than SubIssueRecord if isinstance(sub_rec, SubIssueRecord): parent_rec.sub_issues[sub_issue_id] = sub_rec # add to parent as SubIssueRecord self._records.pop(sub_issue_id) # remove from main records as it is sub-one - self.__sub_issue_parents.pop(sub_issue_id) # remove from sub-parents as it is now sub-one + reduced_sub_issue_ids.remove(sub_issue_id) # remove from sub-parents as it is now sub-one + sub_issue_parents.pop(sub_issue_id) made_progress = True logger.debug("Added sub-issue %s to parent %s", sub_issue_id, parent_issue_id) elif isinstance(sub_rec, HierarchyIssueRecord): parent_rec.sub_hierarchy_issues[sub_issue_id] = sub_rec # add to parent as 'Sub' HierarchyIssueRecord self._records.pop(sub_issue_id) # remove from main records as it is sub-one - self.__sub_issue_parents.pop(sub_issue_id) # remove from sub-parents as it is now sub-one + reduced_sub_issue_ids.remove(sub_issue_id) # remove from sub-parents as it is now sub-one + sub_issue_parents.pop(sub_issue_id) made_progress = True logger.debug("Added sub-hierarchy-issue %s to parent %s", sub_issue_id, parent_issue_id) else: @@ -358,10 +252,11 @@ def _re_register_hierarchy_issues(self): "Detected IssueRecord in position of SubIssueRecord - leaving as standalone and dropping mapping" ) # Avoid infinite recursion by removing the unresolved mapping - self.__sub_issue_parents.pop(sub_issue_id, None) + reduced_sub_issue_ids.remove(sub_issue_id) + sub_issue_parents.pop(sub_issue_id) - if self.__sub_issue_parents and made_progress: - self._re_register_hierarchy_issues() + if reduced_sub_issue_ids and made_progress: + self._re_register_hierarchy_issues(reduced_sub_issue_ids, sub_issue_parents) def order_hierarchy_levels(self, level: int = 0) -> None: """ diff --git a/tests/release_notes/builder/test_release_notes_builder.py 
b/tests/release_notes/builder/test_release_notes_builder.py
index 5e160c8e..50f7f127 100644
--- a/tests/release_notes/builder/test_release_notes_builder.py
+++ b/tests/release_notes/builder/test_release_notes_builder.py
@@ -1614,10 +1614,6 @@ def test_build_hierarchy_rls_notes_no_labels_no_type(
 
     actual_release_notes = builder.build()
 
-    print("XXX")
-    print(actual_release_notes)
-    print("XXX")
-
     assert expected_release_notes == actual_release_notes
 
 
diff --git a/tests/release_notes/record/factory/test_issue_hierarchy_record_factory.py b/tests/release_notes/record/factory/test_issue_hierarchy_record_factory.py
index 3be9dfa1..6dcb1608 100644
--- a/tests/release_notes/record/factory/test_issue_hierarchy_record_factory.py
+++ b/tests/release_notes/record/factory/test_issue_hierarchy_record_factory.py
@@ -77,21 +77,21 @@ def test_generate_isolated_record_types_no_labels_no_type_defined(mocker, mock_r
     rec_hi_1 = cast(HierarchyIssueRecord, result['org/repo#301'])
     assert 0 == rec_hi_1.pull_requests_count()
     assert 0 == len(rec_hi_1.sub_hierarchy_issues.values())
-    assert 1 == len(rec_hi_1.sub_issues.values())
+    assert 2 == len(rec_hi_1.sub_issues.values())
     assert 0 == rec_hi_1.sub_issues['org/repo#450'].pull_requests_count()
     assert 0 == rec_hi_1.level
 
     rec_hi_2 = cast(HierarchyIssueRecord, result['org/repo#302'])
     assert 1 == rec_hi_2.pull_requests_count()
     assert 0 == len(rec_hi_2.sub_hierarchy_issues.values())
-    assert 1 == len(rec_hi_2.sub_issues.values())
+    assert 2 == len(rec_hi_2.sub_issues.values())
     assert 1 == rec_hi_2.sub_issues['org/repo#451'].pull_requests_count()
     assert 0 == rec_hi_2.level
 
     rec_hi_3 = cast(HierarchyIssueRecord, result['org/repo#303'])
     assert 1 == rec_hi_3.pull_requests_count()
     assert 0 == len(rec_hi_3.sub_hierarchy_issues.values())
-    assert 1 == len(rec_hi_3.sub_issues.values())
+    assert 2 == len(rec_hi_3.sub_issues.values())
     assert 1 == rec_hi_3.sub_issues['org/repo#452'].pull_requests_count()
     assert "Fixed bug in PR 151" == rec_hi_3.sub_issues['org/repo#452'].get_commit(151, "merge_commit_sha_151").commit.message
     assert 0 == rec_hi_3.level
@@ -147,19 +147,19 @@ def test_generate_isolated_record_types_with_labels_no_type_defined(mocker, mock
     rec_hi_1 = cast(HierarchyIssueRecord, result['org/repo#301'])
     assert 0 == rec_hi_1.pull_requests_count()
     assert 0 == len(rec_hi_1.sub_hierarchy_issues.values())
-    assert 1 == len(rec_hi_1.sub_issues.values())
+    assert 2 == len(rec_hi_1.sub_issues.values())
     assert 0 == rec_hi_1.sub_issues['org/repo#450'].pull_requests_count()
 
     rec_hi_2 = cast(HierarchyIssueRecord, result['org/repo#302'])
     assert 1 == rec_hi_2.pull_requests_count()
     assert 0 == len(rec_hi_2.sub_hierarchy_issues.values())
-    assert 1 == len(rec_hi_2.sub_issues.values())
+    assert 2 == len(rec_hi_2.sub_issues.values())
     assert 1 == rec_hi_2.sub_issues['org/repo#451'].pull_requests_count()
 
     rec_hi_3 = cast(HierarchyIssueRecord, result['org/repo#303'])
     assert 1 == rec_hi_3.pull_requests_count()
     assert 0 == len(rec_hi_3.sub_hierarchy_issues.values())
-    assert 1 == len(rec_hi_3.sub_issues.values())
+    assert 2 == len(rec_hi_3.sub_issues.values())
     assert 1 == rec_hi_3.sub_issues['org/repo#452'].pull_requests_count()
     assert "Fixed bug in PR 151" == rec_hi_3.sub_issues['org/repo#452'].get_commit(151, "merge_commit_sha_151").commit.message
 
@@ -210,19 +210,19 @@ def test_generate_isolated_record_types_no_labels_with_type_defined(mocker, mock
     rec_hi_1 = cast(HierarchyIssueRecord, result['org/repo#301'])
     assert 0 == rec_hi_1.pull_requests_count()
     assert 0 == len(rec_hi_1.sub_hierarchy_issues.values())
-    assert 1 == len(rec_hi_1.sub_issues.values())
+    assert 2 == len(rec_hi_1.sub_issues.values())
     assert 0 == rec_hi_1.sub_issues['org/repo#450'].pull_requests_count()
 
     rec_hi_2 = cast(HierarchyIssueRecord, result['org/repo#302'])
     assert 1 == rec_hi_2.pull_requests_count()
     assert 0 == len(rec_hi_2.sub_hierarchy_issues.values())
-    assert 1 == len(rec_hi_2.sub_issues.values())
+    assert 2 == len(rec_hi_2.sub_issues.values())
     assert 1 == rec_hi_2.sub_issues['org/repo#451'].pull_requests_count()
 
     rec_hi_3 = cast(HierarchyIssueRecord, result['org/repo#303'])
     assert 1 == rec_hi_3.pull_requests_count()
     assert 0 == len(rec_hi_3.sub_hierarchy_issues.values())
-    assert 1 == len(rec_hi_3.sub_issues.values())
+    assert 2 == len(rec_hi_3.sub_issues.values())
     assert 1 == rec_hi_3.sub_issues['org/repo#452'].pull_requests_count()
     assert "Fixed bug in PR 151" == rec_hi_3.sub_issues['org/repo#452'].get_commit(151, "merge_commit_sha_151").commit.message
@@ -273,19 +273,19 @@ def test_generate_isolated_record_types_with_labels_with_type_defined(mocker, mo
     rec_hi_1 = cast(HierarchyIssueRecord, result['org/repo#301'])
     assert 0 == rec_hi_1.pull_requests_count()
     assert 0 == len(rec_hi_1.sub_hierarchy_issues.values())
-    assert 1 == len(rec_hi_1.sub_issues.values())
+    assert 2 == len(rec_hi_1.sub_issues.values())
     assert 0 == rec_hi_1.sub_issues['org/repo#450'].pull_requests_count()
 
     rec_hi_2 = cast(HierarchyIssueRecord, result['org/repo#302'])
     assert 1 == rec_hi_2.pull_requests_count()
     assert 0 == len(rec_hi_2.sub_hierarchy_issues.values())
-    assert 1 == len(rec_hi_2.sub_issues.values())
+    assert 2 == len(rec_hi_2.sub_issues.values())
     assert 1 == rec_hi_2.sub_issues['org/repo#451'].pull_requests_count()
 
     rec_hi_3 = cast(HierarchyIssueRecord, result['org/repo#303'])
     assert 1 == rec_hi_3.pull_requests_count()
     assert 0 == len(rec_hi_3.sub_hierarchy_issues.values())
-    assert 1 == len(rec_hi_3.sub_issues.values())
+    assert 2 == len(rec_hi_3.sub_issues.values())
     assert 1 == rec_hi_3.sub_issues['org/repo#452'].pull_requests_count()
     assert "Fixed bug in PR 151" == rec_hi_3.sub_issues['org/repo#452'].get_commit(151, "merge_commit_sha_151").commit.message
 
From 569c2797b8d760460dd805a014dd8f66f0b5ca9c Mon Sep 17 00:00:00 2001
From: miroslavpojer
Date: Tue, 30 Sep 2025 15:32:18 +0200
Subject: [PATCH 5/8] Fixed all checkers.

---
 .pylintrc                                          |  4 +-
 integration_test.py                                | 18 +++++----
 release_notes_generator/data/filter.py             | 19 +++++-----
 release_notes_generator/data/miner.py              | 38 +++++++++++++------
 .../data/utils/bulk_sub_issue_collector.py         | 26 +++++++------
 release_notes_generator/model/mined_data.py        |  3 +-
 release_notes_generator/model/record.py            |  2 -
 .../record/factory/default_record_factory.py       |  5 ++-
 .../factory/issue_hierarchy_record_factory.py      | 23 +++++------
 release_notes_generator/utils/record_utils.py      | 27 +++++++++++--
 10 files changed, 107 insertions(+), 58 deletions(-)

diff --git a/.pylintrc b/.pylintrc
index ec9a1731..5bfe46a3 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -298,7 +298,7 @@ ignored-parents=
 max-args=10
 
 # Maximum number of attributes for a class (see R0902).
-max-attributes=7
+max-attributes=10
 
 # Maximum number of boolean expressions in an if statement (see R0916).
 max-bool-expr=5
@@ -470,7 +470,7 @@ notes-rgx=
 [REFACTORING]
 
 # Maximum number of nested blocks for function / method body
-max-nested-blocks=5
+max-nested-blocks=6
 
 # Complete name of functions that never returns. When checking for
 # inconsistent-return-statements if a never returning function is called then
diff --git a/integration_test.py b/integration_test.py
index a636eab1..b6304384 100644
--- a/integration_test.py
+++ b/integration_test.py
@@ -1,22 +1,26 @@
-import urllib3
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+"""
+This script demonstrates how to use the BulkSubIssueCollector to find sub-issues.
+"""
 
 import os
+import urllib3
 
 from release_notes_generator.data.utils.bulk_sub_issue_collector import CollectorConfig, BulkSubIssueCollector
 
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
 token = os.getenv("GITHUB_TOKEN")
+if token is None:
+    raise ValueError("GITHUB_TOKEN environment variable is not set")
 
 # If you need to disable TLS verification (to mirror your example):
 cfg = CollectorConfig(verify_tls=False)
 
 collector = BulkSubIssueCollector(token, cfg=cfg)
 
-# new_parents = [
-#     "absa-group/AUL#2960",
-# ]
-
-new_parents = ['absa-group/AUL#3800', 'absa-group/AUL#3799', 'absa-group/AUL#3797', 'absa-group/AUL#3796', 'absa-group/AUL#3794', 'absa-group/AUL#3793', 'absa-group/AUL#3792', 'absa-group/AUL#3791', 'absa-group/AUL#3790', 'absa-group/AUL#3788', 'absa-group/AUL#3787', 'absa-group/AUL#3783', 'absa-group/AUL#3780', 'absa-group/AUL#3779', 'absa-group/AUL#3776', 'absa-group/AUL#3768', 'absa-group/AUL#3767', 'absa-group/AUL#3758', 'absa-group/AUL#3756', 'absa-group/AUL#3744', 'absa-group/AUL#3742', 'absa-group/AUL#3740', 'absa-group/AUL#3737', 'absa-group/AUL#3735', 'absa-group/AUL#3726', 'absa-group/AUL#3725', 'absa-group/AUL#3724', 'absa-group/AUL#3723', 'absa-group/AUL#3722', 'absa-group/AUL#3721', 'absa-group/AUL#3720', 'absa-group/AUL#3718', 'absa-group/AUL#3716', 'absa-group/AUL#3710', 'absa-group/AUL#3709', 'absa-group/AUL#3708', 'absa-group/AUL#3707', 'absa-group/AUL#3706', 'absa-group/AUL#3705', 'absa-group/AUL#3704', 'absa-group/AUL#3700', 'absa-group/AUL#3694', 'absa-group/AUL#3691', 'absa-group/AUL#3689', 'absa-group/AUL#3688', 'absa-group/AUL#3687', 'absa-group/AUL#3686', 'absa-group/AUL#3684', 'absa-group/AUL#3674', 'absa-group/AUL#3672', 'absa-group/AUL#3665', 'absa-group/AUL#3664', 'absa-group/AUL#3659', 'absa-group/AUL#3599', 'absa-group/AUL#3588', 'absa-group/AUL#3585', 'absa-group/AUL#3583', 'absa-group/AUL#3527', 'absa-group/AUL#3516', 'absa-group/AUL#3501', 'absa-group/AUL#3488', 'absa-group/AUL#3487', 'absa-group/AUL#3486', 'absa-group/AUL#3482', 'absa-group/AUL#3301', 'absa-group/AUL#3292', 'absa-group/AUL#3281', 'absa-group/AUL#3226', 'absa-group/AUL#3207', 'absa-group/AUL#3009', 'absa-group/AUL#2960', 'absa-group/AUL#2849', 'absa-group/AUL#2832', 'absa-group/AUL#2824', 'absa-group/AUL#2560', 'absa-group/AUL#2048', 'absa-group/AUL#3749', 'absa-group/AUL#3746', 'absa-group/AUL#3745', 'absa-group/AUL#3738', 'absa-group/AUL#3736', 'absa-group/AUL#3734', 'absa-group/AUL#3712', 'absa-group/AUL#3698', 'absa-group/AUL#3697', 'absa-group/AUL#3695', 'absa-group/AUL#3685', 'absa-group/AUL#3682', 'absa-group/AUL#3679', 'absa-group/AUL#3675', 'absa-group/AUL#3671', 'absa-group/AUL#3669', 'absa-group/AUL#3658', 'absa-group/AUL#3657', 'absa-group/AUL#3656', 'absa-group/AUL#3655', 'absa-group/AUL#3654', 'absa-group/AUL#3653', 'absa-group/AUL#3652', 'absa-group/AUL#3651', 'absa-group/AUL#3650', 'absa-group/AUL#3649', 'absa-group/AUL#3648', 'absa-group/AUL#3642', 'absa-group/AUL#3635', 'absa-group/AUL#3619', 'absa-group/AUL#3613', 'absa-group/AUL#3612', 'absa-group/AUL#3607', 'absa-group/AUL#3603', 'absa-group/AUL#3600', 'absa-group/AUL#3598', 'absa-group/AUL#3596', 'absa-group/AUL#3595', 'absa-group/AUL#3586', 'absa-group/AUL#3582', 'absa-group/AUL#3571', 'absa-group/AUL#3559', 'absa-group/AUL#3556', 'absa-group/AUL#3554', 'absa-group/AUL#3553', 'absa-group/AUL#3550', 'absa-group/AUL#3548', 'absa-group/AUL#3547', 'absa-group/AUL#3544', 'absa-group/AUL#3539', 'absa-group/AUL#3536', 'absa-group/AUL#3535', 'absa-group/AUL#3534', 'absa-group/AUL#3533', 'absa-group/AUL#3530', 'absa-group/AUL#3529', 'absa-group/AUL#3526', 'absa-group/AUL#3525', 'absa-group/AUL#3522', 'absa-group/AUL#3515', 'absa-group/AUL#3514', 'absa-group/AUL#3498', 'absa-group/AUL#3474', 'absa-group/AUL#3473', 'absa-group/AUL#3465', 'absa-group/AUL#3464', 'absa-group/AUL#3457', 'absa-group/AUL#3453', 'absa-group/AUL#3452', 'absa-group/AUL#3437', 'absa-group/AUL#3405', 'absa-group/AUL#3399', 'absa-group/AUL#3385', 'absa-group/AUL#3380', 'absa-group/AUL#3373', 'absa-group/AUL#3360', 'absa-group/AUL#3328', 'absa-group/AUL#3322', 'absa-group/AUL#3317', 'absa-group/AUL#3299', 'absa-group/AUL#3298', 'absa-group/AUL#3297', 'absa-group/AUL#3295', 'absa-group/AUL#3294', 'absa-group/AUL#3287', 'absa-group/AUL#3286', 'absa-group/AUL#3285', 'absa-group/AUL#3284', 'absa-group/AUL#3275', 'absa-group/AUL#3266', 'absa-group/AUL#3265', 'absa-group/AUL#3264', 'absa-group/AUL#3263', 'absa-group/AUL#3262', 'absa-group/AUL#3251', 'absa-group/AUL#3247', 'absa-group/AUL#3246', 'absa-group/AUL#3237', 'absa-group/AUL#3236', 'absa-group/AUL#3235', 'absa-group/AUL#3234', 'absa-group/AUL#3233', 'absa-group/AUL#3224', 'absa-group/AUL#3219', 'absa-group/AUL#3206', 'absa-group/AUL#3203', 'absa-group/AUL#3201', 'absa-group/AUL#3181', 'absa-group/AUL#3177', 'absa-group/AUL#3173', 'absa-group/AUL#3172', 'absa-group/AUL#3153', 'absa-group/AUL#3141', 'absa-group/AUL#3122', 'absa-group/AUL#3104', 'absa-group/AUL#3091', 'absa-group/AUL#3085', 'absa-group/AUL#3084', 'absa-group/AUL#3080', 'absa-group/AUL#3079', 'absa-group/AUL#3072', 'absa-group/AUL#3052', 'absa-group/AUL#3020', 'absa-group/AUL#3006', 'absa-group/AUL#3000', 'absa-group/AUL#2933', 'absa-group/AUL#2932', 'absa-group/AUL#2920', 'absa-group/AUL#2913', 'absa-group/AUL#2912', 'absa-group/AUL#2887', 'absa-group/AUL#2886', 'absa-group/AUL#2884', 'absa-group/AUL#2883', 'absa-group/AUL#2876', 'absa-group/AUL#2873', 'absa-group/AUL#2847', 'absa-group/AUL#2842', 'absa-group/AUL#2841', 'absa-group/AUL#2835', 'absa-group/AUL#2823', 'absa-group/AUL#2819', 'absa-group/AUL#2810', 'absa-group/AUL#2806', 'absa-group/AUL#2801', 'absa-group/AUL#2781', 'absa-group/AUL#2768', 'absa-group/AUL#2754', 'absa-group/AUL#2742', 'absa-group/AUL#2732', 'absa-group/AUL#2725', 'absa-group/AUL#2724', 'absa-group/AUL#2722', 'absa-group/AUL#2720', 'absa-group/AUL#2712', 'absa-group/AUL#2709', 'absa-group/AUL#2707', 'absa-group/AUL#2706', 'absa-group/AUL#2691', 'absa-group/AUL#2679', 'absa-group/AUL#2678', 'absa-group/AUL#2674', 'absa-group/AUL#2671', 'absa-group/AUL#2668', 'absa-group/AUL#2664', 'absa-group/AUL#2660', 'absa-group/AUL#2649', 'absa-group/AUL#2647', 'absa-group/AUL#2631', 'absa-group/AUL#2619', 'absa-group/AUL#2590', 'absa-group/AUL#2584', 'absa-group/AUL#2583', 'absa-group/AUL#2551', 'absa-group/AUL#2549', 'absa-group/AUL#2530', 'absa-group/AUL#2507', 'absa-group/AUL#2504', 'absa-group/AUL#2503', 'absa-group/AUL#2494', 'absa-group/AUL#2484', 'absa-group/AUL#2477', 'absa-group/AUL#2470', 'absa-group/AUL#2439', 'absa-group/AUL#2432', 'absa-group/AUL#2416', 'absa-group/AUL#2414', 'absa-group/AUL#2412', 'absa-group/AUL#2411', 'absa-group/AUL#2407', 'absa-group/AUL#2405', 'absa-group/AUL#2404', 'absa-group/AUL#2400', 'absa-group/AUL#2394', 'absa-group/AUL#2391', 'absa-group/AUL#2385', 'absa-group/AUL#2374', 'absa-group/AUL#2369', 'absa-group/AUL#2368', 'absa-group/AUL#2367', 'absa-group/AUL#2366', 'absa-group/AUL#2365', 'absa-group/AUL#2364', 'absa-group/AUL#2344', 'absa-group/AUL#2343', 'absa-group/AUL#2338', 'absa-group/AUL#2335', 'absa-group/AUL#2332', 'absa-group/AUL#2326', 'absa-group/AUL#2320', 'absa-group/AUL#2319', 'absa-group/AUL#2318', 'absa-group/AUL#2313', 'absa-group/AUL#2304', 'absa-group/AUL#2303', 'absa-group/AUL#2302', 'absa-group/AUL#2301', 'absa-group/AUL#2300', 'absa-group/AUL#2299', 'absa-group/AUL#2298', 'absa-group/AUL#2216', 'absa-group/AUL#2200', 'absa-group/AUL#2181', 'absa-group/AUL#2175', 'absa-group/AUL#2174', 'absa-group/AUL#2167', 'absa-group/AUL#2163', 'absa-group/AUL#2147', 'absa-group/AUL#2142', 'absa-group/AUL#2118', 'absa-group/AUL#2114', 'absa-group/AUL#2112', 'absa-group/AUL#2096', 'absa-group/AUL#2061', 'absa-group/AUL#2059', 'absa-group/AUL#1966', 'absa-group/AUL#1965', 'absa-group/AUL#1961', 'absa-group/AUL#1925', 'absa-group/AUL#1924', 'absa-group/AUL#1901', 'absa-group/AUL#1857', 'absa-group/AUL#1796', 'absa-group/AUL#1674', 'absa-group/AUL#1669', 'absa-group/AUL#1668', 'absa-group/AUL#1665', 'absa-group/AUL#1662', 'absa-group/AUL#1660', 'absa-group/AUL#1634', 'absa-group/AUL#1577', 'absa-group/AUL#1571', 'absa-group/AUL#1566', 'absa-group/AUL#1559', 'absa-group/AUL#1555', 'absa-group/AUL#1552', 'absa-group/AUL#1545', 'absa-group/AUL#1520', 'absa-group/AUL#1517', 'absa-group/AUL#1516', 'absa-group/AUL#1507', 'absa-group/AUL#1506', 'absa-group/AUL#1505', 'absa-group/AUL#1504', 'absa-group/AUL#1502', 'absa-group/AUL#1501', 'absa-group/AUL#1497', 'absa-group/AUL#1496', 'absa-group/AUL#1485', 'absa-group/AUL#1483', 'absa-group/AUL#1433', 'absa-group/AUL#1416', 'absa-group/AUL#1407', 'absa-group/AUL#1406', 'absa-group/AUL#1360', 'absa-group/AUL#1356', 'absa-group/AUL#1353', 'absa-group/AUL#1351', 'absa-group/AUL#1347', 'absa-group/AUL#1343', 'absa-group/AUL#1333', 'absa-group/AUL#1331', 'absa-group/AUL#1322', 'absa-group/AUL#1319', 'absa-group/AUL#1298', 'absa-group/AUL#1249', 'absa-group/AUL#1188', 'absa-group/AUL#1024', 'absa-group/AUL#870', 'absa-group/AUL#621', 'absa-group/AUL#584', 'absa-group/AUL#478', 'absa-group/AUL#476', 'absa-group/AUL#413', 'absa-group/AUL#405', 'absa-group/AUL#137']
+new_parents = [
+    "absa-group/AUL#2960",
+]
 
 while new_parents:
     new_parents = collector.scan_sub_issues_for_parents(new_parents)
diff --git a/release_notes_generator/data/filter.py b/release_notes_generator/data/filter.py
index 9f733c60..88106ddb 100644
--- a/release_notes_generator/data/filter.py
+++ b/release_notes_generator/data/filter.py
@@ -85,12 +85,12 @@ def filter(self, data: MinedData) -> MinedData:
                 if pull.number not in pulls_seen:
                     pulls_seen.add(pull.number)
                     pulls_dict[pull] = repo
-            logger.debug("Count of pulls reduced from %d to %d", len(data.pull_requests.items()), len(pulls_dict.items()))
+            logger.debug(
+                "Count of pulls reduced from %d to %d", len(data.pull_requests.items()), len(pulls_dict.items())
+            )
 
             commits_dict = {
-                commit: repo
-                for commit, repo in data.commits.items()
-                if commit.commit.author.date > data.since
+                commit: repo for commit, repo in data.commits.items() if commit.commit.author.date > data.since
             }
             logger.debug("Count of commits reduced from %d to %d", len(data.commits.items()), len(commits_dict.items()))
@@ -145,7 +145,11 @@ def _filter_issues_default(data: MinedData) -> dict[Issue, Repository]:
         Returns:
             dict[Issue, Repository]: The filtered issues.
         """
-        return {issue: repo for issue, repo in data.issues.items() if (issue.closed_at is None) or (issue.closed_at >= data.since)}
+        return {
+            issue: repo
+            for issue, repo in data.issues.items()
+            if (issue.closed_at is None) or (issue.closed_at >= data.since)
+        }
 
     @staticmethod
     def _filter_issues_issue_hierarchy(data: MinedData) -> dict[Issue, Repository]:
@@ -162,8 +166,5 @@ def _filter_issues_issue_hierarchy(data: MinedData) -> dict[Issue, Repository]:
         return {
             issue: repo
             for issue, repo in data.issues.items()
-            if (
-                (issue.closed_at is not None and issue.closed_at >= data.since)
-                or (issue.state == "open")
-            )
+            if ((issue.closed_at is not None and issue.closed_at >= data.since) or (issue.state == "open"))
         }
diff --git a/release_notes_generator/data/miner.py b/release_notes_generator/data/miner.py
index 2508663d..ac0fbcc3 100644
--- a/release_notes_generator/data/miner.py
+++ b/release_notes_generator/data/miner.py
@@ -67,8 +67,12 @@ def mine_data(self) -> MinedData:
         self._get_issues(data)
 
         # pulls and commits, and then reduce them by the latest release since time
-        pull_requests = list(self._safe_call(repo.get_pulls)(state=PullRequestRecord.PR_STATE_CLOSED, base=repo.default_branch))
-        open_pull_requests = list(self._safe_call(repo.get_pulls)(state=PullRequestRecord.PR_STATE_OPEN, base=repo.default_branch))
+        pull_requests = list(
+            self._safe_call(repo.get_pulls)(state=PullRequestRecord.PR_STATE_CLOSED, base=repo.default_branch)
+        )
+        open_pull_requests = list(
+            self._safe_call(repo.get_pulls)(state=PullRequestRecord.PR_STATE_OPEN, base=repo.default_branch)
+        )
         data.pull_requests = {pr: data.home_repository for pr in pull_requests}
         if data.since:
             commits = list(self._safe_call(repo.get_commits)(since=data.since))
@@ -85,6 +89,13 @@ def mine_data(self) -> MinedData:
         return de_duplicated_data
 
     def mine_missing_sub_issues(self, data: MinedData) -> dict[Issue, Repository]:
+        """
+        Mines missing sub-issues from GitHub.
+        Parameters:
+            data (MinedData): The mined data containing origin sets of issues and pull requests.
+        Returns:
+            list[Issue]: A list of fetched missing issues.
+ """ logger.debug("Mapping sub-issues...") data.parents_sub_issues = self._scan_sub_issues_for_parents([get_id(i, r) for i, r in data.issues.items()]) @@ -139,16 +150,17 @@ def _fetch_missing_issues_and_prs(self, data: MinedData) -> dict[Issue, Reposito data.add_repository(new_repo) issue = None - if data.get_repository(f"{org}/{repo}") is not None: - issue = self._safe_call(data.get_repository(f"{org}/{repo}").get_issue)(num) - if issue is None: - logger.error("Issue not found: %s", parent_id) - continue - else: + r = data.get_repository(f"{org}/{repo}") + if r is not None: + issue = self._safe_call(r.get_issue)(num) + if issue is None: + logger.error("Issue not found: %s", parent_id) + continue + logger.debug("Fetching missing issue: %s", parent_id) - # add to issues list - fetched_issues[issue] = data.get_repository(f"{org}/{repo}") + # add to issues list + fetched_issues[issue] = r logger.debug("Fetched %d missing issues.", len(fetched_issues)) return fetched_issues @@ -323,7 +335,11 @@ def __filter_duplicated_issues(data: MinedData, open_pull_requests: list[PullReq pr_numbers = {pr.number for pr in data.pull_requests.keys()} open_pr_numbers = [pr.number for pr in open_pull_requests] - filtered_issues = {issue: repo for issue, repo in data.issues.items() if issue.number not in pr_numbers and issue.number not in open_pr_numbers} + filtered_issues = { + issue: repo + for issue, repo in data.issues.items() + if issue.number not in pr_numbers and issue.number not in open_pr_numbers + } logger.debug("Duplicated issues removed: %s", len(data.issues.items()) - len(filtered_issues.items())) diff --git a/release_notes_generator/data/utils/bulk_sub_issue_collector.py b/release_notes_generator/data/utils/bulk_sub_issue_collector.py index 626743f8..d12d0bdf 100644 --- a/release_notes_generator/data/utils/bulk_sub_issue_collector.py +++ b/release_notes_generator/data/utils/bulk_sub_issue_collector.py @@ -1,3 +1,7 @@ +""" +Collect sub-issues for received parent issues in bulk via GitHub GraphQL API. +""" + from __future__ import annotations import json @@ -29,9 +33,9 @@ class CollectorConfig: base_backoff: float = 1.0 # Pagination and batching - per_page: int = 100 # Max allowed by GitHub for subIssues - max_parents_per_repo: int = 100 # Max issue aliases per repository(...) block - max_repos_per_request: int = 1 # Max repository blocks per query + per_page: int = 100 # Max allowed by GitHub for subIssues + max_parents_per_repo: int = 100 # Max issue aliases per repository(...) block + max_repos_per_request: int = 1 # Max repository blocks per query # Pacing gentle_pacing_seconds: float = 0.05 @@ -44,10 +48,10 @@ class BulkSubIssueCollector: """ def __init__( - self, - token: str, - cfg: Optional[CollectorConfig] = None, - session: Optional[requests.Session] = None, + self, + token: str, + cfg: Optional[CollectorConfig] = None, + session: Optional[requests.Session] = None, ): self._cfg = cfg or CollectorConfig() self._session = session or requests.Session() @@ -59,6 +63,7 @@ def __init__( # Parent -> list of its direct sub-issues ("org/repo#n") self.parents_sub_issues: dict[str, list[str]] = {} + # pylint: disable=too-many-locals,too-many-statements,too-many-branches def scan_sub_issues_for_parents(self, parents_to_check: list[str]) -> list[str]: """ Input: ["org/repo#123", "org2/repo2#77", ...] @@ -86,9 +91,7 @@ def scan_sub_issues_for_parents(self, parents_to_check: list[str]) -> list[str]: # Maintain cursors per (org, repo, issue). 
             cursors: Dict[Tuple[str, str, int], Optional[str]] = {}
-            remaining_by_repo: Dict[Tuple[str, str], Set[int]] = {
-                k: set(v) for k, v in repo_chunk
-            }
+            remaining_by_repo: Dict[Tuple[str, str], Set[int]] = {k: set(v) for k, v in repo_chunk}
             for (org, repo), nums in remaining_by_repo.items():
                 for n in nums:
                     cursors[(org, repo, n)] = None
@@ -185,7 +188,8 @@ def scan_sub_issues_for_parents(self, parents_to_check: list[str]) -> list[str]:
                 time.sleep(0.05)
 
         # Deterministic order
-        return sorted(new_parents_to_check, key=lambda s: (lambda o, r, n: (o, r, n))(*parse_issue_id(s)))
+        # return sorted(new_parents_to_check, key=lambda s: (lambda o, r, n: (o, r, n))(*parse_issue_id(s)))
+        return sorted(new_parents_to_check, key=parse_issue_id)
 
     # ---------- internals ----------
 
diff --git a/release_notes_generator/model/mined_data.py b/release_notes_generator/model/mined_data.py
index 646fbcf9..a47c2a2e 100644
--- a/release_notes_generator/model/mined_data.py
+++ b/release_notes_generator/model/mined_data.py
@@ -60,9 +60,10 @@ def add_repository(self, repository: Repository) -> None:
         """Add a repository to the mined data if not already present."""
         if repository.full_name not in self._repositories:
             self._repositories[repository.full_name] = repository
-            logger.debug(f"Added repository {repository.full_name} to mined data.")
+            logger.debug("Added repository %s to mined data.", repository.full_name)
 
     def get_repository(self, full_name: str) -> Optional[Repository]:
+        """Get a repository by its full name."""
         if full_name not in self._repositories:
             return None
 
diff --git a/release_notes_generator/model/record.py b/release_notes_generator/model/record.py
index b8ec8bae..e9a8cfda 100644
--- a/release_notes_generator/model/record.py
+++ b/release_notes_generator/model/record.py
@@ -22,8 +22,6 @@
 from abc import ABCMeta, abstractmethod
 from typing import Optional
 
-from github.Repository import Repository
-
 logger = logging.getLogger(__name__)
 
 
diff --git a/release_notes_generator/record/factory/default_record_factory.py b/release_notes_generator/record/factory/default_record_factory.py
index 221cd1e2..11a0c401 100644
--- a/release_notes_generator/record/factory/default_record_factory.py
+++ b/release_notes_generator/record/factory/default_record_factory.py
@@ -42,6 +42,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class DefaultRecordFactory(RecordFactory):
     """
     A class used to generate records for release notes.
@@ -152,7 +153,9 @@ def register_pull_request(pr: PullRequest, l_pid: str, skip_rec: bool) -> None:
             register_pull_request(pull, pid, skip_record)
 
         logger.debug("Registering commits to records...")
-        detected_direct_commits_count = sum(not self.register_commit_to_record(commit, get_id(commit, repo)) for commit, repo in data.commits.items())
+        detected_direct_commits_count = sum(
+            not self.register_commit_to_record(commit, get_id(commit, repo)) for commit, repo in data.commits.items()
+        )
 
         logger.info(
             "Generated %d records from %d issues and %d PRs, with %d commits detected.",
diff --git a/release_notes_generator/record/factory/issue_hierarchy_record_factory.py b/release_notes_generator/record/factory/issue_hierarchy_record_factory.py
index 3bfda3f3..a5809d9c 100644
--- a/release_notes_generator/record/factory/issue_hierarchy_record_factory.py
+++ b/release_notes_generator/record/factory/issue_hierarchy_record_factory.py
@@ -64,7 +64,9 @@ def generate(self, data: MinedData) -> dict[str, Record]:
         Returns:
             dict[str, Record]: A dictionary of records indexed by their IDs.
""" - logger.debug("Creation of records started...") # NEW: uz mam mnapovani, kdo je hierarchy, kdo je SubIssue a kdo je Issue + logger.debug( + "Creation of records started..." + ) # NEW: uz mam mnapovani, kdo je hierarchy, kdo je SubIssue a kdo je Issue for issue, repo in data.issues.items(): iid = get_id(issue, repo) @@ -97,12 +99,10 @@ def generate(self, data: MinedData) -> dict[str, Record]: logger.debug("Building issues hierarchy...") self._re_register_hierarchy_issues( - sub_issues_ids = list({iid for sublist in data.parents_sub_issues.values() for iid in sublist}), - sub_issue_parents = { - sub_issue: parent - for parent, sublist in data.parents_sub_issues.items() - for sub_issue in sublist - } + sub_issues_ids=list({iid for sublist in data.parents_sub_issues.values() for iid in sublist}), + sub_issue_parents={ + sub_issue: parent for parent, sublist in data.parents_sub_issues.items() for sub_issue in sublist + }, ) self.order_hierarchy_levels() @@ -115,6 +115,7 @@ def generate(self, data: MinedData) -> dict[str, Record]: ) return self._records + # pylint: disable=too-many-statements def _register_pull_and_its_commits_to_issue( self, pull: PullRequest, pid: str, data: MinedData, target_repository: Optional[Repository] = None ) -> None: @@ -140,10 +141,10 @@ def _register_pull_and_its_commits_to_issue( ) # dev note: here we expect that PR links to an issue in the same repository !!! org, repo, num = parse_issue_id(issue_id) - repo_full_name = f"{org}/{repo}" - parent_issue = self._safe_call(data.get_repository(repo_full_name).get_issue)(num) if data.get_repository(repo_full_name) is not None else None + r = data.get_repository(f"{org}/{repo}") + parent_issue = self._safe_call(r.get_issue)(num) if r is not None else None if parent_issue is not None: - self._create_record_for_issue(parent_issue, get_id(parent_issue, data.get_repository(repo_full_name))) + self._create_record_for_issue(parent_issue, get_id(parent_issue, r)) # type: ignore[arg-type] if issue_id in self._records and isinstance( self._records[issue_id], (SubIssueRecord, HierarchyIssueRecord, IssueRecord) @@ -202,7 +203,7 @@ def _create_record_for_issue(self, issue: Issue, iid: str, issue_labels: Optiona super()._create_record_for_issue(issue, iid, issue_labels) self.__registered_issues.add(iid) - def _create_record_for_sub_issue(self, issue: Issue, iid: str, issue_labels: Optional[list[str]] = None) -> None: + def _create_record_for_sub_issue(self, issue: Issue, iid: str, issue_labels: Optional[list[str]] = None) -> None: if issue_labels is None: issue_labels = self._get_issue_labels_mix_with_type(issue) diff --git a/release_notes_generator/utils/record_utils.py b/release_notes_generator/utils/record_utils.py index 2a3818d8..dac17bb0 100644 --- a/release_notes_generator/utils/record_utils.py +++ b/release_notes_generator/utils/record_utils.py @@ -1,3 +1,7 @@ +""" +Utilities for working with GitHub issue/PR/commit identifiers. +""" + import logging import re from functools import singledispatchmethod, lru_cache @@ -12,10 +16,22 @@ ISSUE_ID_RE = re.compile(r"^(?P[^/\s]+)/(?P[^#\s]+)#(?P\d+)$") + class IssueIdParseError(ValueError): + """Raised when an issue ID cannot be parsed.""" + pass + def get_id(obj, repository: Repository) -> str: + """ + Get a stable identifier for an Issue, PullRequest, or Commit within a given repository. + Parameters: + obj: The object to get the ID for (Issue, PullRequest, or Commit). + repository: The Repository the object belongs to. + Returns: + A string identifier for the object. 
+ """ if isinstance(obj, Issue): issue = cast(Issue, obj) return _issue_id(repository.full_name, issue.number) @@ -28,14 +44,19 @@ def get_id(obj, repository: Repository) -> str: return str(obj) + @lru_cache(maxsize=2048) def _issue_id(repo_full_name: str, number: int) -> str: + """Format 'org/repo#123' from components.""" return f"{repo_full_name}#{number}" + @lru_cache(maxsize=2048) def _pr_id(repo_full_name: str, number: int) -> str: + """Format 'org/repo#123' from components.""" return f"{repo_full_name}#{number}" + def parse_issue_id(issue_id: str) -> tuple[str, str, int]: """ Parse 'org/repo#123' -> (org, repo, 123). @@ -43,10 +64,10 @@ def parse_issue_id(issue_id: str) -> tuple[str, str, int]: """ m = ISSUE_ID_RE.match(issue_id.strip()) if not m: - raise IssueIdParseError( - f"Invalid issue id '{issue_id}', expected 'org/repo#number'" - ) + raise IssueIdParseError(f"Invalid issue id '{issue_id}', expected 'org/repo#number'") return m.group("org"), m.group("repo"), int(m.group("num")) + def format_issue_id(org: str, repo: str, number: int) -> str: + """Format 'org/repo#123' from components.""" return f"{org}/{repo}#{number}" From 02a17e4da1654fbba3a39e800482c6772f9861de Mon Sep 17 00:00:00 2001 From: miroslavpojer Date: Tue, 30 Sep 2025 16:04:12 +0200 Subject: [PATCH 6/8] Fixed Rabbit review notes. --- integration_test.py | 3 ++- release_notes_generator/data/miner.py | 11 +++++------ .../data/utils/bulk_sub_issue_collector.py | 5 +++-- .../record/factory/default_record_factory.py | 14 -------------- .../factory/issue_hierarchy_record_factory.py | 7 +++---- release_notes_generator/utils/record_utils.py | 14 ++++---------- tests/conftest.py | 5 ----- tests/release_notes/data/test_filter.py | 6 +++--- 8 files changed, 20 insertions(+), 45 deletions(-) diff --git a/integration_test.py b/integration_test.py index b6304384..18d4145c 100644 --- a/integration_test.py +++ b/integration_test.py @@ -13,7 +13,8 @@ if token is None: raise ValueError("GITHUB_TOKEN environment variable is not set") -# If you need to disable TLS verification (to mirror your example): +# WARNING: TLS verification is disabled for testing purposes only. +# Do not use this configuration in production. cfg = CollectorConfig(verify_tls=False) collector = BulkSubIssueCollector(token, cfg=cfg) diff --git a/release_notes_generator/data/miner.py b/release_notes_generator/data/miner.py index ac0fbcc3..531fd347 100644 --- a/release_notes_generator/data/miner.py +++ b/release_notes_generator/data/miner.py @@ -21,7 +21,6 @@ import logging import sys import traceback -from builtins import list from typing import Optional import semver @@ -94,7 +93,7 @@ def mine_missing_sub_issues(self, data: MinedData) -> dict[Issue, Repository]: Parameters: data (MinedData): The mined data containing origin sets of issues and pull requests. Returns: - list[Issue]: A list of fetched missing issues. + dict[Issue, Repository]: A dictionary mapping fetched issues to their repositories. """ logger.debug("Mapping sub-issues...") data.parents_sub_issues = self._scan_sub_issues_for_parents([get_id(i, r) for i, r in data.issues.items()]) @@ -131,7 +130,7 @@ def _fetch_missing_issues_and_prs(self, data: MinedData) -> dict[Issue, Reposito Parameters: data (MinedData): The mined data containing origin sets of issues and pull requests. Returns: - list[Issue]: A list of fetched missing issues. + dict[Issue, Repository]: A dictionary mapping fetched issues to their repositories. 
""" fetched_issues: dict[Issue, Repository] = {} @@ -144,7 +143,7 @@ def _fetch_missing_issues_and_prs(self, data: MinedData) -> dict[Issue, Reposito org, repo, num = parse_issue_id(parent_id) if data.get_repository(f"{org}/{repo}") is None: - new_repo = self._get_repository(f"{org}/{repo}") + new_repo = self._fetch_repository(f"{org}/{repo}") if new_repo is not None: # cache for subsequent lookups data.add_repository(new_repo) @@ -165,9 +164,9 @@ def _fetch_missing_issues_and_prs(self, data: MinedData) -> dict[Issue, Reposito logger.debug("Fetched %d missing issues.", len(fetched_issues)) return fetched_issues - def _get_repository(self, full_name: str) -> Optional[Repository]: + def _fetch_repository(self, full_name: str) -> Optional[Repository]: """ - Retrieves the specified GitHub repository. + Fetch a repository by its full name. Returns: Optional[Repository]: The GitHub repository if found, None otherwise. diff --git a/release_notes_generator/data/utils/bulk_sub_issue_collector.py b/release_notes_generator/data/utils/bulk_sub_issue_collector.py index d12d0bdf..c9c93970 100644 --- a/release_notes_generator/data/utils/bulk_sub_issue_collector.py +++ b/release_notes_generator/data/utils/bulk_sub_issue_collector.py @@ -185,7 +185,7 @@ def scan_sub_issues_for_parents(self, parents_to_check: list[str]) -> list[str]: remaining_by_repo[(org, repo)].discard(parent_num) # Gentle pacing to avoid secondary limits - time.sleep(0.05) + time.sleep(self._cfg.gentle_pacing_seconds) # Deterministic order # return sorted(new_parents_to_check, key=lambda s: (lambda o, r, n: (o, r, n))(*parse_issue_id(s))) @@ -207,9 +207,10 @@ def _post_graphql(self, payload: dict) -> dict: ) resp.raise_for_status() data = resp.json() - if "errors" in data and data["errors"]: + if data.get("errors"): logger.error("GraphQL errors: %s", data["errors"]) raise RuntimeError(f"GitHub GraphQL errors: {data['errors']}") + logger.debug("Posted graphql query") return data except Exception as e: diff --git a/release_notes_generator/record/factory/default_record_factory.py b/release_notes_generator/record/factory/default_record_factory.py index 11a0c401..2639f5c8 100644 --- a/release_notes_generator/record/factory/default_record_factory.py +++ b/release_notes_generator/record/factory/default_record_factory.py @@ -56,20 +56,6 @@ def __init__(self, github: Github, home_repository: Repository) -> None: self._records: dict[str, Record] = {} - # TODO - this should not be needed now - delete - # def get_repository(self, full_name: str) -> Optional[Repository]: - # """ - # Retrieves the specified GitHub repository. - # - # Returns: - # Optional[Repository]: The GitHub repository if found, None otherwise. - # """ - # repo: Optional[Repository] = self._safe_call(self._github.get_repo)(full_name) - # if repo is None: - # logger.error("Repository not found: %s", full_name) - # return None - # return repo - def generate(self, data: MinedData) -> dict[str, Record]: """ Generate records for release notes. 
diff --git a/release_notes_generator/record/factory/issue_hierarchy_record_factory.py b/release_notes_generator/record/factory/issue_hierarchy_record_factory.py
index a5809d9c..38d8483e 100644
--- a/release_notes_generator/record/factory/issue_hierarchy_record_factory.py
+++ b/release_notes_generator/record/factory/issue_hierarchy_record_factory.py
@@ -19,7 +19,6 @@
 """
 
 import logging
-from copy import deepcopy
 from typing import cast, Optional
 
 from github import Github
@@ -70,7 +69,7 @@ def generate(self, data: MinedData) -> dict[str, Record]:
 
         for issue, repo in data.issues.items():
             iid = get_id(issue, repo)
-            if len(data.parents_sub_issues[iid]) > 0:
+            if len(data.parents_sub_issues.get(iid, [])) > 0:
                 # issue has sub-issues - it is either hierarchy issue or sub-hierarchy issue
                 self._create_record_for_hierarchy_issue(issue, iid)
@@ -208,7 +207,7 @@ def _create_record_for_sub_issue(self, issue: Issue, iid: str, issue_labels: Opt
             issue_labels = self._get_issue_labels_mix_with_type(issue)
 
         skip_record = any(item in issue_labels for item in ActionInputs.get_skip_release_notes_labels())
-        logger.debug("Created record for sub issue %s: %s", id, issue.title)
+        logger.debug("Created record for sub issue %s: %s", iid, issue.title)
 
         self.__registered_issues.add(iid)
         self._records[iid] = SubIssueRecord(issue, issue_labels, skip_record)
@@ -219,7 +218,7 @@ def _re_register_hierarchy_issues(self, sub_issues_ids: list[str], sub_issue_parents: dict[str, str]):
         logger.debug("Re-registering hierarchy issues ...")
 
-        reduced_sub_issue_ids: list[str] = deepcopy(sub_issues_ids)
+        reduced_sub_issue_ids: list[str] = sub_issues_ids[:]
         made_progress = False
 
         for sub_issue_id in sub_issues_ids:
diff --git a/release_notes_generator/utils/record_utils.py b/release_notes_generator/utils/record_utils.py
index dac17bb0..b23f0f7e 100644
--- a/release_notes_generator/utils/record_utils.py
+++ b/release_notes_generator/utils/record_utils.py
@@ -4,7 +4,7 @@
 
 import logging
 import re
-from functools import singledispatchmethod, lru_cache
+from functools import lru_cache
 from typing import cast
 
 from github.Commit import Commit
@@ -34,10 +34,10 @@ def get_id(obj, repository: Repository) -> str:
     """
     if isinstance(obj, Issue):
         issue = cast(Issue, obj)
-        return _issue_id(repository.full_name, issue.number)
+        return _entity_id(repository.full_name, issue.number)
     elif isinstance(obj, PullRequest):
         pr = cast(PullRequest, obj)
-        return _pr_id(repository.full_name, pr.number)
+        return _entity_id(repository.full_name, pr.number)
     elif isinstance(obj, Commit):
         commit = cast(Commit, obj)
         return f"{commit.sha}"
@@ -46,13 +46,7 @@ def get_id(obj, repository: Repository) -> str:
 
 @lru_cache(maxsize=2048)
-def _issue_id(repo_full_name: str, number: int) -> str:
-    """Format 'org/repo#123' from components."""
-    return f"{repo_full_name}#{number}"
-
-
-@lru_cache(maxsize=2048)
-def _pr_id(repo_full_name: str, number: int) -> str:
+def _entity_id(repo_full_name: str, number: int) -> str:
     """Format 'org/repo#123' from components."""
     return f"{repo_full_name}#{number}"
 
diff --git a/tests/conftest.py b/tests/conftest.py
index c94787bf..40c453b3 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -634,7 +634,6 @@ def mined_data_isolated_record_types_no_labels_no_type_defined(
     hi_two_sub_issues_no_prs.body = "I301 open\nRelease Notes:\n- Hierarchy level release note"
     sub_issue_1 = copy.deepcopy(mock_open_sub_issue)
     sub_issue_2 = copy.deepcopy(mock_closed_sub_issue)
-    # hi_two_sub_issues_no_prs.get_sub_issues.return_value = [sub_issue_1, sub_issue_2]
     data.parents_sub_issues[si1 := get_id(sub_issue_1, mock_repo)] = []
     data.parents_sub_issues[si2 := get_id(sub_issue_2, mock_repo)] = []
     data.parents_sub_issues[get_id(hi_two_sub_issues_no_prs, mock_repo)] = [si1, si2]
@@ -658,7 +657,6 @@ def mined_data_isolated_record_types_no_labels_no_type_defined(
     mock_pr_closed_2.merge_commit_sha = "merge_commit_sha_150"
     mock_pr_closed_2.get_labels.return_value = []
     mock_pr_closed_2.body += "\nCloses #451"
-    # hi_two_sub_issues_with_prs.get_sub_issues.return_value = [sub_issue_3, sub_issue_4]
     data.parents_sub_issues[si3 := get_id(sub_issue_3, mock_repo)] = []
     data.parents_sub_issues[si4 := get_id(sub_issue_4, mock_repo)] = []
     data.parents_sub_issues[get_id(hi_two_sub_issues_with_prs, mock_repo)] = [si3, si4]
@@ -685,7 +683,6 @@ def mined_data_isolated_record_types_no_labels_no_type_defined(
     mock_commit_1 = copy.deepcopy(mock_commit)
     mock_commit_1.sha = "merge_commit_sha_151"
     mock_commit_1.commit.message = "Fixed bug in PR 151"
-    # hi_two_sub_issues_with_prs_with_commit.get_sub_issues.return_value = [sub_issue_5, sub_issue_6]
     data.parents_sub_issues[si5 := get_id(sub_issue_5, mock_repo)] = []
     data.parents_sub_issues[si6 := get_id(sub_issue_6, mock_repo)] = []
     data.parents_sub_issues[get_id(hi_two_sub_issues_with_prs_with_commit, mock_repo)] = [si5, si6]
@@ -716,8 +713,6 @@ def mined_data_isolated_record_types_no_labels_no_type_defined(
     mock_commit_2 = copy.deepcopy(mock_commit)
     mock_commit_2.sha = "merge_commit_sha_152"
     mock_commit_2.commit.message = "Fixed bug in PR 152"
-    # sub_hierarchy_issue.get_sub_issues.return_value = [sub_issue_7, sub_issue_8]
-    # hi_one_sub_hierarchy_two_sub_issues_with_prs_with_commit.get_sub_issues.return_value = [sub_hierarchy_issue]
     data.parents_sub_issues[si7 := get_id(sub_issue_7, mock_repo)] = []
     data.parents_sub_issues[si8 := get_id(sub_issue_8, mock_repo)] = []
     data.parents_sub_issues[shi := get_id(sub_hierarchy_issue, mock_repo)] = [si7, si8]
diff --git a/tests/release_notes/data/test_filter.py b/tests/release_notes/data/test_filter.py
index d6ffab87..d2b737d1 100644
--- a/tests/release_notes/data/test_filter.py
+++ b/tests/release_notes/data/test_filter.py
@@ -82,9 +82,9 @@ def test_filter_with_release(mocker):
     assert len(filtered_data.issues) == 1
     assert len(filtered_data.pull_requests) == 1
     assert len(filtered_data.commits) == 1
-    assert list(filtered_data.issues.keys())[0].closed_at == datetime(2023, 1, 2)
-    assert list(filtered_data.pull_requests.keys())[0].merged_at == datetime(2023, 2, 3)
-    assert list(filtered_data.commits.keys())[0].commit.author.date == datetime(2024, 1, 4)
+    assert next(iter(filtered_data.issues.keys())).closed_at == datetime(2023, 1, 2)
+    assert next(iter(filtered_data.pull_requests.keys())).merged_at == datetime(2023, 2, 3)
+    assert next(iter(filtered_data.commits.keys())).commit.author.date == datetime(2024, 1, 4)
 
     assert ('Starting issue, prs and commit reduction by the latest release since time.',) == mock_log_info.call_args_list[0][0]
     assert ('Count of issues reduced from %d to %d', 2, 1) == mock_log_debug.call_args_list[1][0]
     assert ('Count of pulls reduced from %d to %d', 2, 1) == mock_log_debug.call_args_list[2][0]
 
From 5ece6a5cb828e8e67adfc8b22ea6fea5c91e80b8 Mon Sep 17 00:00:00 2001
From: miroslavpojer
Date: Tue, 30 Sep 2025 16:09:27 +0200
Subject: [PATCH 7/8] Fix log message to correct level.

---
 release_notes_generator/data/miner.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/release_notes_generator/data/miner.py b/release_notes_generator/data/miner.py
index 531fd347..367f2b14 100644
--- a/release_notes_generator/data/miner.py
+++ b/release_notes_generator/data/miner.py
@@ -95,10 +95,10 @@ def mine_missing_sub_issues(self, data: MinedData) -> dict[Issue, Repository]:
         Returns:
             dict[Issue, Repository]: A dictionary mapping fetched issues to their repositories.
         """
-        logger.debug("Mapping sub-issues...")
+        logger.info("Mapping sub-issues...")
         data.parents_sub_issues = self._scan_sub_issues_for_parents([get_id(i, r) for i, r in data.issues.items()])
 
-        logger.debug("Fetching missing issues...")
+        logger.info("Fetching missing issues...")
         return self._fetch_missing_issues_and_prs(data)
 
From 49c817038e19da1574377e6674048f99f9bf6189 Mon Sep 17 00:00:00 2001
From: miroslavpojer
Date: Tue, 30 Sep 2025 16:12:50 +0200
Subject: [PATCH 8/8] Applied proposal.

---
 integration_test.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/integration_test.py b/integration_test.py
index 18d4145c..c86c702f 100644
--- a/integration_test.py
+++ b/integration_test.py
@@ -9,9 +9,13 @@
 
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
+class MissingTokenError(ValueError):
+    """Raised when GITHUB_TOKEN environment variable is not set."""
+    pass
+
 token = os.getenv("GITHUB_TOKEN")
 if token is None:
-    raise ValueError("GITHUB_TOKEN environment variable is not set")
+    raise MissingTokenError("GITHUB_TOKEN environment variable is not set")
 
 # WARNING: TLS verification is disabled for testing purposes only.
 # Do not use this configuration in production.
 cfg = CollectorConfig(verify_tls=False)
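
Editor's note (not part of the patch series): the commits above replace per-issue REST
lookups with one bulk GraphQL scan and switch to "org/repo#number" string identifiers
throughout. Below is a minimal sketch of how the new pieces are meant to compose, using
only the API introduced in these patches; the seed id "org/repo#1" is a hypothetical
placeholder and error handling is omitted.

    import os

    from release_notes_generator.data.utils.bulk_sub_issue_collector import (
        BulkSubIssueCollector,
        CollectorConfig,
    )
    from release_notes_generator.utils.record_utils import parse_issue_id

    # The default config keeps TLS verification enabled; integration_test.py
    # disables it only for local experiments.
    collector = BulkSubIssueCollector(os.environ["GITHUB_TOKEN"], cfg=CollectorConfig())

    # Each scan returns only newly discovered parents, so iterating to a fixed
    # point walks the whole sub-issue tree level by level.
    frontier = ["org/repo#1"]  # hypothetical seed parent, not a real issue
    while frontier:
        frontier = collector.scan_sub_issues_for_parents(frontier)

    # parents_sub_issues maps each parent id to its direct sub-issue ids; the
    # hierarchy record factory inverts it the same way in patch 5 to find each
    # sub-issue's parent.
    parent_of = {
        sub: parent
        for parent, subs in collector.parents_sub_issues.items()
        for sub in subs
    }
    for sub, parent in parent_of.items():
        org, repo, num = parse_issue_id(sub)
        print(f"{org}/{repo}#{num} is a direct child of {parent}")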