Skip to content

Commit

Permalink
Merge pull request #49333 from ClickHouse/backport/23.4/49314
Browse files Browse the repository at this point in the history
Backport #49314 to 23.4: Fallback auth gh api
  • Loading branch information
Felixoid committed Apr 29, 2023
2 parents caa139f + bd86223 commit dc1dffa
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 22 deletions.
53 changes: 47 additions & 6 deletions tests/ci/build_download_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@
import sys
import time
from pathlib import Path
from typing import Any, Callable, List, Optional
from typing import Any, Callable, List

import requests # type: ignore

from ci_config import CI_CONFIG
from get_robot_token import ROBOT_TOKEN, get_best_robot_token

DOWNLOAD_RETRIES_COUNT = 5

Expand All @@ -24,22 +25,62 @@ def get_with_retries(
logging.info(
"Getting URL with %i tries and sleep %i in between: %s", retries, sleep, url
)
exc = None # type: Optional[Exception]
exc = Exception("A placeholder to satisfy typing and avoid nesting")
for i in range(retries):
try:
response = requests.get(url, **kwargs)
response.raise_for_status()
break
return response
except Exception as e:
if i + 1 < retries:
logging.info("Exception '%s' while getting, retry %i", e, i + 1)
time.sleep(sleep)

exc = e
else:
raise Exception(exc)

return response
raise exc


def get_gh_api(
    url: str,
    retries: int = DOWNLOAD_RETRIES_COUNT,
    sleep: int = 3,
    **kwargs: Any,
) -> requests.Response:
    """Request the GitHub API, adding robot-token auth only when it is needed.

    The request is sent without an Authorization header by default. When
    GitHub answers "403 rate limit exceeded", the header is filled in with the
    token returned by get_best_robot_token() and the request is repeated
    through get_with_retries(). If a robot token has already been cached by an
    earlier get_best_robot_token() call, the header is set up front.

    Args:
        url: the URL to request.
        retries: how many attempts each of the two phases may take (the
            probing loop and the final get_with_retries() call).
        sleep: seconds passed to get_with_retries() as the delay between tries.
        **kwargs: forwarded to get_with_retries(); a caller-supplied
            "Authorization" header in `headers` is never overwritten.

    Returns:
        The successful response.

    Raises:
        Whatever get_with_retries() raises. Only requests.HTTPError is handled
        inside the probing loop; any other exception propagates immediately.
    """
    # `from get_robot_token import ROBOT_TOKEN` copies the value present at
    # import time (None), while get_best_robot_token() rebinds the global
    # inside its own module. Read the attribute through the module so that a
    # token cached after this file was imported is actually seen.
    import get_robot_token as grt  # pylint:disable=import-outside-toplevel

    def set_auth_header() -> None:
        # Work on a copy: the caller's dict must not silently gain a token.
        headers = dict(kwargs.get("headers") or {})
        # HTTP header names are case-insensitive, so respect any spelling of
        # a caller-provided Authorization header.
        if not any(str(name).lower() == "authorization" for name in headers):
            headers["Authorization"] = f"Bearer {get_best_robot_token()}"
        kwargs["headers"] = headers

    if grt.ROBOT_TOKEN is not None:
        set_auth_header()

    for _ in range(retries):
        try:
            # A single try per iteration, so a rate-limit answer can be
            # inspected right away instead of being retried blindly.
            response = get_with_retries(url, 1, sleep, **kwargs)
            response.raise_for_status()
            return response
        except requests.HTTPError as exc:
            failed = exc.response
            if (
                failed is not None
                and failed.status_code == 403
                and b"rate limit exceeded" in (failed.content or b"")
            ):
                logging.warning(
                    "Received rate limit exception, setting the auth header and retry"
                )
                set_auth_header()
                break
            # Any other HTTP error: fall through and try again.

    # Reached after a rate-limit break (now with auth) or after the loop ran
    # out of attempts; this call applies the regular retry/sleep policy.
    return get_with_retries(url, retries, sleep, **kwargs)


def get_build_name_for_check(check_name: str) -> str:
Expand Down
4 changes: 2 additions & 2 deletions tests/ci/env_helper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
from os import path as p

from build_download_helper import get_with_retries
from build_download_helper import get_gh_api

module_dir = p.abspath(p.dirname(__file__))
git_root = p.abspath(p.join(module_dir, "..", ".."))
Expand Down Expand Up @@ -46,7 +46,7 @@ def GITHUB_JOB_ID() -> str:
jobs = []
page = 1
while not _GITHUB_JOB_ID:
response = get_with_retries(
response = get_gh_api(
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/"
f"actions/runs/{GITHUB_RUN_ID}/jobs?per_page=100&page={page}"
)
Expand Down
24 changes: 16 additions & 8 deletions tests/ci/get_robot_token.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
import logging
from dataclasses import dataclass
from typing import Optional

import boto3 # type: ignore
from github import Github
Expand All @@ -20,15 +21,20 @@ def get_parameter_from_ssm(name, decrypt=True, client=None):
return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"]


ROBOT_TOKEN = None # type: Optional[Token]


def get_best_robot_token(token_prefix_env_name="github_robot_token_"):
global ROBOT_TOKEN
if ROBOT_TOKEN is not None:
return ROBOT_TOKEN.value
client = boto3.client("ssm", region_name="us-east-1")
parameters = client.describe_parameters(
ParameterFilters=[
{"Key": "Name", "Option": "BeginsWith", "Values": [token_prefix_env_name]}
]
)["Parameters"]
assert parameters
token = None

for token_name in [p["Name"] for p in parameters]:
value = get_parameter_from_ssm(token_name, True, client)
Expand All @@ -38,15 +44,17 @@ def get_best_robot_token(token_prefix_env_name="github_robot_token_"):
user = gh.get_user()
rest, _ = gh.rate_limiting
logging.info("Get token with %s remaining requests", rest)
if token is None:
token = Token(user, value, rest)
if ROBOT_TOKEN is None:
ROBOT_TOKEN = Token(user, value, rest)
continue
if token.rest < rest:
token.user, token.value, token.rest = user, value, rest
if ROBOT_TOKEN.rest < rest:
ROBOT_TOKEN.user, ROBOT_TOKEN.value, ROBOT_TOKEN.rest = user, value, rest

assert token
assert ROBOT_TOKEN
logging.info(
"User %s with %s remaining requests is used", token.user.login, token.rest
"User %s with %s remaining requests is used",
ROBOT_TOKEN.user.login,
ROBOT_TOKEN.rest,
)

return token.value
return ROBOT_TOKEN.value
12 changes: 6 additions & 6 deletions tests/ci/pr_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from unidiff import PatchSet # type: ignore

from build_download_helper import get_with_retries
from build_download_helper import get_gh_api
from env_helper import (
GITHUB_REPOSITORY,
GITHUB_SERVER_URL,
Expand Down Expand Up @@ -45,7 +45,7 @@ def get_pr_for_commit(sha, ref):
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/commits/{sha}/pulls"
)
try:
response = get_with_retries(try_get_pr_url, sleep=RETRY_SLEEP)
response = get_gh_api(try_get_pr_url, sleep=RETRY_SLEEP)
data = response.json()
our_prs = [] # type: List[Dict]
if len(data) > 1:
Expand Down Expand Up @@ -105,7 +105,7 @@ def __init__(
# workflow completed event, used for PRs only
if "action" in github_event and github_event["action"] == "completed":
self.sha = github_event["workflow_run"]["head_sha"]
prs_for_sha = get_with_retries(
prs_for_sha = get_gh_api(
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/commits/{self.sha}"
"/pulls",
sleep=RETRY_SLEEP,
Expand All @@ -117,7 +117,7 @@ def __init__(
self.number = github_event["pull_request"]["number"]
if pr_event_from_api:
try:
response = get_with_retries(
response = get_gh_api(
f"https://api.github.com/repos/{GITHUB_REPOSITORY}"
f"/pulls/{self.number}",
sleep=RETRY_SLEEP,
Expand Down Expand Up @@ -159,7 +159,7 @@ def __init__(
self.user_login = github_event["pull_request"]["user"]["login"]
self.user_orgs = set([])
if need_orgs:
user_orgs_response = get_with_retries(
user_orgs_response = get_gh_api(
github_event["pull_request"]["user"]["organizations_url"],
sleep=RETRY_SLEEP,
)
Expand Down Expand Up @@ -255,7 +255,7 @@ def fetch_changed_files(self):
raise TypeError("The event does not have diff URLs")

for diff_url in self.diff_urls:
response = get_with_retries(
response = get_gh_api(
diff_url,
sleep=RETRY_SLEEP,
)
Expand Down

0 comments on commit dc1dffa

Please sign in to comment.