-
Notifications
You must be signed in to change notification settings - Fork 2
/
__init__.py
243 lines (176 loc) Β· 6.37 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
"""
Library module.
"""
import csv
import datetime
import json
import sys
from pathlib import Path
from time import sleep
import config
import requests
from jinja2 import Template # type: ignore
# TODO: Rename time to not conflict with builtin.
from . import text, time
# FIXME: this only happens to work in lib so should be more robust
APP_DIR = Path().absolute()
VAR_DIR = APP_DIR / "var"
COUNTS_CSV_PATH = VAR_DIR / "counts.csv"
# TODO: Add start date.
COUNTS_CSV_PATH_TODAY = VAR_DIR / f"counts-end-{datetime.date.today()}.csv"
STARRED_CSV_PATH = VAR_DIR / "starred.csv"
ERROR_QUERY_PATH = VAR_DIR / "error_query.gql"
ERROR_PAYLOAD_PATH = VAR_DIR / "error_payload.gql"
HEADERS = {"Authorization": f"token {config.ACCESS_TOKEN}"}
MAX_ATTEMPTS = 3
dict_of_str = dict[str, str]
list_of_str = list[str]
def _request(url: str, payload: dict_of_str, headers: dict_of_str):
    """
    POST a payload to the given URL and return the parsed JSON response.

    Exits the program on bad credentials. Raises ValueError when the
    response reports errors (after dumping the query and payload to the
    error files) or contains no "data" key.
    """
    response = requests.post(url, json=payload, headers=headers)
    response_data = response.json()

    if response_data.get("message", None) == "Bad credentials":
        print("Bad credentials")
        print("Update the configured token and try again")
        sys.exit(1)

    # TODO: Abort immediately on bad syntax or bad/missing variable.
    reported_errors = response_data.get("errors", None)
    if reported_errors:
        print(f"Writing query to: {ERROR_QUERY_PATH}")
        write_file(payload["query"], ERROR_QUERY_PATH)
        print(f"Writing payload to: {ERROR_PAYLOAD_PATH}")
        write_file(payload, ERROR_PAYLOAD_PATH)
        details = text.prettify(reported_errors)
        raise ValueError(f"Error requesting GitHub. Errors:\n{details}")

    if response_data.get("data", None) is None:
        pretty = text.prettify(response_data)
        raise ValueError(f"Error requesting GitHub. Details:\n{pretty}")

    return response_data
def fetch_github_data(query: str, variables=None) -> dict_of_str:
    """
    Get data from GitHub API using given parameters.

    Note that a request which returns an error will still give a 200 and
    might still contain some data. A 404 will not contain the data or errors
    keys.

    :param query: GraphQL query string to send.
    :param variables: Optional mapping of GraphQL variable names to values.

    :return: Value of the "data" key in the response, or None if absent.
    :raises ValueError: If the request still fails on the final attempt.
    """
    if not variables:
        variables = {}
    payload = {
        "query": query,
        "variables": variables,
    }

    resp_json = {}
    for i in range(MAX_ATTEMPTS):
        try:
            resp_json = _request(config.BASE_URL, payload, HEADERS)
        except ValueError as e:
            text.eprint(f"Request failed - attempt #{i+1}/{MAX_ATTEMPTS}")
            if i + 1 == MAX_ATTEMPTS:
                raise
            text.eprint(e)
            if "rate" in str(e):
                print("RATE LIMITED")
                # TODO: Sleep for set time or perhaps short time if too frequent
                # between requests.
                seconds = 10
                text.eprint(f"Sleeping {seconds} s...")
                # Bug fix: time.sleep takes SECONDS. The previous
                # `sleep(seconds * 1000)` slept for 10,000 s (~2.8 hours).
                sleep(seconds)
            text.eprint("Retrying...")
        else:
            break

    return resp_json.get("data", None)
def read_file(path: Path):
    """
    Return the text content of the file at a given path.

    Implements the old TODO by using pathlib's read method. Wraps the
    argument in Path() because some callers (e.g. via process_args) pass a
    plain str path.

    :param path: Path (or str) of the file to read.

    :return: The file's entire contents as a str.
    """
    return Path(path).read_text()
def write_file(content, path: Path):
    """
    Write a list, dict, or str to a given filepath.

    Lists and dicts are serialized to JSON before writing.

    :param content: Text, or a JSON-serializable list/dict, to write.
    :param path: Destination file path.

    :return: None
    """
    if isinstance(content, (list, dict)):
        content = json.dumps(content)

    print("Writing")
    print(f" - path: {path}")
    with open(path, "w") as f_out:
        # Bug fix: writelines() on a str iterates it character by character;
        # write() emits the whole string in one call with identical output.
        f_out.write(content)
def read_template(path: Path):
    """
    Return Jinja template at a given path.
    """
    raw_content = read_file(path)
    return Template(raw_content)
# TODO Rename to path.
# TODO Refactor so the file only has to be read once for a set of paged queries.
def query_by_filename(path: Path, variables=None):
    """
    Read a GraphQL query from the given file and fetch results from GitHub.
    """
    if not variables:
        variables = {}
    query_text = read_file(path)

    return fetch_github_data(query_text, variables)
def read_csv(path: Path):
    """
    Return the rows of a CSV file as a list of dicts keyed by the header.
    """
    with open(path) as handle:
        return [row for row in csv.DictReader(handle)]
def write_csv(path: Path, rows: list[dict_of_str], append=False) -> None:
"""
Write a CSV file to a path with given rows and header from first row.
Default behavior is to overrwrite an existing file. Append to existing file
if append is flag True. Either way, the header will only be added on a new
file. Appending is useful when adding sections to a report, but overwriting
is better when rerunning an entire report.
"""
if not rows:
print("No rows to write")
print()
return
is_new_file = not path.exists()
mode = "a" if append else "w"
fieldnames = list(rows[0].keys())
with open(path, mode) as f_out:
writer = csv.DictWriter(f_out, fieldnames)
if is_new_file or not append:
writer.writeheader()
writer.writerows(rows)
# TODO: How to include `var/name` if relevant. To make it easy to click on console output.
print("Wrote CSV:")
print(f" - {path.name}")
print(f" - {len(rows)} rows {'appended' if append else ''}")
print()
def process_variables(args: list_of_str) -> dict_of_str:
"""
Process command-line arguments containing a filename and key-value pairs.
"""
if args:
if len(args) % 2:
provided = " ".join(args)
raise ValueError(
f"Uneven number of items in key-value pairs. Got: {provided}"
)
variables = dict(zip(args[::2], args[1::2]))
# TODO: Make this clear that you use start and it becomes since.
start = variables.pop("start", None)
if start:
variables["since"] = time.as_git_timestamp(start)
is_fork_arg = variables.pop("isFork", None)
if is_fork_arg:
variables["isFork"] = text.parse_bool(is_fork_arg)
return variables
return {}
def process_args(args: list_of_str):
    """
    Process command-line arguments containing a filename and key-value pairs.

    Separate args into filepath and optional key-value pairs, with spaces
    between pairs and within pairs. Rather than setting allowed keys, any
    key is allowed.
    """
    # Note: pop(0) deliberately mutates the caller's list, leaving only the
    # key-value items behind.
    path = args.pop(0)

    return path, process_variables(args)
def to_archive_url(owner: str, repo_name: str, branch: str) -> str:
    """
    Return a download URL for a repo's zip file.
    """
    archive_url = f"https://github.com/{owner}/{repo_name}/archive/{branch}.zip"

    return archive_url