-
Notifications
You must be signed in to change notification settings - Fork 180
/
check_nb_kernel.py
245 lines (215 loc) · 8.3 KB
/
check_nb_kernel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
"""
Checker/Updater for Notebook kernelspec versions.
check_nb_kernel.py CMD [-h] [--path PATH] [--target TARGET] [--verbose]
CMD is one of:
{check, list, update} (default is "check")
list - shows list of internal kernelspecs that can be used
check - checks the notebook or notebooks for conformance to kernelspecs
update - updates notebook or notebooks to target kernelspec
optional arguments:
-h, --help show this help message and exit
--path PATH, -p PATH Path search for notebooks. Can be a single file,
a directory path or a 'glob'-compatible wildcard.
(e.g. "*" for all files in current folder, "**/*"
for all files in folder and subfolders)
Defaults to current directory.
--target TARGET, -t TARGET
Target kernel spec to check or set.
Required for 'update' command
--verbose, -v Show details of all checked notebooks. Otherwise
only list notebooks with errors or updated notebooks.
Notes
-----
If CMD is 'update' you must specify a kernelspec target. The updated
notebook is written to the same name as the input. The old version is
saved as {input-notebook-name}-{previous-kernelspec-name}.ipynb
If CMD is 'check', target is optional and it reports any notebooks
with kernelspecs different to internal kernelspecs
(you can view the built-in kernelspecs with 'list' command)
as errors.
"""
import argparse
from pathlib import Path
from typing import Iterable, List
import sys
import nbformat
# Built-in kernelspec definitions, keyed by the target name that the
# command line accepts for --target. Key order inside each entry is kept
# as-is because the dictionaries are printed verbatim by the 'list' command.
IP_KERNEL_SPEC = {
    "python36": {
        "name": "python36",
        "language": "python",
        "display_name": "Python 3.6",
    },
    "python3": {
        "name": "python3",
        "language": "python",
        "display_name": "Python 3",
    },
    "azureml_36": {
        "name": "python3-azureml",
        "language": "python",
        "display_name": "Python 3.6 - AzureML",
    },
    "azureml_38": {
        "name": "python38-azureml",
        "language": "python",
        "display_name": "Python 3.8 - AzureML",
    },
    "papermill": {
        "display_name": "papermill",
        "language": "python",
        "name": "papermill",
    },
    ".net-csharp": {
        "display_name": ".NET (C#)",
        "language": "C#",
        "name": ".net-csharp",
    },
    ".net-powershell": {
        "display_name": ".NET (PowerShell)",
        "language": "PowerShell",
        "name": ".net-powershell",
    },
}

# Targets that 'check' accepts when no --target is given on the command line.
_LEGAL_KERNELS = [
    "azureml_38",
    "papermill",
    ".net-csharp",
    ".net-powershell",
    "azureml_36",
]
def _kernelspec_matches(kernelspec, tgt_spec) -> bool:
    """Return True if every key shared by both specs has the same value."""
    # Keys that the target does not define are ignored rather than looked up
    # directly, so an extra notebook key cannot raise KeyError.
    return all(
        tgt_spec[k_name] == k_item
        for k_name, k_item in kernelspec.items()
        if k_name in tgt_spec
    )


def check_notebooks(nb_path: str, k_tgts: Iterable[str], verbose: bool = False):
    """
    Check notebooks for a valid kernelspec.

    Parameters
    ----------
    nb_path : str
        File, directory or '*' wildcard pattern passed to
        `_get_notebook_paths` to locate the notebooks.
    k_tgts : Iterable[str]
        Keys of `IP_KERNEL_SPEC`. A notebook passes if its kernelspec
        matches any one of these definitions.
    verbose : bool, optional
        If True, also print a line for every notebook that passes.

    Returns
    -------
    Tuple[int, int]
        (good_count, err_count) - notebooks that passed and notebooks that
        could not be read, had no kernelspec or had a non-matching one.

    Raises
    ------
    KeyError
        If a name in `k_tgts` is not a key of `IP_KERNEL_SPEC`.

    """
    # Resolve the targets once: `k_tgts` may be a one-shot iterator but the
    # specs are needed again for every notebook.
    tgt_specs = [IP_KERNEL_SPEC[config] for config in k_tgts]
    err_count = 0
    good_count = 0
    for nbook in _get_notebook_paths(nb_path):
        if ".ipynb_checkpoints" in str(nbook):
            continue
        try:
            nb_obj = nbformat.read(str(nbook), as_version=4)
        except nbformat.reader.NotJSONError as err:
            print(f"Error reading {nbook}\n{err}")
            err_count += 1
            continue
        kernelspec = nb_obj.get("metadata", {}).get("kernelspec", None)
        if not kernelspec:
            # A notebook without kernel information is an error: count it so
            # the caller's exit status reflects it, and say which file it is.
            err_count += 1
            _print_nb_header(nbook)
            print("Error: no kernel information.")
            continue
        if not any(_kernelspec_matches(kernelspec, spec) for spec in tgt_specs):
            err_count += 1
            _print_nb_header(nbook)
            print("ERROR - Invalid kernelspec '" f"{kernelspec.get('name')}" "'")
            print("  ", kernelspec, "\n")
            continue
        if verbose:
            _print_nb_header(nbook)
            print(f"{kernelspec.get('name')} ok\n")
        good_count += 1
    print(f"{good_count} notebooks with no errors, {err_count} with errors")
    return good_count, err_count
def _get_notebook_paths(nb_path: str):
    """
    Generate notebook paths.

    Parameters
    ----------
    nb_path : str
        One of:
        - a pattern containing '*': expanded with `Path.glob`; only files
          whose suffix is '.ipynb' (compared case-insensitively) are yielded;
        - a directory: yields the '*.ipynb' entries directly inside it;
        - a file: yields that file as-is.
        Anything else yields nothing.

    Yields
    ------
    Path
        Path of each matching notebook.

    """
    if "*" in nb_path:
        pattern = Path(nb_path)
        if pattern.anchor:
            # Path.glob() rejects non-relative patterns, so glob from the
            # pattern's own anchor (root/drive) with the remainder.
            root = Path(pattern.anchor)
            rel_pattern = str(pattern.relative_to(pattern.anchor))
        else:
            root = Path()
            rel_pattern = nb_path
        for glob_path in root.glob(rel_pattern):
            if glob_path.is_file() and glob_path.suffix.casefold() == ".ipynb":
                yield glob_path
    elif Path(nb_path).is_dir():
        yield from Path(nb_path).glob("*.ipynb")
    elif Path(nb_path).is_file():
        yield Path(nb_path)
def _print_nb_header(nbook_path):
    """Print the notebook file name, a dashed underline and its resolved path."""
    title = str(nbook_path.name)
    print(title)
    # Underline is exactly as wide as the file name printed above it.
    print(len(title) * "-")
    resolved = nbook_path.resolve()
    print(str(resolved))
def set_kernelspec(nb_path: str, k_tgt: str, verbose: bool = False):
    """
    Update specified notebooks to `k_tgt` kernelspec.

    For every notebook whose kernelspec differs from the target, the
    original file is renamed to '{stem}-{previous kernelspec name}{suffix}'
    and the updated notebook is written to the original path.

    Parameters
    ----------
    nb_path : str
        File, directory or '*' wildcard pattern passed to
        `_get_notebook_paths` to locate the notebooks.
    k_tgt : str
        Key of `IP_KERNEL_SPEC` naming the kernelspec to apply.
    verbose : bool, optional
        If True, also print a line for every notebook left unchanged.

    Raises
    ------
    KeyError
        If `k_tgt` is not a key of `IP_KERNEL_SPEC`.

    """
    tgt_spec = IP_KERNEL_SPEC[k_tgt]
    changed_count = 0
    good_count = 0
    for nbook in _get_notebook_paths(nb_path):
        if ".ipynb_checkpoints" in str(nbook):
            continue
        try:
            # Explicit encoding: the platform default is not always UTF-8.
            with open(str(nbook), "r", encoding="utf-8") as nb_read:
                nb_obj = nbformat.read(nb_read, as_version=4)
        except nbformat.reader.NotJSONError as err:
            print(f"Error reading {nbook}\n{err}")
            continue
        kernelspec = nb_obj.get("metadata", {}).get("kernelspec", None)
        # Test for a missing kernelspec BEFORE touching it - calling .get()
        # on None would raise AttributeError.
        if not kernelspec:
            _print_nb_header(nbook)
            print("Error: no kernel information.")
            continue
        current_kspec_name = kernelspec.get("name")

        # Only keys that the target defines can be corrected; any other key
        # in the notebook's kernelspec is left untouched.
        stale_keys = [
            k_name
            for k_name, k_item in kernelspec.items()
            if k_name in tgt_spec and tgt_spec[k_name] != k_item
        ]
        for k_name in stale_keys:
            kernelspec[k_name] = tgt_spec[k_name]

        if stale_keys:
            changed_count += 1
            _print_nb_header(nbook)
            print(
                f"Kernelspec updated from '{current_kspec_name}' to '"
                f"{kernelspec.get('name')}"
                "'"
            )
            print("  ", kernelspec, "\n")
            # Build the backup name from stem + suffix. str.strip(suffix)
            # removes a *set of characters* from both ends and would mangle
            # names that start or end with any of '.', 'i', 'p', 'y', 'n', 'b'.
            backup_path = nbook.with_name(
                f"{nbook.stem}-{current_kspec_name}{nbook.suffix}"
            )
            nbook.rename(backup_path)
            nbformat.write(nb_obj, str(nbook))
            continue
        if verbose:
            _print_nb_header(nbook)
            print(f"{kernelspec.get('name')} ok\n")
        good_count += 1
    print(f"{good_count} notebooks with no changes, {changed_count} updated")
def _add_script_args():
    """
    Build the command-line argument parser for the script.

    Returns
    -------
    argparse.ArgumentParser
        Parser with an optional positional `cmd` (check, list or update;
        defaults to "check") and the --path, --target and --verbose options.

    """
    parser = argparse.ArgumentParser(description="Notebook kernelspec checker.")
    parser.add_argument(
        "cmd",
        # Without nargs="?" a positional is mandatory and `default` is ignored.
        nargs="?",
        default="check",
        type=str,
        choices=["check", "list", "update"],
        help="Command to run (default: check).",
    )
    parser.add_argument(
        "--path", "-p", default=".", required=False, help="Path search for notebooks."
    )
    parser.add_argument(
        "--target",
        "-t",
        nargs="+",
        required=False,
        help="Target kernel spec(s) to check or set.",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Show details of all checked notebooks.",
    )
    return parser
def _view_targets():
    """Print each built-in kernelspec name followed by its settings."""
    print("Valid targets:")
    for kernel in IP_KERNEL_SPEC:
        spec = IP_KERNEL_SPEC[kernel]
        print(f"{kernel}:")
        print("  ", spec)
def main(argv=None) -> int:
    """
    Run the command-line interface.

    Parameters
    ----------
    argv : list of str, optional
        Command-line arguments; None lets argparse read `sys.argv`.

    Returns
    -------
    int
        Process exit code: 0 on success; 1 for an unknown target, an empty
        path, 'update' without a target, or 'check' finding errors.

    """
    args = _add_script_args().parse_args(argv)

    if args.cmd == "list":
        _view_targets()
        return 0

    krnl_tgts: List[str] = list(args.target or [])
    if any(krnl_tgt not in IP_KERNEL_SPEC for krnl_tgt in krnl_tgts):
        print("'target' must be a valid kernelspec definition")
        print("Valid kernel specs:")
        _view_targets()
        return 1

    if not args.path:
        print("check and update commands need a 'path' parameter.")
        return 1

    if args.cmd == "check":
        # With no explicit target, accept any of the default legal kernels.
        _, err_count = check_notebooks(
            args.path, krnl_tgts or _LEGAL_KERNELS, verbose=args.verbose
        )
        return 1 if err_count else 0

    if args.cmd == "update":
        # 'update' rewrites notebooks, so it must never fall back to the
        # default kernel list - the target has to be given explicitly.
        if not krnl_tgts:
            print("A kernel target must be specified with 'update'.")
            return 1
        if len(krnl_tgts) > 1:
            print(
                "Multiple targets specified for update.",
                f"Using first value {krnl_tgts[0]}"
            )
        set_kernelspec(args.path, krnl_tgts[0], verbose=args.verbose)
    return 0


if __name__ == "__main__":
    sys.exit(main())