-
Notifications
You must be signed in to change notification settings - Fork 1
/
parse_requirements.py
98 lines (77 loc) · 3.25 KB
/
parse_requirements.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import re
from pathlib import Path
# Matches whatever can terminate the package name on a requirement line:
#   - any whitespace (space, tab, ...)
#   - a version-specifier / list character: `<`, `>`, `=`, `,`
#   - the two-character operators `~=` and `!=` (matched as a pair so that a
#     lone `~` or `!` elsewhere in the line is not treated as a separator)
#   - `;`, which introduces an environment marker
#   - `[`, which opens an extras list
# Element 0 of `REGEXP_PACKAGE_NAME_VERSION.split(line)` is the bare name.
REGEXP_PACKAGE_NAME_VERSION = re.compile(r"\s|[><=,;\[]|[~!]=")
# TODO: search inside setup.py files (`install_requires`)
# TODO: search inside Pipfile files
def parse_all_requirements(path):
    """Yield `(filename, package)` for each requirement found under a path

    Will search recursively for `*.txt` files whose name contains
    `requirements` (case-insensitive), like:

    - `requirements.txt`
    - `dev-requirements.txt`
    - `requirements-dev.txt`

    and for each of them, yield one `(filename, package)` pair per package
    produced by `parse_requirements`, where `filename` is the `Path` of the
    requirements file as returned by the glob.

    `path` may be a `str` or a `pathlib.Path`.
    """
    if not isinstance(path, Path):  # str
        path = Path(path)

    for filename in path.glob("**/*.txt"):
        if "requirements" not in filename.name.lower():
            continue
        if not filename.is_file():
            # The glob also matches directories (e.g. one literally named
            # `requirements.txt`); trying to open those would raise.
            continue
        for package in parse_requirements(filename):
            yield filename, package
def parse_requirements(filename):
    """Yield the package found on each line of a requirements.txt file

    Every line of `filename` is handed to `parse_requirements_line`; lines
    for which it returns `None` (blank lines, comments, `-` options) are
    skipped. The file is read lazily and stays open while iterating.
    """
    with open(filename) as requirements_file:
        candidates = (parse_requirements_line(raw) for raw in requirements_file)
        yield from (name for name in candidates if name is not None)
def parse_requirements_line(line):
    """Return the package named on one requirements line, or `None`

    Surrounding whitespace is ignored. Blank lines, comments (`#`) and lines
    starting with `-` give `None`. Lines starting with `http:` or `https:`
    are returned whole. Anything else is cut at the first separator matched
    by `REGEXP_PACKAGE_NAME_VERSION`.

    >>> repr(parse_requirements_line(''))
    'None'
    >>> repr(parse_requirements_line('# Hello, comment!'))
    'None'
    >>> parse_requirements_line('rows ')
    'rows'
    >>> parse_requirements_line('rows >= 0.4.0')
    'rows'
    >>> parse_requirements_line('rows==0.4.2')
    'rows'
    >>> parse_requirements_line('https://github.com/turicas/rows/archive/develop.zip')
    'https://github.com/turicas/rows/archive/develop.zip'
    """
    stripped = line.strip()

    # Nothing to report for blanks, comments and option lines.
    if stripped == "" or stripped.startswith(("#", "-")):
        return None

    # URLs are kept untouched.
    if stripped.startswith(("http:", "https:")):
        return stripped

    name, *_ = REGEXP_PACKAGE_NAME_VERSION.split(stripped)
    return name
def print_status(text):
    """Print a status message over the previously printed one

    The length of the last message is kept on the function object itself
    (`print_status.last_status_len`) so the old text can be blanked out with
    spaces before the new one is written. No newline is appended and the
    output is flushed after each write.
    """
    previous_len = getattr(print_status, "last_status_len", 0)

    # First wipe the old message, then go back to column 0 and write anew.
    print("\r", " " * previous_len, sep="", end="", flush=True)
    print("\r", text, sep="", end="", flush=True)

    print_status.last_status_len = len(text)
def main(argv=None):
    """Command-line entry point: dump every requirement found into a CSV file

    Arguments (taken from `argv`, or from `sys.argv` when `argv` is `None`):

    - `output_filename`: CSV file to (over)write
    - `path`: one or more directories to search with `parse_all_requirements`

    One row is written per package, with the columns `repository_path`
    (directory of the requirements file), `requirements_filename` and
    `package`. Progress is reported through `print_status`.
    """
    import argparse
    import csv

    parser = argparse.ArgumentParser()
    parser.add_argument("output_filename")
    parser.add_argument("path", nargs="+")
    args = parser.parse_args(argv)

    current_path = Path(".").absolute()
    fieldnames = ["repository_path", "requirements_filename", "package"]

    # newline="" is what the csv module asks for; without it, platforms that
    # translate "\n" end up with an extra blank line between rows.
    with open(args.output_filename, mode="w", newline="") as fobj:
        writer = csv.DictWriter(fobj, fieldnames=fieldnames)
        writer.writeheader()

        total_found = 0
        for path in args.path:
            path = Path(path).absolute()
            try:
                repository_path = path.relative_to(current_path)
            except ValueError:
                # `path` is not inside the current directory, so there is no
                # relative form: fall back to the absolute path.
                repository_path = path

            found = enumerate(parse_all_requirements(path), start=1)
            for found_inside_package, (filename, package) in found:
                # `filename` lives under the absolute `path`. Joining an
                # absolute path onto `repository_path` would simply discard
                # `repository_path`, so re-root it explicitly.
                filename = repository_path / filename.relative_to(path)
                writer.writerow(
                    {
                        "repository_path": str(filename.parent),
                        "requirements_filename": filename.name,
                        "package": package,
                    }
                )
                total_found += 1
                print_status(f"Searching {repository_path}... {found_inside_package:02d} found (total: {total_found:03d})")

    print_status(f"Done! Check {args.output_filename} for results.\n")


if __name__ == "__main__":
    main()