-
Notifications
You must be signed in to change notification settings - Fork 1.6k
/
Copy pathsource.py
246 lines (186 loc) · 6.21 KB
/
source.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# Copyright 2019 Google
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import fnmatch
import logging
import os
import re
import textwrap
from lib import command_trace
# Directory prefixes whose files are skipped entirely by categorize_files.
IGNORE = frozenset({
    'Firestore/Protos/nanopb',
    'Firestore/Protos/cpp',
    'Firestore/Protos/objc',
    'Firestore/third_party/abseil-cpp',
})

# Firestore source trees, grouped by role.
FIRESTORE_CORE = ['Firestore/core']
FIRESTORE_OBJC = ['Firestore/Source', 'Firestore/Example/Tests']
FIRESTORE_SWIFT = ['Firestore/Swift']
FIRESTORE_TESTS = ['Firestore/core/test', 'Firestore/Example/Tests']

# Directories and extensions associated with each language. Note that CC_DIRS
# is the same list object as FIRESTORE_CORE, not a copy.
CC_DIRS = FIRESTORE_CORE
CC_EXTENSIONS = ['.h', '.cc']

OBJC_DIRS = FIRESTORE_CORE + FIRESTORE_OBJC
OBJC_EXTENSIONS = ['.h', '.m', '.mm']

PYTHON_DIRS = ['scripts']
PYTHON_EXTENSIONS = ['.py']

# Extensions treated as source files.
SOURCE_EXTENSIONS = [
    '.c', '.cc', '.cmake', '.h', '.js', '.m', '.mm', '.py', '.rb', '.sh',
    '.swift',
]

# Extensions that identify a language on their own. '.h' is deliberately
# absent: headers are shared between C++ and Objective-C and need further
# inspection.
_DEFINITE_EXTENSIONS = {
    '.cc': 'cc',
    '.m': 'objc',
    '.mm': 'objc',
    '.py': 'py',
}

# Logger that receives a debug record for every classification decision.
_classify_logger = logging.getLogger('lint.classify')
class LanguageBreakdown:
    """Files broken down by source language.

    Attributes:
      cc: list of files classified as C++.
      objc: list of files classified as Objective-C.
      py: list of files classified as Python.
      all: list of every classified file, in classification order.
      kinds: dict mapping a kind name ('cc', 'objc', 'py') to the
        corresponding list above. The values are the very same list objects,
        so appending through `kinds` also updates the attribute.
    """

    def __init__(self):
        self.cc = []
        self.objc = []
        self.py = []
        self.all = []

        # Aliases, not copies: lets callers pick a list by kind name.
        self.kinds = {
            'cc': self.cc,
            'objc': self.objc,
            'py': self.py,
        }

    def classify(self, kind, reason, filename):
        """Records filename under the given kind.

        Args:
          kind: one of the keys of `kinds`.
          reason: short description of why the kind was chosen; only used in
            the debug log.
          filename: the file being classified.

        Raises:
          KeyError: if kind is not a key of `kinds`.
        """
        # Pass the values as logger arguments so the message is only
        # formatted when debug logging is actually enabled.
        _classify_logger.debug('classify %s: %s (%s)', kind, filename, reason)
        self.kinds[kind].append(filename)
        self.all.append(filename)

    @staticmethod
    def ignore(filename):
        """Notes in the debug log that filename was not classified."""
        _classify_logger.debug('classify ignored: %s', filename)
def categorize_files(files):
    """Breaks down the given list of files by language.

    Files under any IGNORE directory are skipped outright. Files whose
    extension appears in _DEFINITE_EXTENSIONS are classified by extension
    alone. Headers ('.h') are classified by directory or by the files that
    sit next to them. Anything else is reported to LanguageBreakdown.ignore
    and left out of the result.

    Args:
      files: a list of files

    Returns:
      A LanguageBreakdown instance containing all the files that match a
      recognized source language.

    Raises:
      NotImplementedError: if a header cannot be attributed to either C++ or
        Objective-C.
    """
    breakdown = LanguageBreakdown()

    for path in files:
        if _in_directories(path, IGNORE):
            continue

        suffix = os.path.splitext(path)[1]
        known_kind = _DEFINITE_EXTENSIONS.get(suffix)

        if known_kind:
            breakdown.classify(known_kind, 'extension', path)
        elif suffix == '.h':
            header_kind, why = _classify_header(path)
            breakdown.classify(header_kind, why, path)
        else:
            breakdown.ignore(path)

    return breakdown


def _classify_header(filename):
    """Decides which language a header file should be treated as.

    Args:
      filename: path of a '.h' file.

    Returns:
      A (kind, reason) tuple to hand to LanguageBreakdown.classify.

    Raises:
      NotImplementedError: if neither the header's directory nor its related
        files indicate a language.
    """
    if _in_directories(filename, CC_DIRS):
        # If a header exists in the C++ core, ignore related files. Some
        # classes may transiently have an implementation in a .mm file, but
        # hold the header to the higher standard: the implementation should
        # eventually be in a .cc, otherwise the file doesn't belong in the
        # core.
        return 'cc', 'directory'

    # Only consult the neighbouring files once the directory check has failed.
    related_ext = _related_file_ext(filename)
    if related_ext == '.cc':
        return 'cc', 'related file'
    if related_ext in ('.m', '.mm'):
        return 'objc', 'related file'

    if _in_directories(filename, OBJC_DIRS):
        return 'objc', 'directory'

    raise NotImplementedError(textwrap.dedent(
        """
        Don't know how to handle the header %s.
        If C++ add a parent directory to CC_DIRS in lib/source.py.
        If Objective-C add to OBJC_DIRS or consider changing the default here
        and removing this exception.""" % filename))
def shard(group, num_shards):
    """Breaks the group apart into num_shards shards.

    Files are dealt out round-robin. The rotation is not reset between kinds,
    so the shards stay balanced in total size rather than per language.

    Args:
      group: a breakdown, perhaps returned from categorize_files.
      num_shards: The number of shards into which to break down the group.

    Returns:
      A list of num_shards LanguageBreakdown instances. Each file of the
      group appears in exactly one shard, both in that shard's per-kind list
      and in its `all` list.
    """
    shards = [LanguageBreakdown() for _ in range(num_shards)]

    pos = 0
    for kind, files in group.kinds.items():
        for filename in files:
            target = shards[pos]
            target.kinds[kind].append(filename)
            # Keep `all` in step with the per-kind lists, as classify() does;
            # otherwise every shard would report an empty `all`.
            target.all.append(filename)
            pos = (pos + 1) % num_shards

    return shards
# Matches an Objective-C category suffix: a '+' and whatever follows it.
_PLUS = re.compile(r'\+.*')


def _related_file_ext(header):
    """Returns the dominant extension among related files.

    A file is related if it starts with the same prefix. Prefix is the
    basename without extension, and stripping off any + category names that
    are common in Objective-C.

    For example: executor.h has related files executor_std.cc and
    executor_libdispatch.mm.

    If there are multiple related files, the implementation chooses one based
    on which language is most restrictive. That is, if a header serves both
    C++ and Objective-C++ implementations, lint the header as C++ to prevent
    issues that might arise in that mode.

    Args:
      header: path of the header file.

    Returns:
      The file extension (e.g. '.cc'), or None when no related file has one
      of the extensions '.cc', '.m' or '.mm'.
    """
    directory, leaf = os.path.split(header)
    stem, _ = os.path.splitext(leaf)
    prefix = os.path.join(directory, _PLUS.sub('', stem))

    found = set()
    for related in _related_files(prefix):
        found.add(os.path.splitext(related)[1])

    # Candidates are listed in priority order; the first one present wins.
    return next((ext for ext in ('.cc', '.m', '.mm') if ext in found), None)
def _related_files(root):
    """Returns a list of files related to the given root.

    A file is related when it lives in the same directory as root and its
    name starts with root's basename.

    Args:
      root: a path prefix, i.e. a directory joined with the leading part of a
        file name.

    Returns:
      A list of matching file names (without the directory part).
    """
    parent, prefix = os.path.split(root)
    if not parent:
        # The directory part is empty for filenames that are already a
        # basename.
        parent = '.'

    # The prefix is a literal file name, not a pattern: neutralize glob
    # metacharacters by wrapping each in a character class (the same escaping
    # glob.escape performs) so that e.g. '[' in a name matches itself.
    pattern = re.sub(r'([*?\[])', r'[\1]', prefix) + '*'
    return fnmatch.filter(_list_files(parent), pattern)
def _list_files(parent):
    """Lists files contained directly in the parent directory.

    Listings are memoized in `_list_files.cache`, keyed by the directory
    string as given, so each directory is read with os.listdir at most once.
    A cache miss is also reported to command_trace.log as an `ls` of the
    directory.

    Args:
      parent: the directory to list.

    Returns:
      The list of entry names; the same list object is returned on every call
      for a given directory.
    """
    cache = _list_files.cache

    cached = cache.get(parent)
    if cached is not None:
        # Compared against None, not truthiness, so that an empty directory
        # listing still counts as a hit.
        return cached

    command_trace.log(['ls', parent])
    listing = os.listdir(parent)
    cache[parent] = listing
    return listing


_list_files.cache = {}
def _in_directories(filename, dirs):
    """Tests whether `filename` is anywhere in any of the given dirs.

    The comparison is purely textual and respects path-component boundaries:
    'a/bc' is not considered to be inside 'a/b'.

    Args:
      filename: the path to test.
      dirs: an iterable of directory paths, written without a trailing '/'.

    Returns:
      True if filename equals one of the dirs or lies beneath one of them,
      False otherwise.
    """
    return any(
        filename == dirname or filename.startswith(dirname + '/')
        for dirname in dirs)