-
Notifications
You must be signed in to change notification settings - Fork 15
/
test_dataset_title_helper.py
executable file
·305 lines (298 loc) · 11.6 KB
/
test_dataset_title_helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
"""Dataset Title Helper Tests"""
from datetime import datetime, timezone
import pytest
from hdx.data.dataset_title_helper import DatasetTitleHelper
class TestDatasetTitleHelper:
    """Checks that DatasetTitleHelper strips dates from titles and reports them.

    Every expected range in these tests is a ``(start, end)`` pair of
    timezone-aware UTC datetimes: the start is midnight on the first day and
    the end is 23:59:59 on the last day.
    """

    @staticmethod
    def _utc_range(first_day, last_day):
        """Build one expected ``(start, end)`` pair of UTC datetimes.

        Args:
            first_day: ``(year, month, day)``; the start is 00:00 on this day.
            last_day: ``(year, month, day)``; the end is 23:59:59 on this day.

        Returns:
            Tuple of two timezone-aware datetimes.
        """
        return (
            datetime(*first_day, 0, 0, tzinfo=timezone.utc),
            datetime(*last_day, 23, 59, 59, tzinfo=timezone.utc),
        )

    @staticmethod
    def _years(first_year, last_year=None):
        """Expected range covering whole calendar years.

        Args:
            first_year: Year in which the range starts (1 January).
            last_year: Year in which the range ends (31 December); defaults
                to ``first_year`` for a single-year range.

        Returns:
            Tuple of two timezone-aware datetimes.
        """
        if last_year is None:
            last_year = first_year
        return TestDatasetTitleHelper._utc_range(
            (first_year, 1, 1), (last_year, 12, 31)
        )

    @staticmethod
    def _single_day(year, month, day):
        """Expected range covering exactly one calendar day."""
        return TestDatasetTitleHelper._utc_range(
            (year, month, day), (year, month, day)
        )

    @pytest.fixture
    def expected_ranges_2019(self):
        """Expected result for a title mentioning July 2019: the whole month."""
        return [self._utc_range((2019, 7, 1), (2019, 7, 31))]

    def test_fuzzy_match_dates_in_title(self, expected_ranges_2019):
        """Month/year is matched in either order and removed from the title.

        The helper receives an empty list for the ranges and is expected to
        fill it in place; the same ``wrong_years`` list is passed to both
        calls.
        """
        wrong_years = []
        for title in ("Myanmar Town July 2019", "Myanmar Town 2019 July"):
            found = []
            remainder = DatasetTitleHelper.fuzzy_match_dates_in_title(
                title, found, wrong_years
            )
            assert remainder == "Myanmar Town"
            assert found == expected_ranges_2019

    def test_get_date_from_title(self, expected_ranges_2019):
        """get_dates_from_title returns (cleaned title, list of date ranges).

        Each case pairs an input title with the exact tuple the helper is
        expected to return. Cases are checked in order and the first mismatch
        fails the test.
        """
        span = self._utc_range
        years = self._years
        day = self._single_day

        cases = [
            (
                "Myanmar Self Administered Regions Boundaries MIMU v9.2.1",
                (
                    "Myanmar Self Administered Regions Boundaries MIMU v9.2.1",
                    [],
                ),
            ),
            (
                "Myanmar Town 2019 July",
                ("Myanmar Town", expected_ranges_2019),
            ),
            (
                "Formal Sector School Location Upper Myanmar ( 2019 )",
                (
                    "Formal Sector School Location Upper Myanmar",
                    [years(2019)],
                ),
            ),
            (
                "ICA Armenia, 2017 - Drought Risk, 1981-2015",
                (
                    "ICA Armenia - Drought Risk",
                    [years(1981, 2015), years(2017)],
                ),
            ),
            (
                "ICA Sudan, 2018 - Land Degradation, 2001-2013",
                (
                    "ICA Sudan - Land Degradation",
                    [years(2001, 2013), years(2018)],
                ),
            ),
            (
                "Central African Republic, Bridges, January 2019",
                (
                    "Central African Republic, Bridges",
                    [span((2019, 1, 1), (2019, 1, 31))],
                ),
            ),
            (
                "Afghanistan:District Accessibility for WFP and Partners Staff as of 05 May 2019",
                (
                    "Afghanistan:District Accessibility for WFP and Partners Staff",
                    [day(2019, 5, 5)],
                ),
            ),
            (
                "Tanintharyi Region Land Cover - March 2016 (Original)",
                (
                    "Tanintharyi Region Land Cover (Original)",
                    [span((2016, 3, 1), (2016, 3, 31))],
                ),
            ),
            (
                "Kachin State and Sagaing Region 2002-2014 Forest Cover Change",
                (
                    "Kachin State and Sagaing Region Forest Cover Change",
                    [years(2002, 2014)],
                ),
            ),
            (
                "Ward boundaries Yangon City_mimu_v8_1",
                ("Ward boundaries Yangon City_mimu_v8_1", []),
            ),
            (
                "Mon_State_Village_Tract_Boundaries",
                ("Mon_State_Village_Tract_Boundaries", []),
            ),
            (
                "ICA Afghanistan, 2019 - Landslide hazard, 2013",
                (
                    "ICA Afghanistan - Landslide hazard",
                    [years(2013), years(2019)],
                ),
            ),
            (
                "Afghanistan Percentage of Food Insecure Population Based on Combined Food Consumption Score and Coping Strategy Index by Province - ALCS 2013/14",
                (
                    "Afghanistan Percentage of Food Insecure Population Based on Combined Food Consumption Score and Coping Strategy Index by Province - ALCS",
                    [years(2013, 2014)],
                ),
            ),
            (
                "ALCS 2000/10",
                ("ALCS", [span((2000, 10, 1), (2000, 10, 31))]),
            ),
            (
                # not a month and range going down
                "ALCS 2014/13",
                ("ALCS 2014/13", []),
            ),
            (
                "Mon_State_Village_Tract_Boundaries 9999",
                ("Mon_State_Village_Tract_Boundaries 9999", []),
            ),
            (
                # It's the "Mon" that makes an extra date component that
                # causes it to ignore the date (correctly)
                "Mon_State_Village_Tract_Boundaries 10/12/01 lala",
                ("Mon_State_Village_Tract_Boundaries 10/12/01 lala", []),
            ),
            (
                "State_Village_Tract_Boundaries 10/12/01 lala",
                (
                    "State_Village_Tract_Boundaries lala",
                    [day(2001, 12, 10)],
                ),
            ),
            (
                "Crops production (2016) - Tajikistan Vulnerability & Resilience Atlas, 2019",
                (
                    "Crops production - Tajikistan Vulnerability & Resilience Atlas",
                    [years(2016), years(2019)],
                ),
            ),
            (
                "Location of partners as of Feb. 5, 2019",
                ("Location of partners", [day(2019, 2, 5)]),
            ),
            (
                "ICA Armenia, 2016 & 2017 - Land Degradation, 2001-2012",
                (
                    "ICA Armenia - Land Degradation",
                    [years(2001, 2012), years(2016, 2017)],
                ),
            ),
            (
                "ICA Afghanistan, 2016 - Food Insecurity Risk, 2007/08-2014",
                (
                    "ICA Afghanistan - Food Insecurity Risk",
                    [span((2007, 8, 1), (2014, 12, 31)), years(2016)],
                ),
            ),
            (
                "Risk, 2020/19-2014",
                ("Risk, 2020/19", [years(2014)]),
            ),
            (
                "south sudan access constraints shp for 20200124",
                (
                    "south sudan access constraints shp",
                    [day(2020, 1, 24)],
                ),
            ),
            (
                "Cambodia Flood Extent in 2011",
                ("Cambodia Flood Extent", [years(2011)]),
            ),
            (
                "Access: Proportion of the population consuming less than 2100 kcal per day (average of 2011-2013), National Statistics Committee 2013",
                (
                    "Access: Proportion of the population consuming less than 2100 kcal per day (average), National Statistics Committee",
                    [years(2011, 2013), years(2013)],
                ),
            ),
        ]

        for title, expected in cases:
            assert DatasetTitleHelper.get_dates_from_title(title) == expected