-
Notifications
You must be signed in to change notification settings - Fork 196
/
Copy pathcoordinate_import.py
executable file
·179 lines (134 loc) · 5.55 KB
/
coordinate_import.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#!/usr/bin/env python3
r"""Coordinate importing script.
Usage:
python pwb.py coordinate_import -site:wikipedia:en \
-cat:Category:Coordinates_not_on_Wikidata
This will work on all pages in the category "coordinates not on Wikidata"
and will import the coordinates on these pages to Wikidata.
The data from the "GeoData" extension
(https://www.mediawiki.org/wiki/Extension:GeoData)
is used, so that extension has to be set up properly. You can look at the
[[Special:Nearby]] page on your local Wiki to see if it's populated.
You can use any typical pagegenerator to provide a list of pages:
python pwb.py coordinate_import -lang:it -family:wikipedia -namespace:0 \
-transcludes:Infobox_stazione_ferroviaria
You can also run over a set of items on the repo without coordinates and
try to import them from any connected page. To do this, you have to
explicitly provide the repo as the site using -site argument.
Example:
python pwb.py coordinate_import -site:wikidata:wikidata -namespace:0 \
-querypage:Deadendpages
The following command line parameters are supported:
-always If used, the bot won't ask if it should add the specified text.
-create Create items for pages without one.
.. note:: This script is a :class:`ConfigParserBot <bot.ConfigParserBot>`.
All options can be set within a settings file which is scripts.ini by
default.
¶ms;
"""
#
# (C) Pywikibot team, 2013-2024
#
# Distributed under the terms of MIT license.
#
from __future__ import annotations
import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import ConfigParserBot, WikidataBot
from pywikibot.exceptions import CoordinateGlobeUnknownError
# Maps the placeholder token found near the end of the module docstring to
# the shared page-generator help text (pagegenerators.parameterHelp).
# NOTE(review): presumably consumed by pywikibot's help output, which is not
# visible in this file - confirm. The key must stay character-for-character
# identical to the placeholder in the docstring; it looks like a mis-encoded
# '&params;', so if it is ever corrected, change both places together.
docuReplacements = {'¶ms;': pagegenerators.parameterHelp} # noqa: N816
class CoordImportRobot(ConfigParserBot, WikidataBot):
    """Bot that copies primary page coordinates onto Wikidata items.

    .. versionchanged:: 7.0
       CoordImportRobot is a ConfigParserBot
    """

    # NOTE(review): not read anywhere in this class; presumably interpreted
    # by the WikidataBot base class. treat_page_and_item() below copes with
    # both a real page and ``page is None``.
    use_from_page = None

    def __init__(self, **kwargs) -> None:
        """Initializer.

        Registers the ``create`` option (default ``False``) before the
        base initializer runs, then caches the sources and fixes the
        coordinate property to ``P625``.

        :param kwargs: options forwarded unchanged to the base classes
        """
        # Must precede super().__init__(): self.opt.create is read below
        # and is only available once the base initializer has run.
        self.available_options['create'] = False
        super().__init__(**kwargs)
        self.cacheSources()
        self.prop = 'P625'
        self.create_missing_item = self.opt.create

    def has_coord_qualifier(self, claims) -> str | None:
        """Find a property whose claims carry self.prop as a qualifier.

        :param claims: the Wikibase claims to check in
        :type claims: dict
        :return: the first property having a claim qualified with
            self.prop, or None if there is no such property
        """
        return next(
            (prop for prop in claims
             if any(self.prop in claim.qualifiers
                    for claim in claims[prop])),
            None,
        )

    def item_has_coordinates(self, item) -> bool:
        """Tell whether the item already carries coordinates.

        Coordinates count as present when self.prop is used either as a
        main property or as a qualifier of any other claim. An
        informational message is emitted in both cases.

        :param item: the item whose claims are inspected
        :return: whether the item has coordinates
        """
        claims = item.get().get('claims')

        # Direct statement with the coordinate property.
        if self.prop in claims:
            pywikibot.info(f'Item {item.title()} already contains coordinates '
                           f'({self.prop})')
            return True

        # Coordinate property used as qualifier on some other statement.
        qualified_prop = self.has_coord_qualifier(claims)
        if not qualified_prop:
            return False

        pywikibot.info(f'Item {item.title()} already contains coordinates '
                       f'({self.prop}) as qualifier for {qualified_prop}')
        return True

    def treat_page_and_item(self, page, item) -> None:
        """Import coordinates for one page/item pair.

        Does nothing when the item already has coordinates. With a page
        given, only that page is tried. Without a page, the pages linked
        from the item are tried in turn until one import succeeds.

        :param page: the page to read coordinates from, or None when
            running over items
        :param item: the item to add the coordinate claim to
        """
        if self.item_has_coordinates(item):
            return

        if page is not None:
            self.try_import_coordinates_from_page(page, item)
            return

        # running over items, search in linked pages
        for linked_page in item.iterlinks():
            if not linked_page.site.has_extension('GeoData'):
                continue
            if self.try_import_coordinates_from_page(linked_page, item):
                break

    def try_import_coordinates_from_page(self, page, item) -> bool:
        """Try to import the coordinate of the given page to the item.

        :param page: the page whose primary coordinate is requested
        :param item: the item receiving the new claim
        :return: whether any coordinates were found and the import
            was successful
        """
        coord = page.coordinates(primary_only=True)
        if not coord:
            return False

        claim = pywikibot.Claim(self.repo, self.prop)
        claim.setTarget(coord)

        # Attach a source for the page's site only when one is available.
        reference = self.getSource(page.site)
        if reference:
            claim.addSource(reference)

        pywikibot.info(
            f'Adding {coord.lat}, {coord.lon} to {item.title()}')

        # TODO: handle exceptions using self.user_add_claim
        try:
            item.addClaim(claim)
        except CoordinateGlobeUnknownError as e:
            pywikibot.info(f'Skipping unsupported globe: {e.args}')
            return False
        else:
            return True
def main(*args: str) -> None:
    """Parse the command line and run the coordinate import bot.

    If args is an empty list, sys.argv is used.

    :param args: command line argument
    """
    # Global arguments are handled first; what remains is offered to the
    # page generator factory, which returns what it did not handle.
    remaining = pywikibot.handle_args(args)
    factory = pagegenerators.GeneratorFactory()
    remaining = factory.handle_args(remaining)

    # Only the '-create' flag is recognised among the leftover arguments.
    create_new = '-create' in remaining

    # FIXME: this preloading preloads neither coordinates nor Wikibase items
    # but preloads wikitext which we don't need
    pages = factory.getCombinedGenerator(preload=True)

    CoordImportRobot(generator=pages, create=create_new).run()
# Script entry point: main() is called with no explicit arguments when the
# file is executed directly, and is not called when the module is imported.
if __name__ == '__main__':
    main()