-
Notifications
You must be signed in to change notification settings - Fork 254
/
geojson.py
executable file
·120 lines (93 loc) · 3.06 KB
/
geojson.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env python
"""
geojson.py reads in tweets and writes out a corresponding geojson file for the
tweets. Each feature will include the following properties:
* twitter user name
* twitter user screen name
* tweet creation time
* tweet status text
* profile image url
* the tweet url
By default both Point and Polygon features will be included, depending on
whether the tweet includes a point or is assigned to a place with a bounding
box.
Optionally you can convert bounding boxes to points with the --centroid
parameter, and can also use --fuzz to randomly place the point inside the
bounding box.
"""
from __future__ import print_function
import json
import random
import argparse
import fileinput
import dateutil.parser
def text(t):
    """Return the tweet's status text flattened onto a single line.

    The text is looked up in this order: the top-level ``full_text``, then
    ``extended_tweet.full_text``, then ``text``. The first non-empty value
    wins.

    Args:
        t: a tweet as a dict (parsed from JSON).

    Returns:
        The selected text with every newline replaced by a single space.

    Raises:
        KeyError: if neither full-text field is set and ``text`` is missing.
    """
    # "extended_tweet" may be present but null; treat that the same as absent
    # instead of failing on None.get(...).
    extended = t.get("extended_tweet") or {}
    status = t.get("full_text") or extended.get("full_text") or t["text"]
    return status.replace("\n", " ")
parser = argparse.ArgumentParser()
parser.add_argument(
"-c",
"--centroid",
dest="centroid",
action="store_true",
default=False,
help="store centroid instead of a bounding box",
)
parser.add_argument(
"-f",
"--fuzz",
type=float,
dest="fuzz",
default=0,
help="add a random lon and lat shift to bounding box centroids (0-0.1)",
)
parser.add_argument(
"files", nargs="*", default=("-",), help="files to read, if empty, stdin is used"
)
args = parser.parse_args()
# Convert every tweet (one JSON object per input line) into a GeoJSON Feature.
# Tweets without an exact point or a place bounding box are left out.
features = []
for line in fileinput.input(files=args.files):
    # Tolerate blank lines (e.g. a trailing newline) rather than crashing in
    # json.loads.
    if not line.strip():
        continue
    tweet = json.loads(line)

    # When created_at carries a UTC offset the parsed datetime is
    # timezone-aware, and isoformat() would then emit the offset itself,
    # producing a malformed "...+00:00Z". Normalize to naive UTC first so the
    # appended "Z" is both valid and accurate.
    created = dateutil.parser.parse(tweet["created_at"])
    offset = created.utcoffset()
    if offset is not None:
        created = (created - offset).replace(tzinfo=None)

    user = tweet["user"]
    feature = {
        "type": "Feature",
        "properties": {
            "name": user["name"],
            "screen_name": user["screen_name"],
            "created_at": created.isoformat("T") + "Z",
            "text": text(tweet),
            "profile_image_url": user["profile_image_url"],
            "url": "http://twitter.com/%s/status/%s"
            % (user["screen_name"], tweet["id_str"]),
        },
    }

    # Any of "geo", "place" and "bounding_box" may be missing or null.
    geo = tweet.get("geo")
    place = tweet.get("place") or {}
    rings = (place.get("bounding_box") or {}).get("coordinates")

    geometry = None
    if geo:
        # The two values are swapped relative to their order in the tweet's
        # "geo" field (presumably [lat, lon] there); GeoJSON positions are
        # [lon, lat].
        geometry = {
            "type": "Point",
            "coordinates": [geo["coordinates"][1], geo["coordinates"][0]],
        }
    elif rings:
        bbox = rings[0]
        if args.centroid:
            # bbox[0] and bbox[2] are treated as opposite corners of the box.
            min_x, min_y = bbox[0][0], bbox[0][1]
            max_x, max_y = bbox[2][0], bbox[2][1]
            # --fuzz scales a random shift in [-1, 1] applied to each axis;
            # with the default of 0 the point is the exact centre.
            fuzz_x = args.fuzz * random.uniform(-1, 1)
            fuzz_y = args.fuzz * random.uniform(-1, 1)
            center_x = ((max_x + min_x) / 2.0) + fuzz_x
            center_y = ((max_y + min_y) / 2.0) + fuzz_y
            geometry = {"type": "Point", "coordinates": [center_x, center_y]}
        else:
            # Repeat the first corner at the end to close the polygon ring.
            geometry = {
                "type": "Polygon",
                "coordinates": [[bbox[0], bbox[1], bbox[2], bbox[3], bbox[0]]],
            }

    if geometry is not None:
        feature["geometry"] = geometry
        features.append(feature)

geojson = {"type": "FeatureCollection", "features": features}
print(json.dumps(geojson, indent=2))