-
Notifications
You must be signed in to change notification settings - Fork 255
/
search.py
executable file
·52 lines (35 loc) · 1.12 KB
/
search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env python
"""
Filter tweet JSON based on a regular expression to apply to the text of the
tweet.
search.py <regex> file1
Or if you want a case insensitive match:
search.py -i <regex> file1
"""
from __future__ import print_function
import re
import sys
import json
import argparse
import fileinput
from twarc import json2csv
if len(sys.argv) == 1:
sys.exit("usage: search.py <regex> file1 file2")
parser = argparse.ArgumentParser(description="filter tweets by regex")
parser.add_argument('-i', '--ignore', dest='ignore', action='store_true',
help='ignore case')
parser.add_argument('regex')
parser.add_argument('files', metavar='FILE', nargs='*', default=['-'], help='files to read, if empty, stdin is used')
args = parser.parse_args()
flags = 0
if args.ignore:
flags = re.IGNORECASE
try:
regex = re.compile(args.regex, flags)
except Exception as e:
sys.exit("error: regex failed to compile: {}".format(e))
for line in fileinput.input(files=args.files):
tweet = json.loads(line)
text = json2csv.text(tweet)
if regex.search(text):
print(line, end='')