-
Notifications
You must be signed in to change notification settings - Fork 5
/
twitter_test.py
57 lines (51 loc) · 1.61 KB
/
twitter_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import csv
import os

import tweepy
# Lookup table used by clean_latin1().
#
# Each key is what a UTF-8 encoded punctuation character looks like after its
# raw bytes have been (mis)decoded as ISO-8859-1: one text character per
# original byte.  Each value is the plain-ASCII stand-in to substitute.
LATIN_1_CHARS = (
    ('\xe2\x80\x99', "'"),    # U+2019 right single quotation mark
    ('\xc3\xa9', 'e'),        # U+00E9 e with acute accent
    ('\xe2\x80\x90', '-'),    # U+2010 hyphen
    ('\xe2\x80\x91', '-'),    # U+2011 non-breaking hyphen
    ('\xe2\x80\x92', '-'),    # U+2012 figure dash
    ('\xe2\x80\x93', '-'),    # U+2013 en dash
    ('\xe2\x80\x94', '-'),    # U+2014 em dash
    ('\xe2\x80\x98', "'"),    # U+2018 left single quotation mark
    ('\xe2\x80\x9b', "'"),    # U+201B single high-reversed-9 quotation mark
    ('\xe2\x80\x9c', '"'),    # U+201C left double quotation mark
    ('\xe2\x80\x9d', '"'),    # U+201D right double quotation mark
    ('\xe2\x80\x9e', '"'),    # U+201E double low-9 quotation mark
    ('\xe2\x80\x9f', '"'),    # U+201F double high-reversed-9 quotation mark
    ('\xe2\x80\xa6', '...'),  # U+2026 horizontal ellipsis
    ('\xe2\x80\xb2', "'"),    # U+2032 prime
    ('\xe2\x80\xb3', "'"),    # U+2033 double prime
    ('\xe2\x80\xb4', "'"),    # U+2034 triple prime
    ('\xe2\x80\xb5', "'"),    # U+2035 reversed prime
    ('\xe2\x80\xb6', "'"),    # U+2036 reversed double prime
    ('\xe2\x80\xb7', "'"),    # U+2037 reversed triple prime
    ('\xe2\x81\xba', "+"),    # U+207A superscript plus sign
    ('\xe2\x81\xbb', "-"),    # U+207B superscript minus
    ('\xe2\x81\xbc', "="),    # U+207C superscript equals sign
    ('\xe2\x81\xbd', "("),    # U+207D superscript left parenthesis
    ('\xe2\x81\xbe', ")"),    # U+207E superscript right parenthesis
)


def clean_latin1(data):
    """Return *data* as UTF-8 bytes, flattening known "smart" punctuation.

    Args:
        data: Either ``str`` or ``bytes``.

    Returns:
        bytes:
          * ``str`` input is simply encoded as UTF-8 (no substitutions).
          * Pure-ASCII ``bytes`` input is returned unchanged.
          * Any other ``bytes`` input is decoded as ISO-8859-1, every
            sequence listed in ``LATIN_1_CHARS`` is replaced by its ASCII
            stand-in, and the result is encoded as UTF-8.  Non-ASCII bytes
            that are not in the table are kept as their Latin-1 characters.
    """
    if isinstance(data, str):
        return data.encode('utf-8')

    # On Python 3 ``bytes`` has no ``.encode`` and ``str.encode`` never raises
    # UnicodeDecodeError, so the "is this plain ASCII?" probe that Python 2
    # performed implicitly has to be done explicitly.
    try:
        data.decode('ascii')
    except UnicodeDecodeError:
        pass
    else:
        return data

    # ISO-8859-1 maps every byte to exactly one character, so this decode
    # cannot fail and lines the text up with the keys of LATIN_1_CHARS.
    text = data.decode('iso-8859-1')
    for mojibake, replacement in LATIN_1_CHARS:
        text = text.replace(mojibake, replacement)
    return text.encode('utf-8')
# Twitter API credentials are read from the environment so that secrets are
# never committed to version control.  A missing variable raises KeyError
# naming the variable that needs to be set.
# NOTE(review): the key/token pairs that were previously hard-coded here have
# been exposed in the repository history and should be revoked and regenerated.
consumer_key = os.environ['TWITTER_CONSUMER_KEY']
consumer_secret = os.environ['TWITTER_CONSUMER_SECRET']
access_token = os.environ['TWITTER_ACCESS_TOKEN']
access_token_secret = os.environ['TWITTER_ACCESS_TOKEN_SECRET']

# Build an OAuth 1a user-context client; wait_on_rate_limit=True makes tweepy
# wait instead of failing when the rate limit is reached.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

hashtag = 'coronavirus'

# Print the text of the first 10 English tweets matching the hashtag.
# NOTE(review): `api.search` is the Tweepy 3.x name; presumably this needs to
# become `api.search_tweets` on Tweepy >= 4.0 - confirm the installed version.
for tweet in tweepy.Cursor(
    api.search,
    q=f"#{hashtag}",
    lang="en",
    since="2019-12-01",
).items(10):
    print(tweet.text)