/
getTweetsCity.py
117 lines (96 loc) · 3.66 KB
/
getTweetsCity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import configparser
import csv
import datetime
import os
import time

import tweepy
#reload(sys)
#sys.setdefaultencoding('UTF-8')
def removeEntersAndCommas(tweet):
    """Return *tweet* with every comma, line feed and carriage return removed.

    The characters are deleted rather than replaced by a space, so the text
    on either side of a removed line break is joined together.
    """
    # One translate() pass deletes all three characters at once.
    return tweet.translate(str.maketrans('', '', ',\n\r'))
# ---------------------------------------------------------------------------
# Script body: for every configured (city, keyword) pair, search Twitter
# around the city's geocode and store the matching tweets in one CSV file.
# ---------------------------------------------------------------------------

# --- Configuration ---------------------------------------------------------
config = configparser.ConfigParser()
config.read('configuration.ini')

CONSUMER_KEY = config['AuthenticationParams']['consumer_key']
CONSUMER_SECRET = config['AuthenticationParams']['consumer_secret']
OAUTH_TOKEN = config['AuthenticationParams']['oauth_token']
OAUTH_TOKEN_SECRET = config['AuthenticationParams']['oauth_token_secret']

# Keywords and city names are comma-separated; geocodes are ';'-separated
# (presumably because each geocode itself contains commas -- verify against
# configuration.ini).
keywordsList = config['TwitterParser2']['keywordsList'].split(',')
cities = config['TwitterParser2']['cities'].split(',')
cities_latlng = config['TwitterParser2']['cities_latlng'].split(';')
language = config['TwitterParser2']['lang']

# Parsed through float() first so a value such as "100.0" is accepted.
# NOTE: despite the name, the cap is applied per (city, keyword) pair.
recordsPerCity = int(float(config['TwitterParser2']['recordsPerCity']))

# Fail before any API call instead of with an IndexError halfway through
# the run, when earlier results would be lost.
if len(cities_latlng) < len(cities):
    raise ValueError(
        'configuration.ini lists {} cities but only {} cities_latlng entries'
        .format(len(cities), len(cities_latlng))
    )

# --- Twitter client --------------------------------------------------------
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
api = tweepy.API(auth)

# --- Output location -------------------------------------------------------
# Built with os.path so the path is valid on every platform, and created up
# front so a missing directory is not discovered only after collection.
filePath = os.path.join('.', 'SampleData')
os.makedirs(filePath, exist_ok=True)

print('Start...\n')

# One entry per matching tweet, in the column order of the CSV header below.
csv_rows = []

# zip() pairs each city with the geocode at the same position; surplus
# geocode entries are ignored, as before.
for city_name, geo in zip(cities, cities_latlng):
    for keyword in keywordsList:
        keyword_lower = keyword.lower()
        counter = 0  # matching tweets kept for this (city, keyword) pair

        print("Test...\n")
        print(keyword)
        print(city_name)
        print(geo)

        # The language filter now comes from the configuration file; it was
        # read into `language` but the search hard-coded 'en'.
        c = tweepy.Cursor(api.search, q=keyword, geocode=geo,
                          lang=language).items()
        while True:
            try:
                tweet = c.next()
            except tweepy.TweepError as err:
                # Same retry policy as before (wait a little over 15 minutes,
                # presumably to sit out the API rate-limit window, then try
                # again), but the error is now reported instead of being
                # swallowed silently.
                print('Twitter API error, retrying in 15 minutes: {}'.format(err))
                time.sleep(60 * 15 + 15)
                continue
            except StopIteration:
                break

            text = removeEntersAndCommas(tweet.text)
            # Keep only tweets whose visible text really contains the keyword.
            if keyword_lower not in text.lower():
                continue

            if hasattr(tweet, 'retweeted_status'):
                tweetType = 'r'
                # For a retweet the "author" column holds the id of the
                # original (retweeted) status.
                author = tweet.retweeted_status.id
            else:
                tweetType = 'o'
                author = tweet.id

            counter += 1
            # The text is stored as-is; the previous str(text.encode(...))
            # wrote the bytes repr ("b'...'") on Python 3.
            csv_rows.append([
                tweet.id,
                author,
                keyword,
                tweet.created_at,
                tweet.user.id,
                tweet.user.screen_name,
                tweetType,
                city_name,
                text,
            ])
            if counter >= recordsPerCity:
                break

# --- Write results ---------------------------------------------------------
# newline='' is what the csv module expects of the file object; UTF-8 keeps
# non-ASCII tweet text intact regardless of the platform default encoding.
# csv.writer ends each row with '\r\n' (its default dialect).
with open(os.path.join(filePath, 'twitterIds_dataset_cities.csv'), 'w',
          encoding='utf-8', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['tweetId', 'author', 'keyword', 'tweetCreated_at',
                     'userId', 'screen_name', 'tweetType', 'city', 'text'])
    writer.writerows(csv_rows)