In [173]:
import pandas as pd
import GetOldTweets3 as got
from time import sleep

In [174]:
def get_tweets(username, top_only, start_date, end_date, max_tweets):

    tweetCriteria = got.manager.TweetCriteria().setUsername(username)\
                    .setTopTweets(top_only)\
                    .setSince(start_date)\
                    .setUntil(end_date)\
                    .setMaxTweets(max_tweets)

    tweets = got.manager.TweetManager.getTweets(tweetCriteria)
    print(f"Total tweets scraped: {len(tweets)}")

    text_tweets = [[tw.username,
                tw.text,
                tw.date,
                tw.retweets,
                tw.favorites,
                tw.mentions,
                tw.hashtags] for tw in tweets]
    
    news_df = pd.DataFrame(text_tweets, 
                            columns = ['User', 'Text', 'Date', 'Favorites', 'Retweets', 'Mentions', 'HashTags'])
    
    return news_df

In [175]:
news_sources = ['La_DOTD','TxDot','BR_Traffic', 'NOLA_Traffic','Laf_Traffic', 'NS_Traffic', 
                'LC_Traffic', 'wbrztraffic', 'WAFBTraffic', 'LAStatePolice', 'BTRtraffic']

news_df = get_tweets(news_sources, 
                     top_only = False,
                     start_date = "2020-08-07", 
                     end_date = "2020-09-04",
                     max_tweets = 2000).sort_values('Date', ascending=False)
news_df.head()

Total tweets scraped: 1931


Unnamed: 0,User,Text,Date,Favorites,Retweets,Mentions,HashTags
0,BTRtraffic,RT BR_Traffic The center lane is blocked on I-...,2020-09-03 23:56:21+00:00,0,0,,
1,BR_Traffic,The center lane is blocked on I-10 West at Ess...,2020-09-03 23:50:57+00:00,1,0,,
2,BTRtraffic,RT BR_Traffic Congestion remains on I-12 East ...,2020-09-03 23:30:46+00:00,0,0,,
3,NS_Traffic,Congestion remains on I-12 East from Mile Mark...,2020-09-03 23:30:37+00:00,0,0,,
4,BR_Traffic,Congestion remains on I-12 East from Mile Mark...,2020-09-03 23:30:32+00:00,1,0,,


In [176]:
news_df.shape

(1931, 7)

In [177]:
news_df.head(50)

Unnamed: 0,User,Text,Date,Favorites,Retweets,Mentions,HashTags
0,BTRtraffic,RT BR_Traffic The center lane is blocked on I-...,2020-09-03 23:56:21+00:00,0,0,,
1,BR_Traffic,The center lane is blocked on I-10 West at Ess...,2020-09-03 23:50:57+00:00,1,0,,
2,BTRtraffic,RT BR_Traffic Congestion remains on I-12 East ...,2020-09-03 23:30:46+00:00,0,0,,
3,NS_Traffic,Congestion remains on I-12 East from Mile Mark...,2020-09-03 23:30:37+00:00,0,0,,
4,BR_Traffic,Congestion remains on I-12 East from Mile Mark...,2020-09-03 23:30:32+00:00,1,0,,
5,BTRtraffic,RT BR_Traffic All lanes are now open on I-10 W...,2020-09-03 23:15:38+00:00,0,0,,
6,BR_Traffic,All lanes are now open on I-10 West on the Mis...,2020-09-03 23:11:17+00:00,3,0,,
7,BTRtraffic,RT BR_Traffic The center lane is blocked on I-...,2020-09-03 23:11:09+00:00,0,0,,
8,BR_Traffic,The center lane is blocked on I-10 West at Per...,2020-09-03 23:07:12+00:00,0,0,,
9,BTRtraffic,RT BR_Traffic There is congestion on I-12 East...,2020-09-03 22:41:13+00:00,0,0,,


In [178]:
news_df.to_csv('tweets_data/news_df.csv', index = False)

In [152]:
def get_tweets2(keyword_list, top_only, city, start_date, end_date, max_tweets):
    
    df = pd.DataFrame()
    
    for i in keyword_list:

        tweetCriteria = got.manager.TweetCriteria().setQuerySearch(i)\
                    .setTopTweets(top_only)\
                    .setNear(city)\
                    .setWithin("200mi")\
                    .setSince(start_date)\
                    .setUntil(end_date)\
                    .setMaxTweets(max_tweets)

        tweets = got.manager.TweetManager.getTweets(tweetCriteria)

        text_tweets = [[tw.username,
                tw.text,
                tw.date,
                tw.retweets,
                tw.favorites,
                tw.mentions,
                tw.hashtags] for tw in tweets]
    
        df2 = pd.DataFrame(text_tweets, 
                            columns = ['User', 'Text', 'Date', 'Favorites', 'Retweets', 'Mentions', 'HashTags'])
        
        df = df.append(df2)
        
        print(f"Total tweets scraped: {len(df)}")
        
        sleep(300)
        
    return df
    

In [166]:
keywords1 = ['hurricane', 'closed', 'close']
keywords2 = ['congestion', 'blocked', 'highway', 
            'traffic']
keywords3 = ['route', 'open', 'lane', 'road']
keywords4 = ['shut down', 'detour', 'evacuation', 'hwy']

news4_df = get_tweets2(keyword_list = keywords4,
                     top_only = False,
                     city = "Lake Charles, Louisiana",
                     start_date = "2020-08-24", 
                     end_date = "2020-09-01",
                     max_tweets = 10_000).sort_values('Date', ascending=False)
    
news4_df.head()

Total tweets scraped: 146
Total tweets scraped: 150
Total tweets scraped: 453
Total tweets scraped: 621


Unnamed: 0,User,Text,Date,Favorites,Retweets,Mentions,HashTags
1,LenovoLegion,Lenovo Legion x @PlayApex Now is your chance t...,2020-09-03 18:26:48+00:00,9,106,@PlayApex,
140,LenovoLegion,Lenovo Legion x @PlayApex Now is your chance t...,2020-09-03 18:26:48+00:00,9,106,@PlayApex,
22,LenovoLegion,Lenovo Legion x @PlayApex Now is your chance t...,2020-09-03 18:26:48+00:00,9,106,@PlayApex,
43,LenovoLegion,Lenovo Legion x @PlayApex Now is your chance t...,2020-09-03 18:26:48+00:00,9,106,@PlayApex,
85,LenovoLegion,Lenovo Legion x @PlayApex Now is your chance t...,2020-09-03 18:26:48+00:00,9,106,@PlayApex,


In [167]:
news4_df.shape

(621, 7)

In [168]:
news4_df.tail()

Unnamed: 0,User,Text,Date,Favorites,Retweets,Mentions,HashTags
143,KirstenMaya,"We riding down highland, and LSU might as well...",2020-08-24 01:58:15+00:00,2,7,,
144,Willie_Beamerr,Peaceful Protesters shut down Ambassador today..,2020-08-24 01:06:58+00:00,0,1,,
301,Shawdaaaa,I’m not 100% if we have to evacuate but I’m su...,2020-08-24 01:06:51+00:00,0,0,,
302,Shawdaaaa,This what I’m seeing. If it’s a mandatory evac...,2020-08-24 00:47:47+00:00,0,0,,
145,OneDamnLuckyGuy,Not even 2 minutes into the interview @DavidMu...,2020-08-24 00:13:16+00:00,0,0,@DavidMuir @POTUS @ABCPolitics,


In [169]:
news4_df.to_csv('tweets_data/news4_df.csv', index = False)

In [179]:
csv_list = ["news_df.csv", "news1_df.csv", "news2_df.csv", "news3_df.csv", "news4_df.csv"]

df_list = []
for i in csv_list:
    df_list.append(pd.read_csv('tweets_data/' + i))

lake_ch_tweets_2 = pd.concat(df_list)

lake_ch_tweets_2.shape

(12680, 7)

In [180]:
lake_ch_tweets_2.to_csv('tweets_data/lake_ch_tweets_2.csv', index = False)