In [2]:
# %pip install GetOldTweets3==0.0.11  # run once; %pip installs into the running kernel's environment

Collecting GetOldTweets3
  Downloading GetOldTweets3-0.0.11-py3-none-any.whl (13 kB)
Collecting pyquery>=1.2.10
  Downloading pyquery-1.4.1-py2.py3-none-any.whl (22 kB)
Installing collected packages: pyquery, GetOldTweets3
Successfully installed GetOldTweets3-0.0.11 pyquery-1.4.1


In [3]:
from bs4 import BeautifulSoup
import GetOldTweets3 as got
import pandas as pd
import time  

## 트위터 수집

In [4]:
def get_tweets(Username=None,
               Since=None, Until=None,
               QuerySearch=None,
               Near=None, Within=None,
               MaxTweets=None):
    """Collect tweets with GetOldTweets3 and print how long the collection took.

    Each argument is optional. A falsy argument (``None``, ``""``, ``0``) is
    skipped, so the corresponding criterion keeps whatever the library
    defaults to.

    Args:
        Username: search tweets by a specific user id (``setUsername``).
        Since: start of the search period, e.g. ``"2015-01-01"`` (``setSince``).
        Until: end of the search period, e.g. ``"2015-02-01"`` (``setUntil``).
        QuerySearch: search tweets containing this term (``setQuerySearch``).
        Near: reference location for a nearby search (``setNear``).
        Within: used together with ``Near`` for the nearby search (``setWithin``).
        MaxTweets: maximum number of tweets to output (``setMaxTweets``).

    Returns:
        The object returned by ``got.manager.TweetManager.getTweets`` for the
        assembled criteria; its ``len`` is printed as the total tweet count.
    """
    # Define the collection criteria: (setter name, value) pairs, applied in
    # this order. Setters are looked up lazily, only for truthy values.
    criteria = got.manager.TweetCriteria()
    optional_filters = (
        ("setUsername", Username),
        ("setSince", Since),
        ("setUntil", Until),
        ("setQuerySearch", QuerySearch),
        ("setNear", Near),
        ("setWithin", Within),
        ("setMaxTweets", MaxTweets),
    )
    for setter_name, value in optional_filters:
        if value:
            getattr(criteria, setter_name)(value)

    # Run the collection and time it with wall-clock timestamps.
    print("Collecting data start.. from {} to {}".format(Since, Until))

    started_at = time.time()
    collected = got.manager.TweetManager.getTweets(criteria)
    finished_at = time.time()

    elapsed_minutes = (finished_at - started_at) / 60
    print("Collecting data end.. {0:0.2f} Minutes".format(elapsed_minutes))
    print("=== Total num of tweets is {} ===".format(len(collected)))

    return collected

In [5]:
# Smoke test: fetch at most 10 tweets for one account, with no date or keyword filter.
tweets = get_tweets(Username='realDonaldTrump', MaxTweets=10)

Collecting data start.. from None to None
Collecting data end.. 0.01 Minutes
=== Total num of tweets is 10 ===


# 수집된 데이터 정리

- 수집되지 않은 정보는, 아래 정보를 통해 따로 크롤링해서 수집해줘야 한다.
- 트럼프가 리트윗한 트윗은 수집되지 않는다.
- 스레드 형식으로 이어진 트윗은 수집되지 않는다.
<br><br>
``` python
tweets[2].username  # realDonaldTrump
tweets[2].date     # datetime.datetime(2019, 9, ...)   날짜
tweets[2].text     # 'Now the Fake News Media says'    내용
tweets[2].favorites  # 31942                    좋아요
tweets[2].retweets  # 8436                     리트윗 수
tweets[2].replies   # 3906                     댓글
tweets[2].permalink  # https://tw(..)              트위터 링크
```

In [4]:
def tweetsDataFrame(tweets):
    """Tabulate collected tweets, one row per tweet.

    Args:
        tweets: iterable of objects exposing ``date`` (anything with
            ``strftime``), ``text``, ``favorites``, ``retweets``, ``replies``
            and ``permalink`` attributes.

    Returns:
        pandas.DataFrame with the columns ``date`` (``YYYYMMDD`` string),
        ``time`` (``HH:MM:SS`` string), ``text``, ``favorites``, ``retweets``,
        ``replies`` and ``permalink``, in the same order as ``tweets``.
    """
    column_names = ['date', 'time', 'text', 'favorites', 'retweets', 'replies', 'permalink']

    rows = []
    for item in tweets:
        posted_at = item.date
        # Split the timestamp into separate day and clock-time strings.
        rows.append([
            posted_at.strftime("%Y%m%d"),
            posted_at.strftime("%H:%M:%S"),
            item.text,
            item.favorites,
            item.retweets,
            item.replies,
            item.permalink,
        ])

    return pd.DataFrame(rows, columns=column_names)

# Example query: tweets from one account containing "great",
# with Since=2015-01-01 and Until=2015-02-01, shown as a table.
tweets = get_tweets(
    Username="realDonaldTrump",
    Since="2015-01-01",
    Until="2015-02-01",
    QuerySearch="great",
)
tweetsDataFrame(tweets)

Collecting data start.. from 2015-01-01 to 2015-02-01
Collecting data end.. 0.11 Minutes
=== Total num of tweets is 117 ===


Unnamed: 0,date,time,text,favorites,retweets,replies,permalink
0,20150131,11:55:06,""" @catham11: @realDonaldTrump Congrats Trump i...",20,9,4,https://twitter.com/realDonaldTrump/status/561...
1,20150131,02:26:41,I hope you can go to @oreillyfactor and vote f...,2130,4480,53,https://twitter.com/realDonaldTrump/status/561...
2,20150130,15:09:11,""" @VladimirRussia7 @realDonaldTrump You are th...",48,93,18,https://twitter.com/realDonaldTrump/status/561...
3,20150129,03:38:15,"""@HarryPlush: Think @realDonaldTrump would be ...",71,31,21,https://twitter.com/realDonaldTrump/status/560...
4,20150128,14:56:10,""" @marklevinshow: Excellent article, read to t...",34,25,2,https://twitter.com/realDonaldTrump/status/560...
5,20150128,14:08:03,Getting ready to go on @KellyandMichael - two ...,92,32,24,https://twitter.com/realDonaldTrump/status/560...
6,20150128,07:20:03,""" @EyeOnPolitics: But, I loved seeing @IvankaT...",30,9,4,https://twitter.com/realDonaldTrump/status/560...
7,20150127,14:24:17,""" @coverallpads: @realDonaldTrump @ApprenticeN...",30,7,9,https://twitter.com/realDonaldTrump/status/560...
8,20150127,13:47:43,"President Obama, our great leader, wants to de...",171,180,92,https://twitter.com/realDonaldTrump/status/560...
9,20150127,02:11:05,""" @aenochs64: @ApprenticeNBC @Kateplusmy8 @rea...",18,7,2,https://twitter.com/realDonaldTrump/status/559...
