# Capturing Tweets on a dataset

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, date, time, timedelta
from collections import Counter
import csv
import pickle
from sklearn.externals import joblib

## Main Functions

In [2]:
def get_info(tweets):
    item = tweets[0].user   
    print("\nGetting data for  " + item.screen_name)
    print("Description:      " + item.description)
    print("\nName:             " + item.name)
    print("Screen_name:      " + item.screen_name)
    print("Location:         " + item.location)
    print("Tweets_count:     {0:,}  ".format(item.statuses_count))
    print("Friends_count:    {0:,}  ".format(item.friends_count))
    print("Followers_count:  {0:,}  ".format(item.followers_count))
    print("Favourites_count: {0:,}  ".format(item.favourites_count))    
    tweet_stat(item)

In [3]:
def tweet_stat(item):
    tweets_cnt = item.statuses_count
    account_created_date = item.created_at
    delta = datetime.utcnow() - account_created_date
    account_age_days = delta.days
    print("\nAccount created: " + str(account_created_date))
    print("Account age (in days): {0:,}  ".format(account_age_days))
    if account_age_days > 0:
        print("Average tweets per day: " + "%.2f"%(float(tweets_cnt)/float(account_age_days)))

In [4]:
def get_users_hashtags(tweets):
    hashtags = []
    mentions = []
    tweet_count = 0
    end_date = datetime.utcnow() - timedelta(days=460)
    for status in tweets:
        tweet_count += 1
        if hasattr(status, "entities"):
            entities = status.entities
            if "hashtags" in entities:
                for ent in entities["hashtags"]:
                    if ent is not None:
                        if "text" in ent:
                            hashtag = ent["text"]
                            if hashtag is not None:
                                hashtags.append(hashtag)
            if "user_mentions" in entities:
                for ent in entities["user_mentions"]:
                    if ent is not None:
                        if "screen_name" in ent:
                            name = ent["screen_name"]
                            if name is not None:
                                mentions.append(name)
        if status.created_at < end_date:
            break
        
    print("\nMost mentioned Twitter users:")
    for item, count in Counter(mentions).most_common(10):
        print(item + "\t" + str(count))
    print("\nMost used hashtags:")
    for item, count in Counter(hashtags).most_common(10):
        print(item + "\t" + str(count))
    print ("\nAll done. Processed " + str(tweet_count) + " tweets.")
    
    return mentions, hashtags

In [5]:
def save_csv(tweets, screen_name):
    tweets_out = [[tweet.created_at, tweet.retweet_count, tweet.favorite_count, tweet.full_text] for tweet in tweets]
    with open('%s_tweets.csv' % screen_name, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(["created_at", "retweets", "favorites", "text"])
        writer.writerows(tweets_out)
    print("Archive saved: "+screen_name+"_tweets.csv")

---

## Creating and Saving Datasets

In [105]:
screen_name = 'jairbolsonaro'

In [106]:
# Load full raw tweets from a pickle file
tweets_jb = joblib.load('./raw_data/'+screen_name+'_raw_tweets.pkl')

In [137]:
tweets_jb[0]

Status(_json={'created_at': 'Sun Mar 31 18:24:50 +0000 2019', 'id': 1112420504407863297, 'id_str': '1112420504407863297', 'full_text': 'Reconhecendo os vínculos históricos de Jerusalém com a identidade judaica e também que a cidade é o coração político do Estado de Israel, anunciei hoje que o Brasil abrirá lá um escritório brasileiro para promoção do comércio, investimentos e intercâmbio em inovação e tecnologia. https://t.co/RfMKEl9XtB', 'truncated': False, 'display_text_range': [0, 280], 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': [], 'media': [{'id': 1112420499173359617, 'id_str': '1112420499173359617', 'indices': [281, 304], 'media_url': 'http://pbs.twimg.com/media/D3AcjNYXQAEw4FT.jpg', 'media_url_https': 'https://pbs.twimg.com/media/D3AcjNYXQAEw4FT.jpg', 'url': 'https://t.co/RfMKEl9XtB', 'display_url': 'pic.twitter.com/RfMKEl9XtB', 'expanded_url': 'https://twitter.com/jairbolsonaro/status/1112420504407863297/photo/1', 'type': 'photo', 'sizes': {'thumb'

In [138]:
tweets_jb[0].created_at

datetime.datetime(2019, 3, 31, 18, 24, 50)

In [136]:
tweets_jb[0].id

1112420504407863297

In [139]:
tweets_jb[0].retweet_count

2985

In [139]:
tweets_jb[0].favorite_count

2985

In [141]:
tweets_jb[0].user.screen_name

'jairbolsonaro'

In [142]:
tweets_jb[0].user.name

'Jair M. Bolsonaro'

In [144]:
tweets_jb[3000].user.followers_count

3872290

In [107]:
get_info(tweets_jb)
mentions_jb, hashtags_jb = get_users_hashtags(tweets_jb)


Getting data for  jairbolsonaro
Description:      Capitão do Exército Brasileiro, eleito 38° Presidente da República Federativa do Brasil. 🇧🇷

Name:             Jair M. Bolsonaro
Screen_name:      jairbolsonaro
Location:         Brasília, Brasil
Tweets_count:     6,424  
Friends_count:    330  
Followers_count:  3,872,290  
Favourites_count: 1,804  

Account created: 2010-03-31 23:13:44
Account age (in days): 3,286  
Average tweets per day: 1.95

Most mentioned Twitter users:
CarlosBolsonaro	122
BolsonaroSP	89
jairbolsonaro	69
RenovaMidia	22
planalto	20
tarcisiogdf	18
odiodobem	18
BlogDoPim	18
FlavioBolsonaro	16
conexaopolitica	15

Most used hashtags:
NasRuasComBolsonaro	19
BolsonaroNaBand	5
Brasil	4
PPI	3
cestou	2
Fakenews	2
BolsonaroPresidente17	2
PTnão	2
EuVotoBolsonaro	2
TodosComBolsonaro	2

All done. Processed 2732 tweets.


In [108]:
save_csv(tweets_jb)

Archive saved: jairbolsonaro_tweets.csv


## Importing datasets with Pandas

In [10]:
jb_tws = pd.read_csv('jairbolsonaro_tweets.csv')
jb_tws.head()

Unnamed: 0,created_at,retweets,favorites,text
0,2019-03-31 18:24:50,2985,17307,Reconhecendo os vínculos históricos de Jerusal...
1,2019-03-31 12:19:57,6288,34952,Chegamos há pouco em Israel. Fomos recepcionad...
2,2019-03-31 10:22:14,4035,30343,Ao renovar as concessões de trechos rodoviário...
3,2019-03-31 10:19:02,6327,41684,Após revelação do @MInfraestrutura de pedidos ...
4,2019-03-30 20:25:30,5619,26156,- Ministro da Infraestrutura @tarcisiogdf (cap...


In [11]:
# Verifing a single tweet text
jb_tws.text[1]

'Chegamos há pouco em Israel. Fomos recepcionados pelo Primeiro-Ministro @netanyahu e pudemos proferir palavras à esta nação amiga e o intuito de nossa viagem. Shalom! 🇧🇷🤝🇮🇱 https://t.co/TyWd5pKs0S'

In [12]:
jb_tws.created_at.min()

'2017-10-13 01:11:45'

---

In [112]:
screen_name = 'CarlosBolsonaro'

In [113]:
# Load full raw tweets from a pickle file
tweets_cb = joblib.load('./raw_data/'+screen_name+'_raw_tweets.pkl')

In [114]:
get_info(tweets_cb)
mentions_cb, hashtags_cb = get_users_hashtags(tweets_cb)


Getting data for  CarlosBolsonaro
Description:      Eleito aos 17 anos de idade, exerce o quinto mandato de vereador do Rio de Janeiro e é filho do Presidente @JairBolsonaro.

Name:             Carlos Bolsonaro
Screen_name:      CarlosBolsonaro
Location:         Rio de Janeiro-RJ
Tweets_count:     11,865  
Friends_count:    430  
Followers_count:  1,038,807  
Favourites_count: 2,511  

Account created: 2009-08-25 14:53:27
Account age (in days): 3,505  
Average tweets per day: 3.39

Most mentioned Twitter users:
geraldoalckmin	169
jairbolsonaro	141
BolsonaroSP	56
lsentoes	46
leandroruschel	39
filgmartin	38
conexaopolitica	25
Clauwild1	25
flaviomorgen	24
CarlosBolsonaro	23

Most used hashtags:
fakenews	5
Brasil	4
NasRuasComBolsonaro	4
EstouComBolsonaro	4
BolsonaroNaBand	3
Bolsonaro2018	3
FakeNews	2
EuVotoBolsonaro	2
BrasilComBolsonaro	2
BolsonaroPresidente17	2

All done. Processed 3150 tweets.


In [97]:
save_csv(tweets_cb)

Archive saved: CarlosBolsonaro_tweets.csv


In [116]:
cb_tws = pd.read_csv('CarlosBolsonaro_tweets.csv')

In [117]:
cb_tws.head(10)

Unnamed: 0,created_at,retweets,favorites,text
0,2019-03-31 12:46:40,93,386,A população agradece! https://t.co/1m0PZ3GXvk
1,2019-03-30 14:29:31,1654,6375,"Mais detalhes sobre a Nova Previdência, por @V..."
2,2019-03-29 21:25:03,1657,9512,A VERDADE.... mais uma vez: https://t.co/IfFW3...
3,2019-03-29 20:25:19,49,456,@RitaCar15454727 @tesoureirosdoJB Quem pauta o...
4,2019-03-29 20:09:56,124,822,@tesoureirosdoJB Esfregue o que quiser onde qu...
5,2019-03-29 19:42:46,3740,19044,O novo modus operandi de grande parte da impre...
6,2019-03-29 10:42:47,2304,12705,Como titular da Comissão de Direitos Humanos d...
7,2019-03-28 23:20:01,1548,8679,Parabéns aos envolvidos. Ver o crescimento do ...
8,2019-03-28 16:52:11,1776,0,RT @JornalDaCidadeO: O establishment brasileir...
9,2019-03-28 11:05:34,2056,9471,Mais notícias boas! Ainda bem que a internet e...


In [118]:
cb_tws.text[8]

'RT @JornalDaCidadeO: O establishment brasileiro nunca quis saber de Bolsonaro e trama a sua derrubada de todas as formas.\nhttps://t.co/mB3t…'

---

In [6]:
def create_csv (screen_name, prt = True):
    # Load full raw tweets from a pickle file
    tweets = joblib.load('./raw_data/'+screen_name+'_raw_tweets.pkl')
    # print tweets main data
    if prt:
        get_info(tweets)
        mentions_cb, hashtags_cb = get_users_hashtags(tweets)
    #save csv_file
    save_csv(tweets, screen_name)

In [125]:
!ls ./raw_data

BlogDoPim_raw_tweets.pkl       LulaOficial_raw_tweets.pkl
BolsonaroSP_raw_tweets.pkl     gleisi_raw_tweets.pkl
CarlosBolsonaro_raw_tweets.pkl jairbolsonaro_raw_tweets.pkl
FlavioBolsonaro_raw_tweets.pkl


In [126]:
screen_name = 'BolsonaroSP'
create_csv (screen_name)


Getting data for  BolsonaroSP
Description:      Deputado Federal mais votado da história do Brasil (1.843.735) em seu segundo mandato por SÃO PAULO, Policial Federal, Advogado e 3º filho de Jair Bolsonaro🇧🇷

Name:             Eduardo Bolsonaro🇧🇷
Screen_name:      BolsonaroSP
Location:         São Paulo - SP
Tweets_count:     12,695  
Friends_count:    491  
Followers_count:  1,365,419  
Favourites_count: 13,636  

Account created: 2009-09-16 15:25:13
Account age (in days): 3,486  
Average tweets per day: 3.64

Most mentioned Twitter users:
jairbolsonaro	521
BolsonaroSP	411
CarlosBolsonaro	169
RenovaMidia	97
filgmartin	65
FlavioBolsonaro	57
ernestofaraujo	49
carteiroreaca	40
NicolasMaduro	40
Rumbo_Libertad	33

Most used hashtags:
Brasil	15
Venezuela	13
Repost	6
QuemMandouMatarBolsonaro	6
SemanaRENOVA	5
EUA	5
B17	5
SanatórioGeral	5
Israel	4
EuVotoBolsonaro	4

All done. Processed 3221 tweets.
Archive saved: BolsonaroSP_tweets.csv


---

In [127]:
screen_name = 'FlavioBolsonaro'
create_csv (screen_name)


Getting data for  FlavioBolsonaro
Description:      Agradeço a Deus e aos 4.380.418 votos que me elegeram Senador pelo RJ #bolsonaro

Name:             Flavio Bolsonaro
Screen_name:      FlavioBolsonaro
Location:         Rio de Janeiro
Tweets_count:     11,324  
Friends_count:    1,198  
Followers_count:  1,177,592  
Favourites_count: 1,739  

Account created: 2009-05-14 18:28:03
Account age (in days): 3,611  
Average tweets per day: 3.14

Most mentioned Twitter users:
jairbolsonaro	174
BolsonaroSP	35
CarlosBolsonaro	13
conexaopolitica	12
leandroruschel	11
PMERJ	10
carlosjordy	10
tercalivre	9
NetflixBrasil	9
benebarbosa_mvb	8

Most used hashtags:
FlavioBolsonaroSenador177	54
BolsonaroPresidente17	37
brasil	25
errejota	24
EstouComBolsonaro	23
rj	22
bolsonaro2018	22
riodejaneiro	21
bolsonaro	21
rio	20

All done. Processed 1117 tweets.
Archive saved: FlavioBolsonaro_tweets.csv


---

In [128]:
screen_name = 'LulaOficial'
create_csv (screen_name)


Getting data for  LulaOficial
Description:      Ex-presidente da República do Brasil (2003-2010). Preso político desde 7 de abril de 2018.

Name:             Lula
Screen_name:      LulaOficial
Location:         
Tweets_count:     17,716  
Friends_count:    530  
Followers_count:  732,030  
Favourites_count: 2,167  

Account created: 2014-07-22 21:36:50
Account age (in days): 1,716  
Average tweets per day: 10.32

Most mentioned Twitter users:
Haddad_Fernando	1043
LulaOficial	116
gleisi	70
ManuelaDavila	59
ptbrasil	48
inst_lula	46
dilmabr	30
wadih_damous	16
GuilhermeBoulos	14
ONUBrasil	14

Most used hashtags:
HaddadPresidente	205
LulaPresidente	192
LulaLivre	129
LulaLivreJá	104
Vote13	94
VotePorLulaVote13	52
HaddadÉ13	50
LulaÉHaddad	48
Haddad13	47
DebateNaGlobo	43

All done. Processed 3237 tweets.
Archive saved: LulaOficial_tweets.csv


---

In [129]:
screen_name = 'gleisi'
create_csv (screen_name)


Getting data for  gleisi
Description:      Gleisi é deputada federal  pelo Estado do Paraná e Presidenta nacional do Partido dos Trabalhadores (PT).

Name:             Gleisi Lula Hoffmann
Screen_name:      gleisi
Location:         Curitiba - Brasil
Tweets_count:     11,759  
Friends_count:    755  
Followers_count:  547,595  
Favourites_count: 807  

Account created: 2009-04-14 14:55:57
Account age (in days): 3,641  
Average tweets per day: 3.23

Most mentioned Twitter users:
LulaOficial	62
ptbrasil	61
LulaPeloBrasil	61
brasil247	39
Haddad_Fernando	26
gleisi	15
ManuelaDavila	11
DeputadoFederal	9
requiaopmdb	8
PTnaCamara	8

Most used hashtags:
LulaLivre	243
Haddad13	46
LulaPresidente	42
AoVivo	38
DebateComLula	32
PovoComLula	28
OBrasilFelizDeNovo	26
EleiçãoSemLulaÉFraude	24
AOVIVO	22
GleisiLula1313	22

All done. Processed 2118 tweets.
Archive saved: gleisi_tweets.csv


---

In [130]:
screen_name = 'BlogDoPim'
create_csv (screen_name)


Getting data for  BlogDoPim
Description:      Diretor de Jornalismo da Jovem Pan. Âncora e comentarista. Colunista da Crusoé. Maior influenciador político do Brasil no Twitter, segundo estudo da Stilingue.

Name:             Felipe Moura Brasil
Screen_name:      BlogDoPim
Location:         Rio de Janeiro
Tweets_count:     31,905  
Friends_count:    3,401  
Followers_count:  1,199,438  
Favourites_count: 11,477  

Account created: 2009-07-01 21:00:22
Account age (in days): 3,563  
Average tweets per day: 8.95

Most mentioned Twitter users:
BlogDoPim	39
JovemPanNews	24
xandresmoraes	6
andreazzaeditor	4
LuizFernandoM	4
anaerobica	3
RLSMO0780	3
JornalDaGazeta	3
Leandrogutts	3
carloscarvvalho	3

Most used hashtags:
OsPingosNosIs	441
PartiuPraCima	49
ResumãoAntagonista	41
DebateBand	22
DebateRedeTV	15
Retrospectiva2018	8
Sextou	6
RenanNão	6
PTNão	6
RenanNever	5

All done. Processed 3219 tweets.
Archive saved: BlogDoPim_tweets.csv


---

In [7]:
screen_name = 'Haddad_Fernando'
create_csv (screen_name)


Getting data for  Haddad_Fernando
Description:      Sou professor da USP e do Insper, ministro da Educação (2005-2012) e prefeito de São Paulo (2013-2016).

Name:             Fernando Haddad
Screen_name:      Haddad_Fernando
Location:         São Paulo
Tweets_count:     3,776  
Friends_count:    305  
Followers_count:  1,382,893  
Favourites_count: 321  

Account created: 2011-08-13 03:51:21
Account age (in days): 2,795  
Average tweets per day: 1.35

Most mentioned Twitter users:
LulaOficial	202
Haddad_Fernando	44
UOL	27
ManuelaDavila	26
MichelTemer	22
UOLNoticias	18
elpais_brasil	17
Estadao	14
jairbolsonaro	14
folha	11

Most used hashtags:
HaddadPresidente	119
Vote13	86
Haddad13	75
LulaPresidente	75
VotePorLulaVote13	53
HaddadÉ13	42
DebateNaGlobo	32
DebateBand	32
AOVIVO	28
HaddadAoVivo	24

All done. Processed 2262 tweets.
Archive saved: Haddad_Fernando_tweets.csv


---

In [None]:
screen_name = ''
create_csv (screen_name)