In [12]:
from datetime import datetime
import pandas as pd

In [13]:
# INPUT                                                                                                                                     
all_tweets = 'tweets/all_tweets.csv' 
url_by_id = 'https://twitter.com/statuses/'
df = pd.read_csv(all_tweets) 
df['id'] = url_by_id + df['id'].astype(str)

print(df.shape)
df.head(2)

(43789, 5)


Unnamed: 0,user,datetime,text,id,entities
0,realDonaldTrump,2015-01-07 21:42:11 UTC,Via @TheYBF: “@msvivicafox Attends A Private S...,https://twitter.com/statuses/552943302518272000,http://theybf.com/2015/01/06/the-apprentices-c...
1,realDonaldTrump,2015-01-07 21:55:38 UTC,My condolences and prayers to the victims of t...,https://twitter.com/statuses/552946688558907393,


In [14]:
pres = [
    'realDonaldTrump', 'POTUS',
]

gov = [ 
    'WhiteHouse', 'PressSec', 'RudyGiuliani', 'StephenBannon', 'jeffsessions', 'KellyannePolls',      
    'GenFlynn', 
]
news = [
    'NBCNews', 'CNN', 'cnnbrk', 'FoxNews', 'AP', 'nytimes', 'BreitbartNews', 'guardian',                           
]


def organize(df, users):
    # get certain groups
    df_new = df.copy()
    df_new = df_new[ df_new['user'].isin(users) ]
    
    # reindex
    df_new.index = range(df_new.shape[0])   
    
    # sort by date
    df_new.iloc[:,1] = pd.to_datetime(df_new['datetime'], format='%Y-%m-%d %H:%M:%S %Z' )
    df_new = df_new.sort_values(by='datetime', ascending=False)
    
    # sort by user
    df_n = pd.DataFrame()
    for group_name, df_g in df_new.groupby(['user']):
        df_n = df_n.append(df_g, ignore_index=True)
    
    # reindex
    df_n.index = range(df_n.shape[0])
    
    return df_n

df_pres = organize(df, pres)
df_gov  = organize(df, gov)
df_news = organize(df, news)

In [15]:
print(df_pres.shape)
df_pres.head(3)

(444, 5)


Unnamed: 0,user,datetime,text,id,entities
0,POTUS,2017-03-01 02:41:32,Obamacare is collapsing – and we must act deci...,https://twitter.com/statuses/836768336163799040,
1,POTUS,2017-03-01 02:31:45,My team is developing historic tax reform that...,https://twitter.com/statuses/836765872991633408,
2,POTUS,2017-02-26 18:45:40,"Russia talk is FAKE NEWS put out by the Dems, ...",https://twitter.com/statuses/835923804983603202,


# Selected Tweets

In [16]:
keywords = ['ban', 'Islam', 'Islamic', 'ISIS', 'mosque', 'muslim', 'muslims', 'radical', 'refugee', 
            'register', 'registry', 'religion', 'religious', 'terror']

keywords = ['ban', 'Islam', 'ISIS', 'mosque', 'muslim', 'radical', 'refugee', 
            'register', 'registry', 'religion', 'religious', 'terror',
            'executive', 'order', 'immigrant',
           ]

keywords = [k.lower() for k in keywords]
pattern = '|'.join(keywords)
pattern

'ban|islam|isis|mosque|muslim|radical|refugee|register|registry|religion|religious|terror|executive|order|immigrant'

In [17]:
# get certain groups
df1 = df_pres[ df_pres.text.str.lower().str.contains(pattern) ]
    
# reindex
df1.index = range(df1.shape[0])   

# write                                                                                            
df1.to_csv('president_tweets.csv', mode='w', index=False)
print(df1.shape)
df1.head(3)

(309, 5)


Unnamed: 0,user,datetime,text,id,entities
0,POTUS,2017-02-26 18:45:40,"Russia talk is FAKE NEWS put out by the Dems, ...",https://twitter.com/statuses/835923804983603202,
1,POTUS,2017-02-24 19:46:14,This afternoon I signed an executive order to ...,https://twitter.com/statuses/835214271453806592,https://twitter.com/i/web/status/8352142714538...
2,POTUS,2017-02-03 19:13:44,Remarks by President Trump at Signing of Execu...,https://twitter.com/statuses/827595948469673986,https://www.whitehouse.gov/the-press-office/20...


In [18]:
# get certain groups
df2 = df_gov[ df_gov.text.str.lower().str.contains(pattern) ]
    
# reindex
df2.index = range(df2.shape[0])   

# write                                                                                           
df2.to_csv('administration_tweets.csv', mode='w', index=False)
print(df2.shape)
df2.head(3)

(144, 5)


Unnamed: 0,user,datetime,text,id,entities
0,GenFlynn,2016-11-06 03:28:40,This wasn't workplace violence. This was a rad...,https://twitter.com/statuses/795105595485523968,https://twitter.com/i/web/status/7951055954855...
1,GenFlynn,2016-11-02 21:52:23,"After Mosul is liberated, ISIS could attack US...",https://twitter.com/statuses/793933801315561473,http://fxn.ws/2fFhvWL
2,GenFlynn,2016-10-04 23:41:10,"Leaked FBI Data Reveal 7,700 Terrorist Encount...",https://twitter.com/statuses/783451928697057284,http://www.breitbart.com/texas/2016/09/26/leak...


In [8]:
# get certain groups
df3 = df_news[ df_news.text.str.lower().str.contains(pattern) ]
    
# reindex
df3.index = range(df3.shape[0])   

print(df3.shape)
df3.head(3)

(29111, 5)


Unnamed: 0,user,datetime,text,id,entities
0,AP,2017-03-11 01:26:09,Turkey aims to repair its image in West by jus...,https://twitter.com/statuses/840373244360314880,http://apne.ws/2mtNN7E
1,AP,2017-03-10 23:27:02,Calif. man pleads guilty to stabbing US airman...,https://twitter.com/statuses/840343267921281024,http://apne.ws/2mc4u6o
2,AP,2017-03-10 06:30:12,"VIDEO: Amal Clooney urges Iraq, world's nation...",https://twitter.com/statuses/840087372578471936,http://apne.ws/2lJGImj


# Analysis

In [9]:
import nltk
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from nltk.tokenize import TweetTokenizer
from nltk.tokenize import RegexpTokenizer

stop = stopwords.words('english') 
tweet_tokenizer = TweetTokenizer()
word_tokenizer = RegexpTokenizer(r'\w+')

In [10]:
stop.extend(['.', ',', '?', '!', '\'','’', '"',':', ';', '-','/', '\\', '(', ')', '#', '&', 'U','S','A'] )
corpus = df1['text'].values
corpus = ' '.join(corpus)
corpus = corpus.lower()
all_tokens = tweet_tokenizer.tokenize(corpus)
all_tokens = [w for w in all_tokens if w not in stop]

fdist = nltk.FreqDist(all_tokens)

In [11]:
fdist.most_common(25) # 25 most common words

[('isis', 89),
 ('order', 64),
 ('hillary', 54),
 ('trump', 35),
 ('radical', 32),
 ('obama', 30),
 ('islamic', 28),
 ('people', 28),
 ('country', 26),
 ('great', 26),
 ('get', 25),
 ('president', 24),
 ('terrorism', 24),
 ('terrorists', 24),
 ('america', 23),
 ('terrorist', 20),
 ('@realdonaldtrump', 20),
 ('terror', 19),
 ('must', 19),
 ('crooked', 19),
 ('executive', 18),
 ('signing', 18),
 ('going', 18),
 ('time', 18),
 ('clinton', 18)]

In [35]:
from newspaper import Article

url = 'http://bit.ly/2k4fvqgEUPoll'
a = Article(url)
a.download()
a.parse()

print(a.title)
print(a.authors)
print(a.top_image)
print(a.movies)
print(a.text)

Majority in Leading EU Nations Support Trump-Style Travel Ban: Poll
['Alastair Jamieson']
https://media1.s-nbcnews.com/j/newscms/2017_06/1860461/ss-170110-migrants-serbia-winter-cr_01_2759cb78027ed04ab949ccd37eef72ba.nbcnews-fp-1200-800.jpg
[]
A majority of Europeans would support a Trump-style ban on further migration from mainly Muslim countries, according to a poll of more than 10,000 people in 10 countries.

An average of 55 percent of those surveyed for London-based think tank, Chatham House, agreed that immigration from Muslim-majority countries should be halted.

The poll released Tuesday comes after a bruising 18 months that have seen a string of terrorist massacres linked to radical Islam as well as record-setting levels of migration that have created social tensions across the continent.

Migrants wait to receive free food during a snowfall outside a derelict customs warehouse in Belgrade, Serbia on Jan. 9, 2017. Marko Djurica / Reuters, file

Majorities in all but two of the

In [37]:
a.nlp()
print(a.keywords, '\n')
print(a.summary)

['majority', 'ban', 'trumpstyle', 'poll', 'muslim', 'countries', 'chatham', 'support', 'leading', 'eu', 'surveyed', 'migration', 'travel', 'supported', 'house', 'nations'] 

a majority of europeans would support a trump-style ban on further migration from mainly muslim countries, according to a poll of more than 10,000 people in 10 countries.
an average of 55 percent of those surveyed for london-based think tank, chatham house, agreed that immigration from muslim-majority countries should be halted.
migrants wait to receive free food during a snowfall outside a derelict customs warehouse in belgrade, serbia on jan. 9, 2017.
chatham house called the findings "striking and sobering" and said in a news release that the figures "suggest that public opposition to further migration from predominantly muslim states is by no means confined to trump's electorate."
the chatham house poll, conducted with kantar public, surveyed samples of over-18s in austria, belgium, france, germany, greece, hun

In [67]:
import nltk

tokens = nltk.word_tokenize(a.text.lower())
text = nltk.Text(tokens)

text.concordance('muslim', width=60, lines=10)
#print(tokens)

Displaying 3 of 3 matches:
                           muslim countries , according to a
gration from predominantly muslim states is by no means conf
ther migration from mainly muslim countries should be stoppe


In [84]:
ngram = 15
c = nltk.ConcordanceIndex(tokens, key = lambda s: s.lower())
concodances = [text.tokens[offset-ngram:offset+ngram] for offset in c.offsets('muslim') 
               if offset>=ngram and offset<len(text.tokens)-ngram]
concordances = []
for offset in c.offsets('muslim'):
    if offset>=ngram and offset-ngram<len(text.tokens):
        string = ' '.join( text.tokens[offset-ngram:offset+ngram] )
        concordances.append(string)
    elif offset<ngram and offset-ngram<len(text.tokens):
        string = ' '.join( text.tokens[offset:offset+ngram] )
        concordances.append(string)
    elif offset>=ngram and offset-ngram>=len(text.tokens):
        string = ' '.join( text.tokens[offset:offset] )
        concordances.append(string)
        
for concordance in concordances:
    print(concordance, '\n')

muslim countries , according to a poll of more than 10,000 people in 10 countries 

news release that the figures `` suggest that public opposition to further migration from predominantly muslim states is by no means confined to trump 's electorate . '' the poll 

chatham house ( @ chathamhouse ) february 7 , 2017 'all further migration from mainly muslim countries should be stopped' agree : poland 71 % france 61 % uk 47 

