## Step 1. Read Important Libraries

In [1]:
import numpy as np
import pandas as pd
import tweepy
from datetime import datetime, date, time, timedelta
from collections import Counter
import sys
from wordcloud import WordCloud, STOPWORDS 
import matplotlib.pyplot as plt
import regex
from nltk import tokenize
from nltk.tokenize import TweetTokenizer

## Step 2. Authenticate to Twitter

Put 4 APIs in a csv and read them

In [2]:
API = pd.read_csv('API.csv')

In [3]:
auth = tweepy.OAuthHandler(API['API'][0], API['API'][1])
auth.set_access_token(API['API'][2], API['API'][3])

In [4]:
api = tweepy.API(auth)

## Step 3. Getting Trump's Twitter contents and date

Define function to get date and contents

In [5]:
def Tweets(User):
    All = api.user_timeline(screen_name=User,include_rts = False,count=200,exclude_replies=True,tweet_mode="extended")
    Tweet = []
    Date = []
    for tweet in All:
        if tweet.created_at > datetime(2020,1,11,0,0,0) and tweet.created_at < datetime(2020,1,18,0,0,0):
            Tweet.append(tweet.full_text)
            Date.append(tweet.created_at)
    User_Tweets = pd.DataFrame({'Date':Date,'Tweets':Tweet})
    return User_Tweets

Specify Trump's Twitter account

In [6]:
Tweets = Tweets('realDonaldTrump')

In [7]:
Tweets

Unnamed: 0,Date,Tweets
0,2020-01-17 23:10:50,مردم نجیب ایران، که آمریکا را دوست می دارند، س...
1,2020-01-17 23:10:21,The noble people of Iran—who love America—dese...
2,2020-01-17 22:22:05,"The so-called “Supreme Leader” of Iran, who ha..."
3,2020-01-17 22:22:05,Your 2nd Amendment is under very serious attac...
4,2020-01-17 22:22:04,Words of wisdom. Thank you Ted! https://t.co/L...
5,2020-01-17 20:10:15,Heading to Florida for big Republican Party ev...
6,2020-01-17 15:35:00,They are rigging the election again against Be...
7,2020-01-17 15:17:43,"Getting ready to meet the LSU Tigers, the Nati..."
8,2020-01-17 14:12:06,Mini Mike Bloomberg ads are purposely wrong - ...
9,2020-01-17 13:39:21,Mini Mike Bloomberg doesn’t get on the Democra...


## Step 4. Sentence and Words Manipulation

Remove all other languages (Such as Arabic), number and symbols

In [8]:
Sentence=[]
i = 0
while i<len(Tweets['Tweets']):
    Sentence.append(regex.sub('[^\p{Latin}]',' ', Tweets['Tweets'][i]))
    i=i+1
Sentence

['                                                                                                                                                                                                                                                                                     https   t co RLjGsC WLc',
 'The noble people of Iran who love America deserve a government that s more interested in helping them achieve their dreams than killing them for demanding respect  Instead of leading Iran toward ruin  its leaders should abandon terror and Make Iran Great Again  https   t co RLjGsC WLc',
 'The so called  Supreme Leader  of Iran  who has not been so Supreme lately  had some nasty things to say about the United States and Europe  Their economy is crashing  and their people are suffering  He should be very careful with his words ',
 'Your  nd Amendment is under very serious attack in the Great Commonwealth of Virginia  That s what happens when you vote for Democrats  they will take your g

Split Sentence into words

In [89]:
tknzr = TweetTokenizer()
Sentence_to_word = []
i = 0
while i < len(Sentence):
    Sentence_to_word.append(" ".join(tknzr.tokenize(Sentence[i])))
    i = i + 1

In [90]:
Word = []
i = 0
while i < len(Sentence_to_word):
    Word.append(Sentence_to_word[i].split())
    i = i + 1

I need to drop all the Twitter Links like 'https//t.co.....'

In [91]:
def Deletehttps(Sentence):
    i = 0
    while i < len(Sentence):
        s = 0
        while s < len(Sentence[i]):
            if Sentence[i][s] != 'https':
                s = s + 1
            else:
                Sentence[i] = Sentence[i][0:s]
        i = i + 1

In [92]:
Deletehttps(Word)

Join Words to Sentence

In [93]:
Modified = [[' '.join(i)] for i in Word]

Get scores of each Tweets

In [94]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [95]:
sia = SentimentIntensityAnalyzer()
Score = []
for var in Sentence:
    Score.append(sia.polarity_scores(var)['compound'])

Create a dataframe includes Tweets and scores

In [108]:
Trump = pd.DataFrame({'Score':Score,
                      'Tweets':Modified})
Trump['Tweets'] = Trump['Tweets'].str.get(0)

Delete Tweets which scores are 0 

In [109]:
Trump[Trump.Score!=0]

Unnamed: 0,Score,Tweets
1,0.7316,The noble people of Iran who love America dese...
2,0.6956,The so called Supreme Leader of Iran who has n...
3,0.7528,Your nd Amendment is under very serious attack...
4,0.7096,Words of wisdom Thank you Ted
5,0.4019,Heading to Florida for big Republican Party ev...
6,0.5670,They are rigging the election again against Be...
7,0.9709,Getting ready to meet the LSU Tigers the Natio...
8,-0.2500,Mini Mike Bloomberg ads are purposely wrong A ...
9,-0.4215,Mini Mike Bloomberg doesn t get on the Democra...
10,-0.1531,There is no crime here Entertaining this Impea...


Create label.

When score<0, Tweets are negative.

When score.0, Tweets are positive.

In [110]:
Positive = Trump[Trump.Score>0]
Positive['Label'] = 'Positive'
Negative = Trump[Trump.Score<0]
Negative['Label'] = 'Negative'
Trump = Positive.append(Negative).drop('Score',axis=1).reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


Final dataset looks like this:

In [113]:
Trump

Unnamed: 0,Tweets,Label
0,The noble people of Iran who love America dese...,Positive
1,The so called Supreme Leader of Iran who has n...,Positive
2,Your nd Amendment is under very serious attack...,Positive
3,Words of wisdom Thank you Ted,Positive
4,Heading to Florida for big Republican Party ev...,Positive
5,They are rigging the election again against Be...,Positive
6,Getting ready to meet the LSU Tigers the Natio...,Positive
7,PROMISES MADE PROMISES KEPT KeepAmericaGreat,Positive
8,See you tomorrow CHAMPS,Positive
9,THANK YOU MAGA,Positive


## Step 5. Transform dataframe to csv

Transfer the new dataset to CSV and use R to do machine learning for predicting labels

In [112]:
Trump.to_csv('Trump.csv')