<h3>Using the Twitter API to gather tweets for specific hashtags and classify their sentiment</h3>

In [2]:
from textblob import TextBlob
from twython import Twython
import json
import pandas as pd
import re

def clean_tweet(tweet):
    """Strip mentions, URLs and punctuation from a tweet and collapse whitespace.

    Parameters
    ----------
    tweet : str
        Raw tweet text.

    Returns
    -------
    str
        The remaining ASCII letters/digits as single-space-separated words
        (an empty string if nothing is left).
    """
    # Raw string: the pattern contains regex escapes (\w, \S, \/) that are not
    # valid *string* escapes and trigger a warning in a plain literal.
    # Alternatives: @mentions (Twitter handles may contain underscores, so "_"
    # is part of the handle class), any character that is not an ASCII
    # letter/digit/space/tab, and scheme://... URLs.
    pattern = r"(@[A-Za-z0-9_]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)"
    # Replace every match with a space, then split/join to collapse the runs
    # of whitespace the substitutions leave behind.
    return ' '.join(re.sub(pattern, " ", tweet).split())

def sentiment(tweet):
    '''
    Classify a tweet as positive (1) or not positive (0) with TextBlob.

    The text is first normalised by clean_tweet, then scored through
    TextBlob's sentiment.polarity. Only a strictly positive polarity
    yields 1; neutral (polarity == 0) and negative tweets both yield 0.
    '''
    polarity = TextBlob(clean_tweet(tweet)).sentiment.polarity
    return 1 if polarity > 0 else 0

### Note: You cannot get tweets unless you have access to the `Twitter API`
<h3>Check the format below in order to use your credentials</h3>

#### Define your credentials in the `twitter_credentials.json` file (the code below reads the `consumer_key` and `consumer_secret` keys), using the keys you get from the Twitter API, <u>BEFORE EXECUTING THIS PART</u>

In [3]:
# Read the API keys from the local JSON file; the keys used below are
# 'consumer_key' and 'consumer_secret'.
with open("twitter_credentials.json", "r") as file:
    creds = json.load(file)

# Build the Twython client from the consumer key/secret pair only.
# creds['access_token'] and creds['access_token_secret'] are not passed.
python_tweets = Twython(
    creds['consumer_key'],
    creds['consumer_secret'],
)
# Hashtags queried in the cells below: ['airpods','iphone12','MacBookPro16']

#### Customizing queries with hashtags, timestamp, language and quantity
In this project the goal was to collect a balanced quantity of tweets for each `label` (positive and negative tweets) and for each `hashtag` (#airpods, #iphone12, #macbookpro16).

In [7]:
# Query the Twitter search endpoint for 'airpods' tweets in English.
# NOTE(review): the standard search API documents a maximum of 100 results per
# request for `count`, so 5000 is presumably capped server-side -- confirm.
response = python_tweets.search(q='airpods', since = "2016-12-13", count=5000, lang='en')

# Uncomment to inspect the raw payload:
# print(json.dumps(response, sort_keys = True, indent = 2))

# Collect up to `per_label_quota` positive and `per_label_quota` negative
# tweets. This cell (re)creates `dict_`; the later query cells append to it.
per_label_quota = 14
dict_ = {'id': [], 'text': [], 'label':[]}
p = 0 # positive meaning tweets
n = 0 # negative meaning tweets
for status in response['statuses']:
    # Skip retweets: both flagged ones and manual "RT @user" quotes.
    if status['retweeted'] or 'RT @' in status['text']:
        continue
    if n == per_label_quota and p == per_label_quota:
        break
    # Classify once and branch on the result. The previous `elif` re-tested
    # for a *positive* sentiment, so negative tweets were never collected and
    # surplus positive tweets were stored with label 0.
    is_positive = sentiment(status['text'])
    if is_positive and p < per_label_quota:
        label = 1
        p += 1
    elif not is_positive and n < per_label_quota:
        label = 0
        n += 1
    else:
        # Quota for this tweet's class is already full.
        continue
    dict_['id'].append(status['id'])
    dict_['text'].append(status['text'])
    dict_['label'].append(label)

# Structure data in a pandas DataFrame for easier manipulation
df = pd.DataFrame(dict_)

Unnamed: 0,id,text,label
0,1244295516927688712,@SAfmnews The tweets in here are full of spark...,1
1,1244171436907999232,@CHINWENDUH @Ikelectron @oblomart @I_amDozie H...,1
2,1244043940736704517,"@JuliaHB1 l am old,but I understand that this ...",1
3,1243605527663448068,@pril_98 @TwoDaeFourEight @lifelessmachine @sp...,1
4,1243259668174008321,someone is already 21 yrs old but can't even m...,1
5,1242642040103424000,With kiwis using their devices more &amp; scam...,1
6,1242567113262256128,@digiwonk @nora3000 Hi big fan of @spark and @...,1
7,1242452796659523584,"Let there be light, let there be light\nI spar...",1
8,1242189629819887616,@Issamoodi @weakintheheart @trishapaytas While...,1


In [4]:
# Query the Twitter search endpoint for 'iphone12' tweets in English.
# NOTE(review): the standard search API documents a maximum of 100 results per
# request for `count`, so 2000 is presumably capped server-side -- confirm.
response = python_tweets.search(q='iphone12', since = "2019-09-20", count=2000, lang='en')

# Uncomment to inspect the raw payload:
# print(json.dumps(response, sort_keys = True, indent = 2))

# Collect up to `per_label_quota` positive and `per_label_quota` negative
# tweets. Results are appended to the existing `dict_` created by the airpods
# cell, so that cell must have been run first.
per_label_quota = 8
p = 0 # positive meaning tweets
n = 0 # negative meaning tweets
for status in response['statuses']:
    # Skip retweets: both flagged ones and manual "RT @user" quotes.
    if status['retweeted'] or 'RT @' in status['text']:
        continue
    if n == per_label_quota and p == per_label_quota:
        break
    # Classify once and branch on the result. The previous `elif` re-tested
    # for a *positive* sentiment, so negative tweets were never collected and
    # surplus positive tweets were stored with label 0.
    is_positive = sentiment(status['text'])
    if is_positive and p < per_label_quota:
        label = 1
        p += 1
    elif not is_positive and n < per_label_quota:
        label = 0
        n += 1
    else:
        # Quota for this tweet's class is already full.
        continue
    dict_['id'].append(status['id'])
    dict_['text'].append(status['text'])
    dict_['label'].append(label)

# Structure data in a pandas DataFrame for easier manipulation
df = pd.DataFrame(dict_)

In [5]:
# Query the Twitter search endpoint for 'macbookpro16' tweets in English.
# NOTE(review): the standard search API documents a maximum of 100 results per
# request for `count`, so 100000 is presumably capped server-side -- confirm.
response = python_tweets.search(q='macbookpro16', since = "2013-06-05", count=100000, lang='en')

# Uncomment to inspect the raw payload:
# print(json.dumps(response, sort_keys = True, indent = 2))

# Collect up to `per_label_quota` positive and `per_label_quota` negative
# tweets. Results are appended to the existing `dict_` created by the airpods
# cell, so that cell must have been run first.
per_label_quota = 3
p = 0 # positive meaning tweets
n = 0 # negative meaning tweets
for status in response['statuses']:
    # Skip retweets: both flagged ones and manual "RT @user" quotes.
    if status['retweeted'] or 'RT @' in status['text']:
        continue
    if n == per_label_quota and p == per_label_quota:
        break
    # Classify once and branch on the result. The previous `elif` re-tested
    # for a *positive* sentiment, so negative tweets were never collected and
    # surplus positive tweets were stored with label 0.
    is_positive = sentiment(status['text'])
    if is_positive and p < per_label_quota:
        label = 1
        p += 1
    elif not is_positive and n < per_label_quota:
        label = 0
        n += 1
    else:
        # Quota for this tweet's class is already full.
        continue
    dict_['id'].append(status['id'])
    dict_['text'].append(status['text'])
    dict_['label'].append(label)

# Structure data in a pandas DataFrame for easier manipulation
df = pd.DataFrame(dict_)

In [7]:
# Summarise the collected frame: row count, per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 3 columns):
id       50 non-null int64
text     50 non-null object
label    50 non-null int64
dtypes: int64(2), object(1)
memory usage: 1.3+ KB


#### Export your dataframe as a `json` file

In [6]:
# Save the frame to piratacodex.json; orient='records' writes a JSON array
# with one object per row.
df.to_json(r'piratacodex.json', orient='records')