In [102]:
import snscrape.modules.twitter as sntwitter
import pandas as pd
import itertools

In [103]:

query = '"e-levy" lang:en until:2022-09-20 since:2022-05-01 -filter:links'

tweets = []
limit = 100

data = sntwitter.TwitterSearchScraper(query).get_items()
for tweet in data:
    if len(tweets) == limit:
        break
    else:
        tweets.append([tweet.id, tweet.content, tweet.date])
        
df = pd.DataFrame(tweets, columns=['id', 'Tweet', 'Date'])

df.to_csv('stream.csv', index=False, columns=['id','Tweet','Date'])

In [104]:
print(df.head())

                    id                                              Tweet  \
0  1571994188660744192  @edburtler @shamimamuslim Notice that Shamima ...   
1  1571992159183593473  @edburtler @shamimamuslim QUESTION: \nWhy was ...   
2  1571981363699679233  @FrankOw18664478 @hearttooclean @mandemthe1st ...   
3  1571981131238543366  @FrankOw18664478 @hearttooclean @mandemthe1st ...   
4  1571980917006278656  @FrankOw18664478 @bra_Kofi__ @hearttooclean @m...   

                       Date  
0 2022-09-19 22:46:35+00:00  
1 2022-09-19 22:38:31+00:00  
2 2022-09-19 21:55:37+00:00  
3 2022-09-19 21:54:42+00:00  
4 2022-09-19 21:53:51+00:00  


In [105]:
twitter_df = df
print(twitter_df.shape)

(100, 3)


In [152]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from scipy.special import softmax

In [154]:
MODEL = f"cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

Downloading: 100%|██████████| 747/747 [00:00<00:00, 249kB/s]
Downloading: 100%|██████████| 878k/878k [00:01<00:00, 570kB/s]  
Downloading: 100%|██████████| 446k/446k [00:28<00:00, 16.2kB/s] 
Downloading: 100%|██████████| 150/150 [00:00<00:00, 150kB/s]
Downloading: 100%|██████████| 476M/476M [01:49<00:00, 4.55MB/s] 


In [158]:
def polarity_scores_roberta(example):
    example = twitter_df['Tweet'][6]
    encoded_text = tokenizer(example, return_tensors='pt')
    output = model(**encoded_text)
    scores = output[0][0].detach().numpy()
    scores = softmax(scores)
    scores_dict = {
        'negative': scores[0],
        'neutral': scores[1],
        'positive': scores[2]
    }
    return scores_dict


res = {}
for i, row in tqdm(twitter_df.iterrows(), total=len(df)):
    try:   
        tweet_text = row['Tweet']
        tweet_id = row['id']
        roberta_results = polarity_scores_roberta(tweet_text)
        res[tweet_id] = roberta_results
    except RuntimeError:
        tweet_id = row['id']
        print ("Broke for {tweet_id} ")

print(res)

  0%|          | 0/100 [00:00<?, ?it/s]

{1571994188660744192: {'negative': 0.02020439, 'neutral': 0.39065418, 'positive': 0.58914137}, 1571992159183593473: {'negative': 0.02020439, 'neutral': 0.39065418, 'positive': 0.58914137}, 1571981363699679233: {'negative': 0.02020439, 'neutral': 0.39065418, 'positive': 0.58914137}, 1571981131238543366: {'negative': 0.02020439, 'neutral': 0.39065418, 'positive': 0.58914137}, 1571980917006278656: {'negative': 0.02020439, 'neutral': 0.39065418, 'positive': 0.58914137}, 1571980514512232452: {'negative': 0.02020439, 'neutral': 0.39065418, 'positive': 0.58914137}, 1571960199564460033: {'negative': 0.02020439, 'neutral': 0.39065418, 'positive': 0.58914137}, 1571958217692909568: {'negative': 0.02020439, 'neutral': 0.39065418, 'positive': 0.58914137}, 1571955389612322816: {'negative': 0.02020439, 'neutral': 0.39065418, 'positive': 0.58914137}, 1571955188034056192: {'negative': 0.02020439, 'neutral': 0.39065418, 'positive': 0.58914137}, 1571952217560875010: {'negative': 0.02020439, 'neutral': 0.

In [159]:
results_df = pd.DataFrame(res).T
results_df = results_df.reset_index().rename(columns={'index' : 'id'})
merged_df = pd.merge(twitter_df,results_df,how='outer')
print (merged_df)

                     id                                              Tweet  \
0   1571994188660744192  @edburtler @shamimamuslim Notice that Shamima ...   
1   1571992159183593473  @edburtler @shamimamuslim QUESTION: \nWhy was ...   
2   1571981363699679233  @FrankOw18664478 @hearttooclean @mandemthe1st ...   
3   1571981131238543366  @FrankOw18664478 @hearttooclean @mandemthe1st ...   
4   1571980917006278656  @FrankOw18664478 @bra_Kofi__ @hearttooclean @m...   
..                  ...                                                ...   
95  1571510724848934915  @Mandemthe1st @ShoeLhaze @BongoIdeas I should ...   
96  1571510199264985088  @GhanaRevenue why are the Telcos charging e-le...   
97  1571509964140843008  @ShoeLhaze @Mandemthe1st @BongoIdeas Bro this ...   
98  1571509203214434307  @Mandemthe1st @ShoeLhaze @BongoIdeas Is Afro d...   
99  1571500062890663941  @Mandemthe1st @ShoeLhaze @BongoIdeas Lol 😂 😹 y...   

                        Date  negative   neutral  positive  
0 