[Reference](https://medium.com/@jeetmohapatra98/sentiment-analysis-of-tweets-baec6757264d)

In [1]:
!pip install snscrape -q
!pip install transformers -q
!pip install scipy -q

[K     |████████████████████████████████| 4.2 MB 8.6 MB/s 
[K     |████████████████████████████████| 596 kB 36.4 MB/s 
[K     |████████████████████████████████| 6.6 MB 36.1 MB/s 
[K     |████████████████████████████████| 86 kB 4.6 MB/s 
[?25h

In [2]:
## importing libraries:

import snscrape.modules.twitter as snt
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax

In [3]:
# We can also create a query generator for Twitter search
def query_generator(user_name:str,to_date:str,from_date:str)->str:
  """Build a Twitter search query for one account within a date window.

  Args:
    user_name: Account handle, without the leading '@'.
    to_date: Value placed after the `until:` operator.
    from_date: Value placed after the `since:` operator.

  Returns:
    The query string "(from:<user>) until:<to_date> since:<from_date>".
  """
  clauses = [
    f"(from:{user_name})",
    f"until:{to_date}",
    f"since:{from_date}",
  ]
  return " ".join(clauses)

In [4]:
def data_collection(querry:str,limit:int):
  """Scrape up to `limit` tweets matching a Twitter search query.

  Args:
    querry: Twitter search query string (e.g. built by `query_generator`).
    limit: Maximum number of tweets to collect. Zero or a negative value
      yields an empty result.

  Returns:
    A pandas DataFrame with columns 'Date', 'User' and 'Tweet', one row per
    collected tweet, in the order the scraper yielded them.
  """
  tweets = []
  # Search with the argument that was passed in, not a notebook-level global,
  # so the function works on a fresh kernel and for any query.
  for tweet in snt.TwitterSearchScraper(querry).get_items():
      # `>=` (not `==`) so a negative limit stops immediately instead of
      # scraping without bound.
      if len(tweets) >= limit:
          break
      tweets.append([tweet.date, tweet.username, tweet.content])

  return pd.DataFrame(tweets, columns=['Date', 'User', 'Tweet'])

In [5]:
# Build the search query for tweets from 'elonmusk'. Note the argument order of
# query_generator is (user_name, to_date, from_date): the "until" date comes
# before the "since" date.
query = query_generator('elonmusk','2020-01-01','2010-01-01')
# Maximum number of tweets to collect.
limit = 10
data = data_collection(query,limit)
# Bare last expression: display the collected tweets as the cell output.
data

Unnamed: 0,Date,User,Tweet
0,2019-12-31 21:37:06+00:00,elonmusk,@engineers_feed @physicsJ It’s a bit slow
1,2019-12-31 06:59:34+00:00,elonmusk,@JohnnaCrider1 It’s not ready yet
2,2019-12-31 06:57:57+00:00,elonmusk,@newscientist Explains 🐈 🎥
3,2019-12-31 02:27:28+00:00,elonmusk,@teslaownersSV @rhoehn Thanks all Tesla club m...
4,2019-12-30 23:27:10+00:00,elonmusk,Rest in peace Syd Mead. Your art will endure.
5,2019-12-30 23:09:08+00:00,elonmusk,@kulpability @cleantechnica C tbh
6,2019-12-30 23:00:08+00:00,elonmusk,@John_Gardi @Erdayastronaut Many ways to solve...
7,2019-12-30 22:44:02+00:00,elonmusk,@ShaneAppleton7 @Erdayastronaut Building proto...
8,2019-12-30 22:41:38+00:00,elonmusk,@JaneidyEve Headed to Tesla Fremont factory to...
9,2019-12-30 22:39:21+00:00,elonmusk,@EvaFoxU Biting off more than I can chew. Beca...


In [6]:
def relabel(scent:str) ->str:
  """Mask user mentions and links in a tweet before it is tokenized.

  The text is split on single spaces; each token that starts with '@' (and is
  longer than the bare '@') becomes '@user', and each token that starts with
  'http' (covering both http:// and https:// links) becomes 'http'. All other
  tokens are kept unchanged.

  Args:
    scent: Raw tweet text.

  Returns:
    The tweet with mentions and links replaced by placeholders, re-joined
    with single spaces.
  """
  words = []
  for word in scent.split(" "):
    if word.startswith('@') and len(word) > 1:
      word = '@user'
    # Match on the 'http' prefix so https:// links are masked as well.
    elif word.startswith('http'):
      word = 'http'
    words.append(word)
  return " ".join(words)

In [7]:
# Load the pretrained sentiment model and its matching tokenizer by model id
# (the files are downloaded on first use, as the progress bars below show).
# `model` and `tokenizer` are notebook-level names that `analyse` reads as globals.
roberta = "cardiffnlp/twitter-roberta-base-sentiment"

model = AutoModelForSequenceClassification.from_pretrained(roberta)
tokenizer = AutoTokenizer.from_pretrained(roberta)

Downloading:   0%|          | 0.00/747 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/476M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/150 [00:00<?, ?B/s]

In [8]:
def analyse(data):
  """Score the sentiment of every tweet in `data`.

  Each tweet is normalised with `relabel`, tokenized with the notebook-level
  `tokenizer`, passed through the notebook-level `model`, and the first row
  of the model's first output is converted to probabilities with softmax.

  Args:
    data: DataFrame with a 'Tweet' column of strings. Any index is accepted;
      rows are processed in positional order.

  Returns:
    A new DataFrame with a fresh 0..n-1 index and the columns 'Tweet' (the
    normalised text) and 'Negative', 'Neutral', 'Positive' (float scores
    taken from positions 0, 1 and 2 of the softmax output). An empty input
    yields an empty frame with the same columns.
  """
  columns = ['Tweet', 'Negative', 'Neutral', 'Positive']
  records = []
  # Iterate over the values rather than integer labels, so frames whose index
  # is not 0..n-1 (e.g. after filtering) are handled correctly.
  for raw_tweet in data["Tweet"].tolist():
    tweet_proc = relabel(raw_tweet)
    encoded_tweet = tokenizer(tweet_proc, return_tensors='pt')
    output = model(**encoded_tweet)
    scores = softmax(output[0][0].detach().numpy())
    records.append({
      'Tweet': tweet_proc,
      'Negative': float(scores[0]),
      'Neutral': float(scores[1]),
      'Positive': float(scores[2]),
    })
  # Build the frame once from the collected rows; this avoids chained-indexing
  # assignment and gives the score columns a numeric dtype.
  return pd.DataFrame(records, columns=columns)

In [9]:
final_data = analyse(data)
final_data

Unnamed: 0,Tweet,Negative,Neutral,Positive
0,@user @user It’s a bit slow,0.582586,0.387859,0.029556
1,@user It’s not ready yet,0.491959,0.474992,0.03305
2,@user Explains 🐈 🎥,0.011154,0.70651,0.282336
3,@user @user Thanks all Tesla club members for ...,0.000821,0.009198,0.989982
4,Rest in peace Syd Mead. Your art will endure.,0.026475,0.390003,0.583522
5,@user @user C tbh,0.117984,0.778231,0.103785
6,"@user @user Many ways to solve this problem, b...",0.547553,0.396345,0.056101
7,@user @user Building prototypes is relatively ...,0.086065,0.494218,0.419717
8,@user Headed to Tesla Fremont factory tomorrow...,0.001949,0.397226,0.600825
9,@user Biting off more than I can chew. Because...,0.734637,0.218241,0.047122
