### Sentiment Analysis
#### TextBlob

In [None]:
!pip install vaderSentiment



In [None]:
### Sentiment analysis
import nltk
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [None]:
# Fetch every NLTK resource the TextBlob cells rely on in a single setup cell, so the
# notebook works from a fresh kernel without ad-hoc downloads further down:
#   punkt, punkt_tab                     -> tokenizer models (blob.tokens / blob.words)
#   averaged_perceptron_tagger(+ _eng)   -> part-of-speech tagger models (blob.tags)
#   brown                                -> corpus used for noun-phrase extraction
#   movie_reviews                        -> training corpus for NaiveBayesAnalyzer
# nltk.download() is idempotent: packages that are already present are not fetched again.
NLTK_RESOURCES = (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'brown',
    'movie_reviews',
)

for resource in NLTK_RESOURCES:
    nltk.download(resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.
[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.


True

In [None]:
# Sample post analysed throughout the notebook. Adjacent string literals are joined
# into one string, so the value is unchanged while the lines stay readable.
post = (
    "Our analysis shows concerted efforts by coordinated accounts to "
    "disseminate misleading, redundant, biased, and AI-generated content "
    "through a cross-platform information operation. "
    "The network spans X and YouTube, disseminating political content "
    "through duplicated mock news sites."
)

In [None]:
# Wrap the sample post in a TextBlob; the cells below query this object for
# tokens, POS tags, noun phrases, word counts, and sentiment.
blob = TextBlob(post)

In [None]:
# Tokenizer data needed for tokenization (nltk is already imported in the imports cell,
# so it is not re-imported here).
nltk.download('punkt_tab')

# TextBlob provides basic NLP functions
# Tokenizing a sentence: punctuation marks come back as separate tokens
blob.tokens

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


WordList(['Our', 'analysis', 'shows', 'concerted', 'efforts', 'by', 'coordinated', 'accounts', 'to', 'disseminate', 'misleading', ',', 'redundant', ',', 'biased', ',', 'and', 'AI-generated', 'content', 'through', 'a', 'cross-platform', 'information', 'operation', '.', 'The', 'network', 'spans', 'X', 'and', 'YouTube', ',', 'disseminating', 'political', 'content', 'through', 'duplicated', 'mock', 'news', 'sites', '.'])

In [None]:
# Tagger model needed for POS tagging (nltk is already imported in the imports cell,
# so it is not re-imported here).
nltk.download('averaged_perceptron_tagger_eng')

# POS tagging: a list of (word, tag) pairs
blob.tags

[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


[('Our', 'PRP$'),
 ('analysis', 'NN'),
 ('shows', 'VBZ'),
 ('concerted', 'VBN'),
 ('efforts', 'NNS'),
 ('by', 'IN'),
 ('coordinated', 'JJ'),
 ('accounts', 'NNS'),
 ('to', 'TO'),
 ('disseminate', 'VB'),
 ('misleading', 'NN'),
 ('redundant', 'NN'),
 ('biased', 'VBN'),
 ('and', 'CC'),
 ('AI-generated', 'JJ'),
 ('content', 'NN'),
 ('through', 'IN'),
 ('a', 'DT'),
 ('cross-platform', 'JJ'),
 ('information', 'NN'),
 ('operation', 'NN'),
 ('The', 'DT'),
 ('network', 'NN'),
 ('spans', 'NNS'),
 ('X', 'VBP'),
 ('and', 'CC'),
 ('YouTube', 'NNP'),
 ('disseminating', 'VBG'),
 ('political', 'JJ'),
 ('content', 'NN'),
 ('through', 'IN'),
 ('duplicated', 'VBN'),
 ('mock', 'NN'),
 ('news', 'NN'),
 ('sites', 'NNS')]

In [None]:
# Extract the noun phrases found in the post (returned lower-cased),
# rather than individual nouns
blob.noun_phrases

WordList(['analysis shows', 'ai-generated', 'cross-platform information operation', 'network spans', 'youtube', 'political content', 'mock news sites'])

In [None]:
# Count how many times 'youtube' appears in the post.
# The text spells it 'YouTube', yet the recorded result is 1, so the match is case-insensitive here.
blob.words.count('youtube')

1

In [None]:
# By default TextBlob implements dictionary-based sentiment analysis:
# "By default, it calculates average polarity and subjectivity over each word in a given text using a dictionary of adjectives and their hand-tagged scores."
# Source: https://stackoverflow.com/questions/43688542/textblob-sentiment-algorithm (note: the first answer there is partially wrong)
blob.sentiment

Sentiment(polarity=-0.1, subjectivity=0.15000000000000002)

TextBlob sentiment output:

Polarity in [-1, 1] := [most negative, most positive]

Subjectivity in [0, 1] := [factual, personal opinion]

In [None]:
# Try other sentences, starting with a negated superlative ("not the best")
test_msg1 = "this is not the best football team"

In [None]:
# Score the message without rebinding `blob`, so `blob` keeps referring to the sample
# post and the earlier cells give the same results if they are re-run.
# Note: the recorded output is polarity=1.0, i.e. the negation ("not the best")
# is not reflected in the dictionary-based score.
TextBlob(test_msg1).sentiment

Sentiment(polarity=1.0, subjectivity=0.3)

In [None]:
# Second probe: a negated negative ("not too bad"), which reads as mildly positive
test_msg2 = "hey this is not too bad"

In [None]:
# Score the message without rebinding `blob`, so `blob` keeps referring to the sample
# post and the earlier cells give the same results if they are re-run.
# Note: the recorded output is a clearly negative polarity even though
# "not too bad" is mild praise.
TextBlob(test_msg2).sentiment

Sentiment(polarity=-0.6999999999999998, subjectivity=0.6666666666666666)

In [None]:
# NaiveBayesAnalyzer option, trained on movie reviews
# https://www.dataquest.io/blog/naive-bayes-tutorial/
from textblob.sentiments import NaiveBayesAnalyzer

# Build the analyzer once and share it between the blobs: every NaiveBayesAnalyzer
# instance trains its own classifier on first use, so constructing a new one per
# message repeats that (slow) training for no benefit.
nb_analyzer = NaiveBayesAnalyzer()

# Print each message followed by its Sentiment(classification, p_pos, p_neg) result.
for msg in (test_msg1, test_msg2):
    print(msg)
    print(TextBlob(msg, analyzer=nb_analyzer).sentiment)

this is not the best football team
Sentiment(classification='neg', p_pos=0.41870201702509297, p_neg=0.5812979829749073)
hey this is not too bad
Sentiment(classification='neg', p_pos=0.21032203786065207, p_neg=0.7896779621393482)


In [None]:
# Classify the full sample post with the Naive Bayes analyzer (a fresh analyzer
# instance is constructed here).
# NOTE(review): the recorded output labels the post 'pos' with p_pos close to 1,
# which suggests a model trained on movie reviews does not transfer well to this
# kind of text — worth discussing next to the result.
blob = TextBlob(post, analyzer=NaiveBayesAnalyzer())
blob.sentiment

Sentiment(classification='pos', p_pos=0.9975511715073467, p_neg=0.002448828492648301)

Train your own classifiers:
https://textblob.readthedocs.io/en/dev/api_reference.html#module-textblob.classifiers

#### VADER (Valence Aware Dictionary and Sentiment Reasoner)

In [None]:
# VADER sentiment analyser
# dictionary-based, with rule-based adjustments
# A single instance is created here and reused by all the VADER cells below.
analyser = SentimentIntensityAnalyzer()

In [None]:
# Show the docstring of polarity_scores, the method used to score text below
help(analyser.polarity_scores)

Help on method polarity_scores in module vaderSentiment.vaderSentiment:

polarity_scores(text) method of vaderSentiment.vaderSentiment.SentimentIntensityAnalyzer instance
    Return a float for sentiment strength based on the input text.
    Positive values are positive valence, negative value are negative
    valence.



Compound scores in [-1, 1] := [most negative, most positive]

neg, neu, pos in [0, 1]

In [None]:
# Baseline example: polarity_scores returns a dict with 'neg', 'neu', 'pos' and 'compound' entries
sentiment = analyser.polarity_scores('This is an example of a happy post')
print(sentiment)

{'neg': 0.0, 'neu': 0.654, 'pos': 0.346, 'compound': 0.5719}


In [None]:
# The neg / neu / pos entries are shares of the text, so together they should add up to 1
sum(sentiment[part] for part in ('neg', 'neu', 'pos'))

1.0

In [None]:
# Impact of punctuation: the baseline sentence with a trailing '!' appended,
# to compare its scores against the baseline example
sentiment = analyser.polarity_scores('This is an example of a happy post!')
print(sentiment)

{'neg': 0.0, 'neu': 0.637, 'pos': 0.363, 'compound': 0.6114}


In [None]:
# Impact of emoji: the same sentence with two emoji inserted before 'post',
# to compare its scores against the punctuation-only variant
sentiment = analyser.polarity_scores('This is an example of a happy 😁 ❤️ post! ')
print(sentiment)

{'neg': 0.0, 'neu': 0.65, 'pos': 0.35, 'compound': 0.7901}


In [None]:
print(test_msg1)
sentiment = analyser.polarity_scores(test_msg1)
print(sentiment)

print(test_msg2)
sentiment = analyser.polarity_scores(test_msg2)
print(sentiment)

this is not the best football team
{'neg': 0.36, 'neu': 0.64, 'pos': 0.0, 'compound': -0.5216}
hey this is not too bad
{'neg': 0.0, 'neu': 0.637, 'pos': 0.363, 'compound': 0.431}


In [None]:
print(post)
sentiment = analyser.polarity_scores(post)
print(sentiment)

Our analysis shows concerted efforts by coordinated accounts to disseminate misleading, redundant, biased, and AI-generated content through a cross-platform information operation. The network spans X and YouTube, disseminating political content through duplicated mock news sites.
{'neg': 0.192, 'neu': 0.808, 'pos': 0.0, 'compound': -0.765}
