## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import ast
import re
from bs4 import BeautifulSoup
from transformers import pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import gensim
import spacy
import json

import nltk
from nltk.tokenize import sent_tokenize
from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')

from textblob import TextBlob
stop_list = nltk.corpus.stopwords.words('english')
stemmer = nltk.stem.porter.PorterStemmer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\jingy\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# spaCy pipeline used by ner() for the PERSON / ORG / EVENT / GPE / LOC counts.
ner_basic = spacy.load('en_core_web_sm')

In [3]:
# spaCy pipeline loaded from a local directory (path is relative to the working
# directory); ner() reads its entities to build the "CLIMATE" counts.
ner_climate = spacy.load(r"NER model/model-best")

In [4]:
# Hugging Face pipeline used in Step 4; fact_opinion_classification() keeps the
# sentences for which it returns the label "LABEL_0".
fact_opinion_classifier = pipeline(model="lighteternal/fact-or-opinion-xlmr-el")

## Upload Data

Because the analysis takes a long time to run, this notebook uses a sample DataFrame of only 10 news articles.

In [5]:
# Load the article corpus; only the 'body' column is used by the cells below.
articles_df = pd.read_csv('all_articles_cleaned_no_unnecessary_words.csv')

In [6]:
# Keep the first 10 articles and only their text, so the slow NER /
# classification steps stay quick for this demonstration.
articles_sample_df = articles_df.head(10)[['body']]

articles_sample_df

Unnamed: 0,body
0,", San Francisco This video can not be played G..."
1,North Atlantic hurricanes are retaining far mo...
2,US President Donald Trump has accused climate ...
3,video can not be played Sir Ed Davey has won t...
4,Greenland is not used to being the centre of a...
5,Microsoft is poised to launch its game streami...
6,"video can not be played A frequent flyer tax, ..."
7,"People must use less transport, eat less red m..."
8,verely hindered progress in CO2 emissions redu...
9,European Union says it is aiming to become the...


## Step 1: Named Entity Recognition (NER)

In [7]:
def ner_preprocess(text, stopwords=None):
    """Keep only ASCII letters in ``text`` and drop stop words.

    Every character other than a-z / A-Z is replaced by a space, the result is
    split on whitespace, and words found in ``stopwords`` are removed. The
    comparison is case-sensitive and the original casing is kept.

    Args:
        text: Raw text to clean.
        stopwords: Optional iterable of words to remove. Defaults to the
            notebook-level ``stop_list`` (NLTK English stop words).

    Returns:
        The remaining words joined by single spaces.
    """
    if stopwords is None:
        # The original referenced `STOPS`, which is never defined in this
        # notebook; the list built in the import cell is `stop_list`.
        stopwords = stop_list
    stopwords = set(stopwords)  # O(1) membership instead of scanning a list

    tokens = re.sub("[^a-zA-Z]", " ", text).split()
    return " ".join(w for w in tokens if w not in stopwords)

In [8]:
def ner(df):
    """Count the named entities in one article and attach the counts to the row.

    Intended for ``DataFrame.apply(ner, axis=1)``: ``df`` is a single row whose
    ``'body'`` entry holds the article text.

    Args:
        df: Row-like object supporting ``df['body']`` and item assignment.

    Returns:
        The same row with one added entry per entity type ('PERSON', 'ORG',
        'EVENT', 'GPE', 'LOC', 'CLIMATE'). Each entry is a dict mapping the
        entity text to its number of occurrences, ordered by descending count.
    """
    nerdict = {'PERSON': {}, 'ORG': {}, 'EVENT': {}, 'GPE': {}, 'LOC': {}}

    # normal entities: only the labels listed in `nerdict` are counted
    for ent in ner_basic(df['body']).ents:
        if ent.label_ in nerdict:
            counts = nerdict[ent.label_]
            counts[ent.text] = counts.get(ent.text, 0) + 1

    # climate entities: count only spans labelled "CLIMATE". The original
    # if/else reset an existing count to 1 whenever the same text appeared
    # under another label, and also recorded non-CLIMATE entities.
    climatedict = {}
    for ent in ner_climate(df['body']).ents:
        if ent.label_ != "CLIMATE":
            continue
        climatedict[ent.text] = climatedict.get(ent.text, 0) + 1
    nerdict["CLIMATE"] = climatedict

    # sorting of entities (most frequent first)
    for key, ndict in nerdict.items():
        keys = list(ndict.keys())
        values = list(ndict.values())
        sorted_value_index = np.argsort(values)[::-1]
        nerdict[key] = {keys[i]: values[i] for i in sorted_value_index}

    # The loop variable is not called `ner`, to avoid shadowing this function.
    for entity_type, counts in nerdict.items():
        df[entity_type] = counts

    return df

In [9]:
# NOTE: the optional ner_preprocess() cleaning step is not applied here;
# NER runs on the raw 'body' text of each article.

articles_sample_df = articles_sample_df.apply(ner, axis=1)

In [10]:
# Inspect the per-entity-type count columns added by ner().
articles_sample_df

Unnamed: 0,body,PERSON,ORG,EVENT,GPE,LOC,CLIMATE
0,", San Francisco This video can not be played G...","{'Alan Shearer': 1, 'Gary Lineker': 1, 'Robert...","{'BBC News': 2, 'BBC': 1, 'Copernicus': 1, 'th...",{},"{'Greenland': 9, 'Ukraine': 2, 'San Francisco'...","{'Earth': 2, 'Central Europe': 1, 'south': 1, ...","{'sea-level rise': 3, 'ocean': 2, 'flooding': ..."
1,North Atlantic hurricanes are retaining far mo...,"{'Alan Shearer': 1, 'Gary Lineker': 1, 'Robert...","{'BBC': 1, 'Micah Richards': 1, 'Premier Leagu...",{'Hurricane Theta': 1},"{'Ukraine': 2, 'US': 2, 'Athens': 1, 'Norway':...","{'North Atlantic': 2, 'Central Europe': 1, 'th...","{'storms': 3, 'hurricanes': 2, 'rise': 1, 'oli..."
2,US President Donald Trump has accused climate ...,"{'Trump': 7, 'Alan Shearer': 1, 'Gary Lineker'...","{'BBC': 1, 'Micah Richards': 1, 'Premier Leagu...",{},"{'US': 6, 'Ukraine': 2, 'Paris': 2, 'Athens': ...","{'Central Europe': 1, 'earth': 1}","{'climate change': 3, 'IPCC': 2, 'Climate chan..."
3,video can not be played Sir Ed Davey has won t...,"{'Ed': 9, 'Ms Moran': 4, 'Jo Swinson': 2, 'Ms ...","{'BBC': 2, 'Parliament': 2, 'Micah Richards': ...",{},"{'Ukraine': 2, 'Athens': 1, 'Norway': 1, 'US':...",{'Central Europe': 1},"{'rise': 1, 'oligarchs': 1, 'climate changes':..."
4,Greenland is not used to being the centre of a...,"{'Greenlandic': 2, 'Greenland': 2, 'Alan Shear...","{'BBC': 2, 'Copenhagen': 2, 'Micah Richards': ...",{},"{'Greenland': 22, 'US': 14, 'China': 4, 'Denma...","{'Arctic': 2, 'Central Europe': 1, 'Arctic Oce...","{'rise': 1, 'oligarchs': 1, 'climate changes':..."
5,Microsoft is poised to launch its game streami...,"{'Hazas': 3, 'Lancaster': 2, 'Alan Shearer': 1...","{'Microsoft': 3, 'Google': 2, 'Lancaster Unive...",{},"{'Ukraine': 2, 'Athens': 1, 'Norway': 1, 'US':...",{'Central Europe': 1},"{'cloud': 3, 'carbon neutral': 2, 'rise': 1, '..."
6,"video can not be played A frequent flyer tax, ...","{'Covid': 3, 'Extinction Rebellion': 2, 'Alan ...","{'BBC': 1, 'Micah Richards': 1, 'Premier Leagu...",{},"{'UK': 3, 'Ukraine': 2, 'Athens': 1, 'Norway':...",{'Central Europe': 1},"{'rise': 1, 'oligarchs': 1, 'climate changes':..."
7,"People must use less transport, eat less red m...","{'Ian': 7, 'Brexit': 2, 'Alan Shearer': 1, 'Ga...","{'BBC': 1, 'the Confederation of British Indus...",{},"{'UK': 6, 'Ukraine': 2, 'Athens': 1, 'Norway':...",{'Central Europe': 1},"{'Net Zero': 2, 'rise': 1, 'good position': 1,..."
8,verely hindered progress in CO2 emissions redu...,"{'Alan Shearer': 1, 'Gary Lineker': 1, 'Robert...","{'BBC': 2, 'UN': 2, 'Micah Richards': 1, 'Prem...",{},"{'Ukraine': 2, 'Paris': 2, 'Athens': 1, 'Norwa...",{'Central Europe': 1},"{'greenhouse gases': 3, 'CO2 emissions': 2, 'o..."
9,European Union says it is aiming to become the...,"{'Cañete': 2, 'Alan Shearer': 1, 'Gary Lineker...","{'EU': 11, 'UN': 2, '1.5C': 2, 'BBC': 1, 'Mica...",{},"{'Ukraine': 2, 'Sweden': 2, 'Poland': 2, 'Pari...","{'Europe': 2, 'Central Europe': 1}","{'net-zero emissions': 3, 'emissions': 2, 'ris..."


## Step 2: Additional Preprocessing

In [11]:
def basic_preprocess_keepstopwords(self_text):
    """Strip HTML, lowercase, and remove selected punctuation and non-ASCII characters.

    Stop words are kept, and characters outside the removal list below
    (for example '.', '!', '?') are left in place.

    Args:
        self_text: Raw text, possibly containing HTML markup.

    Returns:
        The cleaned, lower-cased, ASCII-only string.
    """
    # 1. Remove html tags
    words = BeautifulSoup(self_text, features="html.parser").get_text()

    # 2. Convert to lower case and drop newlines. This continues from the
    #    tag-stripped text; the original restarted from `self_text`, which
    #    silently discarded step 1.
    # NOTE(review): newlines are removed without inserting a space, so words
    # separated only by a line break are joined - confirm this is intended.
    words = words.lower()
    words = words.replace('\n', '')

    # 3. Remove punctuation. Raw string: `\,` is not a valid escape sequence;
    #    the characters removed are unchanged (backslash and comma included).
    punc = r'''()-[]{};:\,<>/@#$%^&*_~'''
    for ele in punc:
        words = words.replace(ele, "")

    # 4. Drop every non-ASCII character
    return words.encode('ascii', 'ignore').decode()

In [12]:
# Overwrites 'body' with its cleaned, lower-cased form. The entity columns
# computed in Step 1 keep their original casing.
articles_sample_df['body'] = articles_sample_df['body'].apply(basic_preprocess_keepstopwords)

articles_sample_df

Unnamed: 0,body,PERSON,ORG,EVENT,GPE,LOC,CLIMATE
0,san francisco this video can not be played gr...,"{'Alan Shearer': 1, 'Gary Lineker': 1, 'Robert...","{'BBC News': 2, 'BBC': 1, 'Copernicus': 1, 'th...",{},"{'Greenland': 9, 'Ukraine': 2, 'San Francisco'...","{'Earth': 2, 'Central Europe': 1, 'south': 1, ...","{'sea-level rise': 3, 'ocean': 2, 'flooding': ..."
1,north atlantic hurricanes are retaining far mo...,"{'Alan Shearer': 1, 'Gary Lineker': 1, 'Robert...","{'BBC': 1, 'Micah Richards': 1, 'Premier Leagu...",{'Hurricane Theta': 1},"{'Ukraine': 2, 'US': 2, 'Athens': 1, 'Norway':...","{'North Atlantic': 2, 'Central Europe': 1, 'th...","{'storms': 3, 'hurricanes': 2, 'rise': 1, 'oli..."
2,us president donald trump has accused climate ...,"{'Trump': 7, 'Alan Shearer': 1, 'Gary Lineker'...","{'BBC': 1, 'Micah Richards': 1, 'Premier Leagu...",{},"{'US': 6, 'Ukraine': 2, 'Paris': 2, 'Athens': ...","{'Central Europe': 1, 'earth': 1}","{'climate change': 3, 'IPCC': 2, 'Climate chan..."
3,video can not be played sir ed davey has won t...,"{'Ed': 9, 'Ms Moran': 4, 'Jo Swinson': 2, 'Ms ...","{'BBC': 2, 'Parliament': 2, 'Micah Richards': ...",{},"{'Ukraine': 2, 'Athens': 1, 'Norway': 1, 'US':...",{'Central Europe': 1},"{'rise': 1, 'oligarchs': 1, 'climate changes':..."
4,greenland is not used to being the centre of a...,"{'Greenlandic': 2, 'Greenland': 2, 'Alan Shear...","{'BBC': 2, 'Copenhagen': 2, 'Micah Richards': ...",{},"{'Greenland': 22, 'US': 14, 'China': 4, 'Denma...","{'Arctic': 2, 'Central Europe': 1, 'Arctic Oce...","{'rise': 1, 'oligarchs': 1, 'climate changes':..."
5,microsoft is poised to launch its game streami...,"{'Hazas': 3, 'Lancaster': 2, 'Alan Shearer': 1...","{'Microsoft': 3, 'Google': 2, 'Lancaster Unive...",{},"{'Ukraine': 2, 'Athens': 1, 'Norway': 1, 'US':...",{'Central Europe': 1},"{'cloud': 3, 'carbon neutral': 2, 'rise': 1, '..."
6,video can not be played a frequent flyer tax p...,"{'Covid': 3, 'Extinction Rebellion': 2, 'Alan ...","{'BBC': 1, 'Micah Richards': 1, 'Premier Leagu...",{},"{'UK': 3, 'Ukraine': 2, 'Athens': 1, 'Norway':...",{'Central Europe': 1},"{'rise': 1, 'oligarchs': 1, 'climate changes':..."
7,people must use less transport eat less red me...,"{'Ian': 7, 'Brexit': 2, 'Alan Shearer': 1, 'Ga...","{'BBC': 1, 'the Confederation of British Indus...",{},"{'UK': 6, 'Ukraine': 2, 'Athens': 1, 'Norway':...",{'Central Europe': 1},"{'Net Zero': 2, 'rise': 1, 'good position': 1,..."
8,verely hindered progress in co2 emissions redu...,"{'Alan Shearer': 1, 'Gary Lineker': 1, 'Robert...","{'BBC': 2, 'UN': 2, 'Micah Richards': 1, 'Prem...",{},"{'Ukraine': 2, 'Paris': 2, 'Athens': 1, 'Norwa...",{'Central Europe': 1},"{'greenhouse gases': 3, 'CO2 emissions': 2, 'o..."
9,european union says it is aiming to become the...,"{'Cañete': 2, 'Alan Shearer': 1, 'Gary Lineker...","{'EU': 11, 'UN': 2, '1.5C': 2, 'BBC': 1, 'Mica...",{},"{'Ukraine': 2, 'Sweden': 2, 'Poland': 2, 'Pari...","{'Europe': 2, 'Central Europe': 1}","{'net-zero emissions': 3, 'emissions': 2, 'ris..."


## Step 3: Sentence Tokenisation

In [13]:
# Split each cleaned article into a list of sentences with NLTK's sent_tokenize.
articles_sample_df['sentences'] = articles_sample_df['body'].apply(sent_tokenize)

articles_sample_df

Unnamed: 0,body,PERSON,ORG,EVENT,GPE,LOC,CLIMATE,sentences
0,san francisco this video can not be played gr...,"{'Alan Shearer': 1, 'Gary Lineker': 1, 'Robert...","{'BBC News': 2, 'BBC': 1, 'Copernicus': 1, 'th...",{},"{'Greenland': 9, 'Ukraine': 2, 'San Francisco'...","{'Earth': 2, 'Central Europe': 1, 'south': 1, ...","{'sea-level rise': 3, 'ocean': 2, 'flooding': ...",[ san francisco this video can not be played g...
1,north atlantic hurricanes are retaining far mo...,"{'Alan Shearer': 1, 'Gary Lineker': 1, 'Robert...","{'BBC': 1, 'Micah Richards': 1, 'Premier Leagu...",{'Hurricane Theta': 1},"{'Ukraine': 2, 'US': 2, 'Athens': 1, 'Norway':...","{'North Atlantic': 2, 'Central Europe': 1, 'th...","{'storms': 3, 'hurricanes': 2, 'rise': 1, 'oli...",[north atlantic hurricanes are retaining far m...
2,us president donald trump has accused climate ...,"{'Trump': 7, 'Alan Shearer': 1, 'Gary Lineker'...","{'BBC': 1, 'Micah Richards': 1, 'Premier Leagu...",{},"{'US': 6, 'Ukraine': 2, 'Paris': 2, 'Athens': ...","{'Central Europe': 1, 'earth': 1}","{'climate change': 3, 'IPCC': 2, 'Climate chan...",[us president donald trump has accused climate...
3,video can not be played sir ed davey has won t...,"{'Ed': 9, 'Ms Moran': 4, 'Jo Swinson': 2, 'Ms ...","{'BBC': 2, 'Parliament': 2, 'Micah Richards': ...",{},"{'Ukraine': 2, 'Athens': 1, 'Norway': 1, 'US':...",{'Central Europe': 1},"{'rise': 1, 'oligarchs': 1, 'climate changes':...",[video can not be played sir ed davey has won ...
4,greenland is not used to being the centre of a...,"{'Greenlandic': 2, 'Greenland': 2, 'Alan Shear...","{'BBC': 2, 'Copenhagen': 2, 'Micah Richards': ...",{},"{'Greenland': 22, 'US': 14, 'China': 4, 'Denma...","{'Arctic': 2, 'Central Europe': 1, 'Arctic Oce...","{'rise': 1, 'oligarchs': 1, 'climate changes':...",[greenland is not used to being the centre of ...
5,microsoft is poised to launch its game streami...,"{'Hazas': 3, 'Lancaster': 2, 'Alan Shearer': 1...","{'Microsoft': 3, 'Google': 2, 'Lancaster Unive...",{},"{'Ukraine': 2, 'Athens': 1, 'Norway': 1, 'US':...",{'Central Europe': 1},"{'cloud': 3, 'carbon neutral': 2, 'rise': 1, '...",[microsoft is poised to launch its game stream...
6,video can not be played a frequent flyer tax p...,"{'Covid': 3, 'Extinction Rebellion': 2, 'Alan ...","{'BBC': 1, 'Micah Richards': 1, 'Premier Leagu...",{},"{'UK': 3, 'Ukraine': 2, 'Athens': 1, 'Norway':...",{'Central Europe': 1},"{'rise': 1, 'oligarchs': 1, 'climate changes':...",[video can not be played a frequent flyer tax ...
7,people must use less transport eat less red me...,"{'Ian': 7, 'Brexit': 2, 'Alan Shearer': 1, 'Ga...","{'BBC': 1, 'the Confederation of British Indus...",{},"{'UK': 6, 'Ukraine': 2, 'Athens': 1, 'Norway':...",{'Central Europe': 1},"{'Net Zero': 2, 'rise': 1, 'good position': 1,...",[people must use less transport eat less red m...
8,verely hindered progress in co2 emissions redu...,"{'Alan Shearer': 1, 'Gary Lineker': 1, 'Robert...","{'BBC': 2, 'UN': 2, 'Micah Richards': 1, 'Prem...",{},"{'Ukraine': 2, 'Paris': 2, 'Athens': 1, 'Norwa...",{'Central Europe': 1},"{'greenhouse gases': 3, 'CO2 emissions': 2, 'o...",[verely hindered progress in co2 emissions red...
9,european union says it is aiming to become the...,"{'Cañete': 2, 'Alan Shearer': 1, 'Gary Lineker...","{'EU': 11, 'UN': 2, '1.5C': 2, 'BBC': 1, 'Mica...",{},"{'Ukraine': 2, 'Sweden': 2, 'Poland': 2, 'Pari...","{'Europe': 2, 'Central Europe': 1}","{'net-zero emissions': 3, 'emissions': 2, 'ris...",[european union says it is aiming to become th...


## Step 4: Keep the opinion sentences that mention an entity (dropping factual ones) & retrieve their contextual sentences

In [14]:
def fact_opinion_classification(df):
    """Collect, per entity term, the opinion sentences mentioning it plus context.

    Intended for ``DataFrame.apply(..., axis=1)``. For every term in each
    entity-type entry of the row, each sentence containing the lower-cased term
    is classified; sentences labelled "LABEL_0" are kept together with the
    sentence before and after them (where those exist).

    NOTE(review): "LABEL_0" is treated as "opinion", following the original
    comment in this function - confirm against the model card.

    Args:
        df: Row with a 'sentences' list and one dict per entity type
            ('PERSON', 'ORG', 'EVENT', 'GPE', 'LOC', 'CLIMATE').

    Returns:
        The same row with an added 'ner_sentences' entry of the form
        ``{entity_type: {term: "passage passage ..."}}``.
    """
    entity_types = ['PERSON', 'ORG', 'EVENT', 'GPE', 'LOC', 'CLIMATE']
    sentences = df['sentences']

    # A sentence can mention many terms; classify it once and reuse the result
    # instead of re-running the model for every matching term.
    opinion_cache = {}

    def is_opinion(idx):
        if idx not in opinion_cache:
            label = fact_opinion_classifier(sentences[idx])[0]["label"]
            opinion_cache[idx] = (label == "LABEL_0")
        return opinion_cache[idx]

    ner_dict = {}
    # each entity type
    for entity_type in entity_types:
        ner_term_dict = {}
        # each key in entity type
        for ner_term in df[entity_type]:
            ner_term = ner_term.replace("'", "")
            needle = ner_term.lower()
            # each sentence in the article
            for idx, sentence in enumerate(sentences):
                if needle not in sentence or not is_opinion(idx):
                    continue
                # Window of previous / current / next sentence. Slicing clamps
                # at both ends, so a one-sentence article no longer raises
                # IndexError, and the parts are joined with a space so that
                # sentence boundaries survive.
                passage = " ".join(sentences[max(idx - 1, 0): idx + 2])
                # add the sub-document
                if ner_term in ner_term_dict:
                    ner_term_dict[ner_term] = ner_term_dict[ner_term] + " " + passage
                else:
                    ner_term_dict[ner_term] = passage

        ner_dict[entity_type] = ner_term_dict

    df['ner_sentences'] = ner_dict

    return df

In [None]:
# Row-wise: adds the 'ner_sentences' column (entity type -> term -> passages).
articles_sample_df = articles_sample_df.apply(fact_opinion_classification, axis=1)

In [None]:
# Preview the rows with the new 'ner_sentences' column.
articles_sample_df.head()

## Step 5: Sentiment Analysis

In [None]:
def sentiment_analysis_vader(df):
    """Score every entity term's passage with VADER.

    Args:
        df: Mapping ``{entity_type: {term: passage}}`` (one 'ner_sentences'
            value), with keys 'PERSON', 'ORG', 'EVENT', 'GPE', 'LOC', 'CLIMATE'.

    Returns:
        A copy of ``df`` in which each entity type maps to
        ``{term: (label, compound)}``. ``label`` is 'Positive' when the
        compound score is above 0.05, 'Negative' when below -0.05, and
        'Neutral' otherwise. The input itself is left unchanged.
    """
    nerdict = ['PERSON', 'ORG', 'EVENT', 'GPE', 'LOC', 'CLIMATE']
    sid = SentimentIntensityAnalyzer()

    # Work on a copy: the original wrote the scores back into `df`, which
    # replaced the passages stored in the 'ner_sentences' column with scores.
    scored = df.copy()

    for entity_type in nerdict:
        sentiment_result_dict = {}
        for ner_term, passage in df[entity_type].items():
            compound = sid.polarity_scores(passage)['compound']
            if compound > 0.05:
                label = 'Positive'
            elif compound < -0.05:
                label = 'Negative'
            else:
                label = "Neutral"
            sentiment_result_dict[ner_term] = (label, compound)

        scored[entity_type] = sentiment_result_dict

    return scored

In [None]:
# Score each article's per-entity passages; the result goes in a new 'sentiment' column.
articles_sample_df['sentiment'] = articles_sample_df['ner_sentences'].apply(sentiment_analysis_vader)

In [None]:
# Inspect the sample frame including the new 'sentiment' column.
articles_sample_df

# Aggregated Insights

For this segment, we only used the BBC and CNN datasets due to time constraints.

### 1. Upload articles with their sentiment scoring for each entity term

In [None]:
# Precomputed per-article entity sentiment. Only the 'sentiment' column is kept;
# each value is a string that is parsed later with ast.literal_eval.
entity_sentiment_df = pd.read_csv('article_entity_sentiment.csv')

entity_sentiment_df = entity_sentiment_df[['sentiment']]

entity_sentiment_df.head()

### 2. Upload top 20 NER terms for each entity type

In [None]:
# Load the JSON mapping of entity type -> terms. The `with` block closes the
# file handle, which the original left open.
with open("ner.txt", "r") as ner_file:
    ner_list = json.loads(ner_file.read())

In [None]:
# Replace each entity type's collection of terms with
# {term: [score_sum, article_count]}, both starting at 0.
# The loop variable is deliberately not named `ner`: at notebook top level that
# would overwrite the ner() function defined in Step 1.
for entity_type, terms in ner_list.items():
    ner_list[entity_type] = {term: [0, 0] for term in terms}

In [None]:
# Inspect the zero-initialised [score_sum, article_count] accumulators.
ner_list

### 3. Get the sentiment score for each entity term 

In [None]:
ner_type = ['PERSON', 'ORG', 'EVENT', 'GPE', 'LOC', 'CLIMATE']

def get_sentiment_score_by_entity_term(df):
    """Add one article's entity scores to the global ``ner_list`` accumulators.

    ``df['sentiment']`` is a string holding a dict literal of the form
    ``{entity_type: {term: (label, score)}}``. For every tracked term that
    appears in it, the score is added to ``ner_list[type][term][0]`` and the
    article counter ``ner_list[type][term][1]`` is incremented.

    Returns:
        ``df`` unchanged, so the function can be used with ``DataFrame.apply``.
    """
    article_scores = ast.literal_eval(df['sentiment'])
    for entity_type in ner_type:
        for tracked_term, totals in ner_list[entity_type].items():
            if tracked_term not in article_scores[entity_type]:
                continue
            totals[0] += article_scores[entity_type][tracked_term][1]
            totals[1] += 1

    return df

In [None]:
# Run for its side effect: each row's scores are added into `ner_list`.
# The function returns each row unchanged.
entity_df = entity_sentiment_df.apply(get_sentiment_score_by_entity_term, axis=1)

In [None]:
# Collapse each [score_sum, article_count] pair into the mean score
# (0 when the term never appeared in any article).
# - The loop variable is not named `ner`, so the ner() function from Step 1 is
#   not overwritten at notebook top level.
# - Entries that are no longer lists were already averaged by an earlier run of
#   this cell and are skipped, so re-running the cell does not raise.
for entity_type in ner_type:
    term_stats = ner_list[entity_type]
    for term, stats in term_stats.items():
        if not isinstance(stats, list):
            continue
        score_sum, article_count = stats
        term_stats[term] = score_sum / article_count if article_count != 0 else 0

In [None]:
# Persist the averaged scores as the dict's str() representation. The `with`
# block flushes and closes the file, which the original never did.
with open("ner_sentiment_score.txt", "w") as f:
    f.write(str(ner_list))

In [None]:
# Inspect the final per-term average sentiment scores.
ner_list

# Training & Evaluation of Models

### 1. Upload Labeled Data

In [None]:
# Manually labelled sentences; only the text and label columns are kept.
data = pd.read_excel('Sentiment_Labelling.xlsx')

data = data[['Sentences', 'Label']]

data

### 2. Pre-processing & Train-test Split

In [None]:
def basic_preprocess_keepstopwords(self_text):
    """Strip HTML, lowercase, and remove selected punctuation and non-ASCII characters.

    Stop words are kept, and characters outside the removal list below
    (for example '.', '!', '?') are left in place. A function with this name
    is also defined in Step 2 of this notebook; this later definition is the
    one in effect once this cell has run.

    Args:
        self_text: Raw text, possibly containing HTML markup.

    Returns:
        The cleaned, lower-cased, ASCII-only string.
    """
    # 1. Remove html tags
    words = BeautifulSoup(self_text, features="html.parser").get_text()

    # 2. Convert to lower case and drop newlines. This continues from the
    #    tag-stripped text; the original restarted from `self_text`, which
    #    silently discarded step 1.
    # NOTE(review): newlines are removed without inserting a space, so words
    # separated only by a line break are joined - confirm this is intended.
    words = words.lower()
    words = words.replace('\n', '')

    # 3. Remove punctuation. Raw string: `\,` is not a valid escape sequence;
    #    the characters removed are unchanged (backslash and comma included).
    punc = r'''()-[]{};:\,<>/@#$%^&*_~'''
    for ele in punc:
        words = words.replace(ele, "")

    # 4. Drop every non-ASCII character
    return words.encode('ascii', 'ignore').decode()

In [None]:
# Apply the same cleaning used on the articles to the labelled sentences (in place).
data['Sentences'] = data['Sentences'].apply(basic_preprocess_keepstopwords)

data.head()

In [None]:
# Sentence texts and their labels as arrays; the labels are converted to
# integer codes with LabelEncoder.
# NOTE(review): the TextBlob / VADER helpers below return 0 / 1 / 2 for
# negative / neutral / positive - confirm the encoder assigns the same codes
# (inspect encoder.classes_).
reviews = data['Sentences'].values
labels = data['Label'].values
encoder = LabelEncoder()
encoded_labels = encoder.fit_transform(labels)

In [None]:
# Split into 80% train and 20% test, stratified by label.
# test_size is passed explicitly: without it train_test_split holds out 25%,
# which contradicted the 80/20 split stated here.
train_sentences, test_sentences, train_labels, test_labels = train_test_split(
    reviews,
    encoded_labels,
    test_size=0.2,
    random_state=88,
    stratify=encoded_labels,
)

### 3. Training & Evaluation

#### Naive Bayes

In [None]:
# Tokenise every training sentence with NLTK, then build a gensim dictionary
# (token -> integer id) over the tokenised training sentences.
train_corpus = [nltk.word_tokenize(sentence) for sentence in train_sentences]

train_dictionary = gensim.corpora.Dictionary(train_corpus)

In [None]:
# Build the (features, label) pairs that NLTK's classifier trains on.
# Each sentence becomes a dict with one entry per distinct token id, always set
# to 1 (presence only; the bag-of-words count is discarded).
labeled_training_data = [
    ({token_id: 1 for (token_id, _count) in train_dictionary.doc2bow(tokens)}, label)
    for (label, tokens) in zip(train_labels, train_corpus)
]

In [None]:
# Tokenise every test sentence with NLTK, then build a gensim dictionary
# (token -> integer id) over the tokenised test sentences.
test_corpus = [nltk.word_tokenize(sentence) for sentence in test_sentences]

test_dictionary = gensim.corpora.Dictionary(test_corpus)

In [None]:
labeled_test_data = []

for (l, s) in zip(test_labels, test_corpus):

    # Convert the sentence with the TRAINING dictionary. The classifier's
    # features are token ids from `train_dictionary`; ids from a dictionary
    # built on the test set refer to different words, so the original
    # evaluation compared mismatched features.
    vector = train_dictionary.doc2bow(s)

    # Create a dict object to store the document vector (in order to use NLTK's classifier later)
    sent_as_dict = {token_id: 1 for (token_id, tf) in vector}

    # Add the labeled sentence to the labeled data set.
    labeled_test_data.append((sent_as_dict, l))

In [None]:
# Train NLTK's Naive Bayes classifier on the (token-id features, label) pairs.
classifier = nltk.NaiveBayesClassifier.train(labeled_training_data)

In [None]:
# Check the accuracy on the held-out test pairs
print("Accuracy on Naive Bays: ", nltk.classify.accuracy(classifier, labeled_test_data))

#### TextBlob

In [None]:
def sentiment_analysis_textblob(sentence):
    """Classify a sentence as 0, 1 or 2 from its TextBlob polarity.

    The original also computed the subjectivity score, which was never used and
    required building a second TextBlob per sentence; only polarity is
    computed now. The returned values are unchanged.

    Args:
        sentence: Text to score.

    Returns:
        0 when the polarity is below 0, 1 when it is exactly 0, 2 when it is
        above 0.
    """
    polarity = TextBlob(sentence).sentiment.polarity
    if polarity < 0:
        return 0
    if polarity == 0:
        return 1
    return 2


In [None]:
# Score every test sentence with TextBlob and compare with the encoded labels.
test_df = pd.DataFrame(data={'sentence': test_sentences, 'labels': test_labels})

test_df['textblob_scores'] = test_df['sentence'].apply(sentiment_analysis_textblob)

print("Accuracy on TextBlob: ", accuracy_score(test_df['labels'],test_df['textblob_scores']))

#### Vader

In [None]:
def sentiment_analysis_vader(sentence, sid=None):
    """Classify a sentence as 0, 1 or 2 from its VADER compound score.

    Once this cell runs, this definition replaces the dict-based
    ``sentiment_analysis_vader`` from Step 5.

    Args:
        sentence: Text to score.
        sid: Optional analyzer exposing ``polarity_scores(text)``. When omitted,
            one ``SentimentIntensityAnalyzer`` is created on first use and
            reused for later calls, instead of building a new analyzer for
            every sentence as the original did.

    Returns:
        2 when the compound score is above 0.05, 0 when it is below -0.05,
        and 1 otherwise.
    """
    if sid is None:
        sid = getattr(sentiment_analysis_vader, "_shared_sid", None)
        if sid is None:
            sid = SentimentIntensityAnalyzer()
            sentiment_analysis_vader._shared_sid = sid

    compound = sid.polarity_scores(sentence)['compound']

    if compound > 0.05:
        return 2
    if compound < -0.05:
        return 0
    return 1

In [None]:
# Score every test sentence with VADER and compare with the encoded labels.
# `test_df` is rebuilt here, so the 'textblob_scores' column from the previous
# section is not carried over.
test_df = pd.DataFrame(data={'sentence': test_sentences, 'labels': test_labels})

# NOTE(review): `vader_sentiment` is not referenced by the lines below.
vader_sentiment = SentimentIntensityAnalyzer()
test_df['vader_scores'] = test_df['sentence'].apply(sentiment_analysis_vader)

print("Accuracy on VADER: ", accuracy_score(test_df['labels'],test_df['vader_scores']))