## Data Analysis using IBM Watson's Natural Language Understanding
### Using the data collected from Google and Bing News, analyze the sentiment and emotion behind the text.

In [1]:
# !pip install pandas
# !pip install --upgrade "ibm-watson>=5.2.3"

In [2]:
import csv
import json
import os

import pandas as pd
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_watson.natural_language_understanding_v1 import Features, EntitiesOptions, KeywordsOptions, SentimentOptions, CategoriesOptions, EmotionOptions
from tqdm import tqdm

In [3]:
def get_response_for_sentiment(natural_language_understanding, keyword, engine_url):
    """Analyze the page at ``engine_url`` for emotion and sentiment.

    Args:
        natural_language_understanding: Client object exposing ``analyze``.
        keyword: Accepted for the caller's convenience; it is not part of
            the request that is sent.
        engine_url: URL handed to the service as the content to analyze.

    Returns:
        Whatever ``.get_result()`` yields for the ``analyze`` call.
    """
    analyze = natural_language_understanding.analyze
    requested_features = Features(
        emotion=EmotionOptions(),
        sentiment=SentimentOptions(),
    )
    analysis = analyze(url=engine_url, features=requested_features)
    return analysis.get_result()

In [4]:
def get_sentiment(response):
    """Flatten an analysis response into an 8-tuple of summary values.

    Args:
        response: Nested mapping that provides
            ``['usage']['text_characters']``,
            ``['sentiment']['document']['score']`` and ``['label']``, and
            ``['emotion']['document']['emotion']`` with the keys
            ``sadness``, ``joy``, ``fear``, ``disgust`` and ``anger``.

    Returns:
        ``(total_chars, sentiment_score, sentiment_label, sadness, joy,
        fear, disgust, anger)``.

    Raises:
        KeyError: If any of the expected keys is absent.
    """
    char_count = response['usage']['text_characters']

    document_sentiment = response['sentiment']['document']
    score, label = document_sentiment['score'], document_sentiment['label']

    # Emotions are emitted in this fixed order; callers index the tuple by position.
    emotion_scores = response['emotion']['document']['emotion']
    emotion_values = tuple(
        emotion_scores[name]
        for name in ('sadness', 'joy', 'fear', 'disgust', 'anger')
    )
    return (char_count, score, label) + emotion_values

In [5]:
def main(file_name, sentiment_file):
    """Analyze every article listed in a CSV and write the results to a new CSV.

    Reads ``file_name``, sends each row's ``url`` to Watson Natural Language
    Understanding via ``get_response_for_sentiment``, and writes one output
    row per input row to ``sentiment_file``: the six input columns followed by
    the eight values returned by ``get_sentiment``. When the analysis of a row
    fails, the row is still written with empty strings in the eight analysis
    columns and the error is reported next to the progress bar.

    Credentials come from the environment instead of the source code:

    * ``NATURAL_LANGUAGE_UNDERSTANDING_APIKEY`` (required) - IAM API key.
    * ``NATURAL_LANGUAGE_UNDERSTANDING_URL`` (optional) - service instance
      URL; defaults to the instance this notebook was originally run against.

    Args:
        file_name: Path of the input CSV. It must contain the columns
            ``keyword``, ``title``, ``url``, ``published_date``,
            ``publisher`` and ``description``.
        sentiment_file: Path of the CSV to create (overwritten if present).

    Raises:
        RuntimeError: If the API key environment variable is not set.
        KeyError: If the input CSV lacks one of the required columns.
    """
    api_key = os.environ.get('NATURAL_LANGUAGE_UNDERSTANDING_APIKEY')
    if not api_key:
        raise RuntimeError(
            'Set the NATURAL_LANGUAGE_UNDERSTANDING_APIKEY environment variable '
            'to your IBM Watson NLU API key before calling main().')
    url_ibm = os.environ.get(
        'NATURAL_LANGUAGE_UNDERSTANDING_URL',
        'https://api.eu-gb.natural-language-understanding.watson.cloud.ibm.com/instances/7fade06f-33fa-4aa6-8e0d-64acca721fa4')

    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2021-03-25',
        authenticator=IAMAuthenticator(api_key))
    natural_language_understanding.set_service_url(url_ibm)

    input_columns = ['keyword', 'title', 'url', 'published_date', 'publisher', 'description']
    analysis_columns = ['total_chars', 'doc_sentiment_num', 'doc_sentiment_label',
                        'sadness', 'joy', 'fear', 'disgust', 'anger']
    keyword_pos = input_columns.index('keyword')
    url_pos = input_columns.index('url')

    # Selecting the columns up front fails fast (KeyError) on a malformed input
    # file, before the output file is created or any API call is made.
    articles = pd.read_csv(file_name)[input_columns]

    # The context manager guarantees buffered rows are flushed and the handle is
    # closed even if the run is interrupted part-way through.
    with open(sentiment_file, 'w', newline='', encoding='UTF8') as file_write:
        writer = csv.writer(file_write)
        writer.writerow(input_columns + analysis_columns)

        rows = articles.itertuples(index=False, name=None)
        for row in tqdm(rows, total=len(articles), desc="Analyzing Sentiment"):
            art_url = row[url_pos]
            try:
                res = get_response_for_sentiment(
                    natural_language_understanding, row[keyword_pos], art_url)
                # Unpack the response once instead of once per output column.
                analysis = list(get_sentiment(res))
            except Exception as exc:
                # Best effort: keep the row with blank analysis columns, but say
                # why. Catching Exception (not a bare except) lets Ctrl-C /
                # kernel interrupt stop a long run.
                tqdm.write(f'Sentiment analysis failed for {art_url}: {exc}')
                analysis = [''] * len(analysis_columns)
            writer.writerow(list(row) + analysis)

In [6]:
# Analyze the articles listed in raw_google.csv; results go to google_with_sentiment.csv.
main(file_name='raw_google.csv', sentiment_file='google_with_sentiment.csv')

Analyzing Sentiment: 100%|███████████████████████████████████████████████████████████| 300/300 [10:45<00:00,  2.15s/it]


In [7]:
# Analyze the articles listed in raw_bing.csv; results go to bing_with_sentiment.csv.
main(file_name='raw_bing.csv', sentiment_file='bing_with_sentiment.csv')

Analyzing Sentiment: 100%|███████████████████████████████████████████████████████████| 288/288 [08:11<00:00,  1.71s/it]
