## Insert the data into Firestore for posterity
##### This data will be used for analysis in the main Capstone Paper, but not in the main analysis notebook.
Inserts the data from IBM Watson, Google and Bing into Google Firestore.

**Note**: Due to the limited number of queries that Google Firestore allows, the application JSON is not included in the repository, so this notebook cannot be run as-is.

A Python script similar to this runs on a daily basis on Google Functions, and stores the data into Google Firestore.

In [1]:
#!pip install --upgrade firebase-admin
#!pip install ibm-watson

In [2]:
import firebase_admin
import csv
import pandas as pd
import os
import json
from dateutil import parser
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson.natural_language_understanding_v1 import Features, EntitiesOptions, KeywordsOptions, SentimentOptions
from firebase_admin import credentials, firestore
from tqdm import tqdm
import requests

In [3]:
# Locate the Firebase service-account key next to the notebook's working directory.
# os.path.join supplies the platform's own separator; the previous hard-coded
# leading backslash only produced a valid path on Windows (and "\c" is an
# invalid string escape that newer Python versions warn about).
curr_path = os.path.abspath(os.getcwd())
cps_config_file = "capstone-8ddb9-firebase-adminsdk-gx85u-8b516e44f1.json"
cps_config_path = os.path.join(curr_path, cps_config_file)

In [4]:
# Build the credential object from the service-account JSON key.
cred = credentials.Certificate(cps_config_path)

# initialize_app() raises ValueError if the default app already exists, which
# happens whenever this cell is re-run in a live kernel. Reuse the existing
# app in that case so the cell is safe to execute more than once.
try:
    firebase_admin.get_app()
except ValueError:
    firebase_admin.initialize_app(cred)

# Firestore client bound to the default Firebase app.
db = firestore.client()

In [5]:
#INSERT YOUR API PARAMS
# Prefer supplying these through environment variables so that secrets are
# never saved inside the notebook. The variable names follow the IBM SDK's
# external-configuration convention; when they are unset the values fall back
# to the empty placeholders, which you may also replace by hand.
# url_api is handed to IAMAuthenticator (i.e. it is the IAM API key, not a URL);
# url_ibm is handed to set_service_url (the service endpoint).
url_api = os.environ.get('NATURAL_LANGUAGE_UNDERSTANDING_APIKEY', '')
url_ibm = os.environ.get('NATURAL_LANGUAGE_UNDERSTANDING_URL', '')

In [6]:
# Authenticate against IBM Cloud using the value stored in `url_api`
# (despite its name, it is passed to IAMAuthenticator as the credential).
ibm_authenticator = IAMAuthenticator(url_api)
# Natural Language Understanding client pinned to a fixed API version date.
# This module-level client is the one used by insert_to_firestore below.
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2021-03-25', 
    authenticator=ibm_authenticator)
# Point the client at the service endpoint stored in `url_ibm`.
natural_language_understanding.set_service_url(url_ibm)


In [7]:
def load_keyword_csv(csv_path, label):
    """Read one search-engine result CSV, tolerating a missing or unreadable file.

    Args:
        csv_path: Path of the CSV file to read.
        label: Human-readable search-engine name used in the printed message.

    Returns:
        The parsed ``pandas.DataFrame``, or an empty list when the file could
        not be loaded (the same fallback value the notebook has always used,
        so ``len(...)`` is 0 downstream).
    """
    try:
        return pd.read_csv(csv_path)
    except FileNotFoundError:
        print (f"Couldn't find {label} file.")
    except Exception as exc:
        # Still best-effort, but report the real cause instead of claiming the
        # file is missing, and let KeyboardInterrupt/SystemExit propagate.
        print (f"Couldn't read {label} file: {exc}")
    return []


keyword_list_bing = load_keyword_csv('bing.csv', 'Bing')
keyword_list_google = load_keyword_csv('google_results.csv', 'Google')

In [8]:
def _analyze_sentiment(url, keyword, description):
    """Score one search result with Watson NLU, degrading gracefully.

    Three attempts are made, in order:
      1. analyze the page at ``url`` with ``keyword`` as a sentiment target;
      2. analyze the page at ``url`` for document-level sentiment only;
      3. analyze the ``description`` text for document-level sentiment only.

    Returns:
        ``(document_score, document_label, keyword_score, keyword_label)``;
        the keyword values are ``None`` unless attempt 1 succeeded.

    Raises:
        Whatever the final text-based attempt raises, if it fails as well.
    """
    def _document_only(**source):
        # `source` is either url=... or text=...; no keyword target requested.
        response = natural_language_understanding.analyze(
            features=Features(sentiment=SentimentOptions()), **source).get_result()
        document = response['sentiment']['document']
        return document['score'], document['label'], None, None

    # `except Exception` (not a bare except) so Ctrl-C can still stop a long run.
    try:
        try:
            response = natural_language_understanding.analyze(
                url=url,
                features=Features(sentiment=SentimentOptions(targets=[keyword]))).get_result()
            document = response['sentiment']['document']
            target = response['sentiment']['targets'][0]
            return document['score'], document['label'], target['score'], target['label']
        except Exception:
            # Targeted analysis failed (or returned no target); retry without it.
            return _document_only(url=url)
    except Exception:
        # The URL could not be analyzed at all; fall back to the snippet text.
        return _document_only(text=description)


def insert_to_firestore(keyword_list, db, csv_file, search_engine):
    """Run sentiment analysis on every row and store the results in Firestore.

    Each row becomes a new auto-ID document in the collection named after the
    row's keyword. After all rows are written, ``csv_file`` is deleted.

    Args:
        keyword_list: Table with the columns 'keyword', 'url', 'title',
            'published_date', 'publisher' and 'description', indexed 0..n-1
            (as produced by ``pd.read_csv``). An empty sequence is accepted
            and results in no inserts.
        db: Firestore client used for the writes.
        csv_file: Path of the source CSV to remove once the inserts finish.
        search_engine: Label stored in each document's 'search_engine' field.

    Raises:
        Any exception from the final sentiment fallback, date parsing, or the
        Firestore write; rows inserted before the failure remain stored and
        the CSV is left in place.
    """
    for i in tqdm(range(len(keyword_list)), desc = "Inserting Data to Firestore"):
        keyword = keyword_list['keyword'][i]
        url = keyword_list['url'][i]
        description = keyword_list['description'][i]

        (document_sentiment, sentiment_label_document,
         keyword_sentiment, sentiment_label_keyword) = _analyze_sentiment(url, keyword, description)

        data_to_insert = {
            'search_engine': search_engine,
            'title': keyword_list['title'][i],
            'url': url,
            'published_date': parser.parse(keyword_list['published_date'][i]),
            'publisher': keyword_list['publisher'][i],
            'description': description,
            'document_sentiment': document_sentiment,
            'document_label': sentiment_label_document,
            'keyword_sentiment': keyword_sentiment,
            'keyword_label': sentiment_label_keyword
        }

        # One collection per keyword; document() with no argument auto-generates the ID.
        doc_ref = db.collection(keyword).document()
        doc_ref.set(data_to_insert)

    # The CSV may never have existed (e.g. the loader fell back to an empty
    # list), so only remove it when it is actually there.
    if os.path.exists(csv_file):
        os.remove(csv_file)

In [9]:
#insert_to_firestore(keyword_list_bing, db, 'bing.csv', 'bing')

Inserting Data to Firestore: 100%|███████████████████████████████████████████████████| 300/300 [16:42<00:00,  3.34s/it]


In [1]:
#insert_to_firestore(keyword_list_google, db, 'google_results.csv', 'google')