<a href="https://colab.research.google.com/github/FDDI-CentOS/LPTHW/blob/master/iAtk_Other_Bets_Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## ***Internal Audit Tool Kit (iAtk): Other Bets Sentiment Analysis***
This tool can be used to assist in reputational risk monitoring and to assess the impact of prior news on a target's overall reputation, based on news story search results scraped from Google News.  Note that this analysis can be skewed by the data source; as such, large and diverse data sets are preferred to help enhance accuracy.

**Polarity Range: (-1 to 1)** Values closer to 1 indicate positive sentiment; in contrast, values closer to -1 indicate negative sentiment.

**Subjectivity Range: (0 to 1)** Values closer to 1 indicate greater subjectivity; in contrast, values closer to 0 indicate greater objectivity.



In [0]:
#@title Install dependencies
# Google News Search and Sentiment Analysis
# djarguello@ 8-17-19

# Prepare the runtime environment
# Note: this cell only needs to be run once
!pip install textblob bs4 requests 
!pip install pydrive

In [0]:
from textblob import TextBlob
from bs4 import BeautifulSoup
import requests
import re

# Bets to analyse. Each name is also used as the stem of that bet's report
# file ("<bet>.html").
other_bets = [
    'waymo',
    'verily',
    'access',
    'deepmind',
    'calico',
    'capitalg',
    'googleventures',
    'sidewalk',
    'wing',
    'loon',
    'jigsaw',
    'makani',
    'x',
]

# Search terms, paired with other_bets by position (keywords[i] is the query
# for other_bets[i]). Update an entry to tune its results; use '+' between
# terms to combine them.
keywords = [
    'waymo',
    'verily',
    'access',
    'deepmind',
    'calico',
    'capitalg',
    'googleventures',
    'sidewalk',
    'wing',
    'loon',
    'jigsaw',
    'makani',
    'x',
]

# The two lists are consumed index-by-index, so they must stay the same length.
assert len(keywords) == len(other_bets), 'keywords must have one entry per bet'

# Per-bet identifiers, presumably Google Drive file ids (TODO confirm; this
# mapping is not referenced by the other cells visible in this notebook).
# The original cell had a stray ',' on its own line after the dict, which is a
# SyntaxError; the pending entries now live inside the literal as comments.
drive_files = {
    'waymo': '1dLwfY061BdcPGuDUo0VBHpIhBS4414D4Ynv0e7SbqEU',
    # TODO: fill in the ids for the remaining bets:
    # 'verily': ,
    # 'access': ,
    # 'deepmind': ,
    # 'calico': ,
    # 'capitalg': ,
    # 'googleventures': ,
    # 'sidewalk': ,
    # 'wing': ,
    # 'loon': ,
    # 'jigsaw%20google': ,
    # 'makani': ,
    # 'x': ,
}

# Analysis Class Object
class Analysis:
  """Google News sentiment scan for a single search term.

  Attributes:
    term: The search string exactly as it is inserted into the query URL.
    subjectivity: Mean TextBlob subjectivity of the scraped snippets
      (0 until run() has been called or when nothing was found).
    sentiment: Mean TextBlob polarity of the scraped snippets
      (0 until run() has been called or when nothing was found).
    url: Google News search URL for `term` (monthly feed, `tbs=qdr:m`).
  """

  # Seconds to wait for the HTTP response; without a timeout requests.get()
  # can block the notebook indefinitely.
  REQUEST_TIMEOUT = 30

  def __init__(self, term):
    """Store the search term and build the Google News query URL for it."""
    self.term = term
    self.subjectivity = 0
    self.sentiment = 0
    self.url = 'https://www.google.com/search?q={0}&source=lmns&tbm=nws&tbs=qdr:m'.format(self.term) # Google News Monthly Feed

  @staticmethod
  def _clean_snippet(markup):
    """Reduce one result's HTML markup to plain, single-spaced text.

    Args:
      markup: HTML source of a single result element, as a string.

    Returns:
      The text with tags removed, the characters ``? . ! / ; :`` and
      non-breaking spaces turned into blanks, whitespace runs collapsed and
      surrounding whitespace trimmed.
    """
    # Punctuation (and NBSP) become blanks so neighbouring words stay apart.
    text = re.sub(r'[\xa0 ?.!/;:]', ' ', markup)
    # Drop every tag, including the wrapping <div class="st">.
    text = re.sub(r'<.*?>', ' ', text)
    text = re.sub(r'\s{2,}', ' ', text)
    # Plain strip(): the tags are already gone. The previous
    # strip('<div class="st">') treated its argument as a character set and
    # removed leading/trailing letters such as 's' or 't' from the headline.
    return text.strip()

  def run(self):
    """Fetch the news results, score them and return the cleaned snippets.

    Every ``<div class="st">`` element of the response is scored with
    TextBlob; the means are stored on `sentiment` and `subjectivity`. The
    scores are recomputed from scratch on each call, so calling run() again
    does not inflate them.

    Returns:
      A list with one cleaned text snippet per result (empty when the page
      contains no matching elements).
    """
    response = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.text, 'html.parser')
    headline_results = soup.find_all('div', class_='st')

    snippets = []
    polarity_total = 0
    subjectivity_total = 0
    for h in headline_results:
      snippets.append(self._clean_snippet(str(h)))
      scores = TextBlob(h.get_text()).sentiment
      polarity_total += scores.polarity
      subjectivity_total += scores.subjectivity

    # Guard the division so an empty result page yields neutral scores.
    found = len(headline_results)
    self.sentiment = polarity_total / found if found else 0
    self.subjectivity = subjectivity_total / found if found else 0
    return snippets

In [0]:
# Analysis Function Run for Each Bet
def run_analysis(bet, keywords):
  """Run the news sentiment analysis for one bet and format it as HTML lines.

  Args:
    bet: Name of the bet, shown in the report header.
    keywords: Search string handed to Analysis; combine several terms with
      '+' between them.

  Returns:
    A list of HTML fragments: four header lines (bet, search keywords, query
    link, subjectivity/sentiment rounded to 5 places) followed by one
    ``<br>``-wrapped entry per snippet returned by Analysis.run().
  """
  from html import escape  # local import so this cell does not rely on another cell's imports

  a = Analysis(keywords)
  snippets = a.run()

  # Escape the URL so its '&' separators are valid inside the attribute and
  # the link text.
  link = escape(a.url)

  file = [
    'Bet: ' + bet + '<br>',
    'Keywords Search: ' + str(a.term) + '<br>',
    # The closing quote used to be written as '\\">' which left a literal
    # backslash inside the href value and broke the link.
    'Query Link: <a href="' + link + '">' + link + '</a><br>',
    'Subjectivity: ' + str(round(a.subjectivity, 5)) + ' Sentiment: ' + str(round(a.sentiment, 5)) + " <br>",
  ]
  file.extend("<br>" + row + "<br>" for row in snippets)
  return file

In [0]:
# File Writer Function Run for Each Analysis Row for Each Bet
def file_writer(filename, input):
  """Replace the contents of `filename` with `input` followed by a newline.

  The file is opened in write mode, so anything it previously held is
  discarded on every call. `input` is converted with str() before writing.
  """
  with open(filename, "w") as sink:
    sink.write(str(input))
    sink.write("\n")

In [0]:
# Review Text Files: Iterate Over Bet Filename List
def review_text_files(filename):
  """Print every line of `filename` to standard output.

  Lines are printed as read, i.e. still carrying their own line ending, so
  print() adds a second newline after each one.
  """
  with open(filename, 'r') as source:
    lines = source.readlines()
    for line in lines:
      print(line)

In [0]:
# Run the analysis for each bet and write the result to "<bet>.html"
for count, bet in enumerate(other_bets):
  html_filename = str(bet) + ".html"
  # keywords is paired with other_bets by position
  analysis_file = run_analysis(bet, keywords[count])

  # A single "w" open both clears any report left by a previous run and
  # guarantees the handle is closed; the earlier version leaked the handle
  # used for truncation and reopened the file once per line.
  with open(html_filename, "w") as report:
    report.writelines(item + '\n' for item in analysis_file)

  # Echo the finished report into the cell output for review
  review_text_files(html_filename)

In [0]:
# Save Output to Google Drive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate the Colab user and build the PyDrive client from the
# application-default credentials
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Id of the Google Drive folder the reports are uploaded into
DRIVE_FOLDER_ID = '1G4yxH_4Dz3WvG2mmc1GiqiAFvIUvRf5I'

# Upload the "<bet>.html" report of every bet into that folder
for bet in other_bets:
  results_file = bet + '.html'
  file = drive.CreateFile({'parents': [{'id': DRIVE_FOLDER_ID}]})
  file.SetContentFile(results_file)
  file.Upload()