# Show basic usage of NER and Sentiment analysis

## Install dependencies

In [None]:
! pip install nltk

In [None]:
! pip install asent

In [None]:
! pip install neuralcoref

In [None]:
! pip install flair

## Demonstrate Sentiment and Entity Analysis

In [None]:
import nltk
# Fetch the VADER lexicon used by nltk.sentiment.vader (imported in a later cell).
# It is needed the first time the notebook runs; the call is left enabled here.
nltk.download('vader_lexicon')

import matplotlib.pyplot as plt 
import numpy as np
import pandas as pd

In [None]:
import spacy
from spacy import displacy
import flair
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# import asent
# import neuralcoref
# from textblob import TextBlob
# from spacytextblob.spacytextblob import SpacyTextBlob

# Sample sentence used by every step in this cell.
text = "Apple is looking at buying U.K. startup for $1 billion"

# --- Disabled experiment: neural coreference resolution (neuralcoref) ---
# coef = spacy.load('en_core_web_sm')
# neuralcoref.add_to_pipe(coef)
# doc = coef(text)
# print(doc._.coref_clusters)

# --- Sentiment: score the sample text with NLTK's VADER analyzer ---
# `sia` is a notebook-level name that later cells reuse, so this cell must run first.
# --- Disabled experiment: flair sentiment classifier ---
# NOTE(review): `model` is only assigned in this commented-out snippet, while a
# later cell calls model.predict(...); `model` must be loaded somewhere before
# that cell runs.
# model = flair.models.TextClassifier.load('en-sentiment')
sia = SentimentIntensityAnalyzer()
# (flair) wrap the input text in a Sentence
# sentence = flair.data.Sentence(text)
# Print the polarity scores returned by the analyzer for the sample text.
print(sia.polarity_scores(text))
# (flair) run the prediction and print the labelled sentence
# model.predict(sentence)
# sentiment = sentence
# print(sentiment)

# --- Disabled experiment: sentiment as a spaCy pipeline component (spacytextblob / asent) ---
# nlp = spacy.load("en_core_web_sm")
# nlp.add_pipe('spacytextblob')
# nlp.add_pipe("sentencizer")
# nlp.add_pipe('asent_en_v1')
# doc = nlp(text)
# asent.visualize(doc, style='analysis')

# --- Entities: load the spaCy "en_core_web_sm" pipeline and run it on the text ---
ner = spacy.load("en_core_web_sm")
# Disabled: explicitly adding an "ner" component to the loaded pipeline.
# entity_analyzer = ner.add_pipe("ner")
# ner.add_pipe(entity_analyzer)
doc = ner(text)
# Render the recognised entities inline in the notebook with displacy.
displacy.render(doc, style="ent", jupyter=True)


## Demonstrate sentiment analysis with NLTK and flair

In [None]:
# Sample news sentences scored below with both NLTK VADER (`sia`) and flair.
PHRASES = [
  'Well, this week news broke that they had been in talks with Twitter for a $4 billion acquisition, so it looks like they’re still pretty desirable.',
  'Wow, how things change.',
  'Traveloka are poised to become public companies in coming months, kickstarting a coming-out party for Southeast Asia’s long-overlooked internet scene.',
  'Former DHS Secretary Janet Napolitano spoke with Yahoo Finance about comprehensive immigration reform.',
]

# `model` was previously undefined on a fresh kernel (its only assignment is in
# commented-out code earlier in the notebook), which made model.predict() raise
# NameError. Load the flair sentiment classifier here before using it.
model = flair.models.TextClassifier.load('en-sentiment')

for phrase in PHRASES:
  print(phrase)
  # VADER polarity scores for the phrase.
  print(sia.polarity_scores(phrase))
  # flair prediction: predict() annotates the Sentence in place.
  sentence = flair.data.Sentence(phrase)
  model.predict(sentence)
  print(sentence.get_label())

# News + Sentiment

In [None]:
def get_articles_sentiments(keywrd, startd, sources_list = None, show_all_articles = False):
  """Fetch one day of news articles for a keyword and score each with VADER.

  Relies on names defined elsewhere in the notebook: `NewsApiClient`,
  `NEWS_API_KEY`, the `sia` sentiment analyzer and `pd` (pandas).

  Args:
    keywrd: Search query passed to the news API as `q`.
    startd: Start of the one-day window. Either a string in '%d-%b-%Y' format
      (e.g. '9-Apr-2021') or an object supporting `.isoformat()` and
      `+ timedelta` (e.g. `datetime.date` / `datetime.datetime`).
    sources_list: Optional iterable of source ids; when given and non-empty it
      is sent as a comma-separated `sources` filter, otherwise no source
      filter is sent.
    show_all_articles: Currently unused; kept so existing callers keep working.

  Returns:
    A DataFrame with columns ['Sentiment', 'URL', 'Title', 'Description'], one
    row per distinct article title, in the order the API returned them.
    'Sentiment' is the 'compound' score of "<title>. <description>".
    An empty DataFrame (same columns) is returned when no article matches.
  """
  # Imported locally so the function does not depend on another cell's imports.
  from datetime import datetime, timedelta

  news_client = NewsApiClient(api_key=NEWS_API_KEY)
  my_date = datetime.strptime(startd, '%d-%b-%Y') if isinstance(startd, str) else startd

  # Build the request once; `sources` is only added when a filter was requested.
  query = {
    'q': keywrd,
    'from_param': my_date.isoformat(),
    'to': (my_date + timedelta(days=1)).isoformat(),
    'language': "en",
    'sort_by': "relevancy",
    'page_size': 100,
  }
  if sources_list:
    query['sources'] = ",".join(sources_list)
  articles = news_client.get_everything(**query)

  rows = []
  seen_titles = set()
  for article in articles['articles']:
    title = str(article['title'])
    # The same story can appear more than once; keep the first occurrence only.
    if title in seen_titles:
      continue
    seen_titles.add(title)
    article_content = title + '. ' + str(article['description'])
    sentiment = sia.polarity_scores(article_content)['compound']
    rows.append((sentiment, article['url'], article['title'], article['description']))

  # No indexing into intermediate results here, so an empty response yields an
  # empty frame instead of raising IndexError.
  return pd.DataFrame(rows, columns=['Sentiment','URL','Title','Description'])

In [None]:
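# Simple version: no source filter (sources_list=None), so every English-language source is queried.
# The results still seem relevant without restricting to business sources. TODO: the original note about the article descriptions was left unfinished.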

# return_articles = get_articles_sentiments(keywrd= 'Tesla stock' ,startd = '9-Apr-2021',sources_list = None, show_all_articles= True)
# return_articles.Sentiment.hist(bins=30,grid=False)
# print(return_articles.Sentiment.mean())
# print(return_articles.Sentiment.count())
# print(return_articles.Description)

# Daily execution

In [None]:
# Daily run: score yesterday's 'Tesla stock' articles with no source filter
# (sources_list=None), i.e. across every English-language source.

# Imported here because no earlier cell brings `date` / `timedelta` into scope;
# without this the cell raises NameError on a fresh kernel.
from datetime import date, timedelta

my_date = date.today() - timedelta(days=1)

return_articles = get_articles_sentiments(
    keywrd='Tesla stock',
    startd=my_date,
    sources_list=None,
    show_all_articles=True,
)
# Distribution of the per-article sentiment scores.
return_articles.Sentiment.hist(bins=30, grid=False)
print(return_articles)

In [None]:
# Tag every row with the day the articles were fetched for.
return_articles = return_articles.assign(Date=my_date)

In [None]:
# Preview the first five scored articles.
return_articles.head(n=5)

In [None]:
# Two most negative articles (lowest sentiment score first).
(
    return_articles
    .sort_values('Sentiment')
    [['Sentiment', 'URL', 'Description', 'Title']]
    .head(2)
)

In [None]:
# Two most positive articles (highest sentiment score first).
(
    return_articles
    .sort_values('Sentiment', ascending=False)
    [['Sentiment', 'URL', 'Description', 'Title']]
    .head(2)
)

In [None]:
# Persist the day's scored articles; the file name carries the ISO date.
return_articles.to_csv(f"TSLA_news_sentiments_{my_date.isoformat()}.csv")