Choose 20 companies for analysis

In [1]:
# Ticker symbols of the 20 companies covered by this analysis.
# NOTE(review): Yahoo Finance commonly writes Berkshire Hathaway class B as
# "BRK-B" — confirm that "BRK.B" resolves to the intended company.
companies = [
    "AAPL", "MSFT", "AMZN", "GOOGL", "TSLA", "META", "BRK.B", "JNJ", "JPM", "V",
    "PG", "NVDA", "HD", "DIS", "MA", "PYPL", "INTC", "NFLX", "KO", "PEP",
]

In [46]:
!pip install yfinance



Use Yahoo Finance (via the yfinance package) to define a function that fetches the latest news for a company

In [127]:
import yfinance as yf
def get_news(symbol):
    """Return the news items Yahoo Finance currently lists for a ticker.

    Args:
        symbol: Ticker symbol handed to ``yf.Ticker``.

    Returns:
        The value of the ticker's ``news`` attribute, unchanged.
    """
    return yf.Ticker(symbol).news

In [128]:
# Sanity check: display the title of the first news item returned for AAPL.
get_news("AAPL")[0]['title']

"Apple, Microsoft, or Nvidia: Which Will Be the World's Most Valuable Company a Year From Now?"

Clean each headline by removing punctuation and converting it to lowercase

In [129]:
import string
def clean_text(headline):
  """Lowercase a headline and strip every punctuation character from it.

  Removes all characters in ``string.punctuation`` and, in addition, any
  character whose Unicode category is punctuation (curly quotes and
  apostrophes, en/em dashes, ...), which an ASCII-only filter leaves behind.

  Args:
    headline: The raw headline text.

  Returns:
    The lowercased headline without punctuation. Whitespace is left as is,
    so removing a free-standing symbol can leave consecutive spaces.
  """
  import unicodedata  # local import keeps this cell self-contained

  ascii_punctuation = set(string.punctuation)
  return ''.join(
      char for char in headline.lower()
      if char not in ascii_punctuation
      and not unicodedata.category(char).startswith('P')
  )

Fetch the news for every company and store the cleaned headlines in a dictionary whose keys are the company tickers and whose values are lists of headlines

In [68]:
# Build {ticker: [cleaned headline, ...]}.
# Each ticker is fetched exactly once: re-querying for every headline is slow and
# can index past the end of the list if the feed changes between calls.
# A ticker with no news still gets an (empty) list so later lookups by ticker work.
news_dataset = {}
for company in companies:
  articles = get_news(company) or []  # treat a missing feed as "no news"
  news_dataset[company] = [clean_text(article['title']) for article in articles]

In [69]:
# Display the cleaned headlines stored for each ticker.
news_dataset

{'AAPL': ['apple microsoft or nvidia which will be the worlds most valuable company a year from now',
  'baby boomers and millennials face an intimidating task as stocks trade near record highs morning brief',
  'renters are struggling more than homeowners in america’s tough housing market report says',
  'why im thinking about selling some of my apple stock',
  'best stock to buy right now apple vs amazon',
  'apple and meta have discussed ai partnership wsj reports',
  'nvidia and microsoft will make up over 40 of this popular etf heres why this alternative etf could be a better option',
  'apple meta have discussed an ai partnership'],
 'MSFT': ['apple microsoft or nvidia which will be the worlds most valuable company a year from now',
  'do microsofts nasdaqmsft earnings warrant your attention',
  'baby boomers and millennials face an intimidating task as stocks trade near record highs morning brief',
  'missed out on nvidia heres 1 spectacular etf to buy instead',
  '2 top tech st

Sentiment Analysis using VADER

In [70]:
import nltk  # needed for nltk.download below; the from-import alone does not bind `nltk`
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Fetch the VADER lexicon, then create the single analyzer instance that
# analyze_sentiment_nltk reads as the global `sia`.
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


In [89]:
def analyze_sentiment_nltk(news):
    """Average the VADER compound score over a collection of headlines.

    Args:
        news: Iterable of headline strings.

    Returns:
        The arithmetic mean of each headline's ``'compound'`` polarity score
        from the global ``sia`` analyzer, or 0 when ``news`` is empty.
    """
    compound_scores = [sia.polarity_scores(headline)['compound'] for headline in news]
    if not compound_scores:
        return 0
    return sum(compound_scores) / len(compound_scores)

In [88]:
# Average VADER compound score over the stored AAPL headlines.
analyze_sentiment_nltk(news_dataset["AAPL"])

0.12691249999999998

Sentiment Analysis using TextBlob

In [91]:
from textblob import TextBlob

In [110]:
def analyze_sentiment_textblob(news):
  """Average the TextBlob polarity over a collection of headlines.

  Args:
    news: Iterable of headline strings.

  Returns:
    The arithmetic mean of ``TextBlob(headline).sentiment.polarity`` over all
    headlines, or 0 when ``news`` is empty.
  """
  polarities = [TextBlob(headline).sentiment.polarity for headline in news]
  return sum(polarities) / len(polarities) if polarities else 0

In [111]:
# Average TextBlob polarity over the stored AAPL headlines.
analyze_sentiment_textblob(news_dataset["AAPL"])

0.2248015873015873

Gather the sentiment scores and, based on their values, label each stock's sentiment as positive, negative, or neutral to inform buy or sell suggestions

In [125]:
# Average headline sentiment per company, in the same order as `companies`.
# .get(company, []) lets a ticker with no stored headlines score 0 (the analyzers'
# empty-input result) instead of raising KeyError.
nltk_sentiment_scores = [analyze_sentiment_nltk(news_dataset.get(company, [])) for company in companies]
textblob_sentiment_scores = [analyze_sentiment_textblob(news_dataset.get(company, [])) for company in companies]
# Overall score: plain mean of the two analyzers' per-company averages.
overall = [
    (nltk_score + textblob_score) / 2
    for nltk_score, textblob_score in zip(nltk_sentiment_scores, textblob_sentiment_scores)
]
def sentiment_suggestion(score, threshold=0.05):
    """Label a sentiment score as 'positive', 'negative' or 'neutral'.

    Args:
        score: Numeric sentiment score.
        threshold: Half-width of the neutral band around zero. Defaults to
            0.05, the cut-off this function previously hard-coded.

    Returns:
        'positive' when ``score > threshold``, 'negative' when
        ``score < -threshold``, otherwise 'neutral'. Scores exactly on a
        boundary are 'neutral'.
    """
    if score > threshold:
        return 'positive'
    if score < -threshold:
        return 'negative'
    return 'neutral'
# Turn each per-company score list into the matching list of sentiment labels.
nltk_suggestions = list(map(sentiment_suggestion, nltk_sentiment_scores))
textblob_suggestions = list(map(sentiment_suggestion, textblob_sentiment_scores))
overall_suggestions = list(map(sentiment_suggestion, overall))

Display the final dataset along with the suggestions

In [126]:
import pandas as pd  # `pd` is not imported by any earlier visible cell

# Assemble the summary table in a single constructor call; the dict's key order
# fixes the column order, and every list is aligned with `companies`.
df = pd.DataFrame({
    'Company': companies,
    'NLTK Sentiment Score': nltk_sentiment_scores,
    'NLTK Sentiment Suggestions': nltk_suggestions,
    'TextBlob Sentiment Score': textblob_sentiment_scores,
    'TextBlob Sentiment Suggestions': textblob_suggestions,
    'Overall Sentiment Suggestions': overall_suggestions,
})
df

Unnamed: 0,Company,NLTK Sentiment Score,NLTK Sentiment Suggestions,TextBlob Sentiment Score,TextBlob Sentiment Suggestions,Overall Sentiment Suggestions
0,AAPL,0.126912,positive,0.224802,positive,positive
1,MSFT,0.224612,positive,0.236574,positive,positive
2,AMZN,0.3898,positive,0.258929,positive,positive
3,GOOGL,0.168037,positive,-0.057143,negative,positive
4,TSLA,0.022613,neutral,-0.0125,neutral,neutral
5,META,0.1149,positive,0.028274,neutral,positive
6,BRK.B,0.061187,positive,0.185417,positive,positive
7,JNJ,0.084288,positive,0.045833,neutral,positive
8,JPM,-0.134337,negative,0.0625,positive,neutral
9,V,-0.196763,negative,-0.140625,negative,negative
