In [0]:
!pip install -q yahoo_fin

### Importing Required Libraries

In [3]:
import requests
import http
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

import pandas as pd
from yahoo_fin import stock_info
from pandas_datareader import DataReader
import numpy as np
import urllib3
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import os
import time
import matplotlib.pyplot as plt
%matplotlib inline

from nltk.sentiment.vader import SentimentIntensityAnalyzer

             requires requests_html, which is not installed.
             
             Install using: 
             pip install requests_html
             
             After installation, you may have to restart your Python session.


  from pandas.util.testing import assert_frame_equal


### Retrieving S&P 500 stock information

In [0]:
#Function to retrieve S&P 500 recommendations from Yahoo Finance
def get_recommendations(tickers, timeout=10):
    """Fetch the mean analyst recommendation for each ticker from Yahoo Finance.

    Parameters
    ----------
    tickers : iterable of str
        Ticker symbols to look up; one HTTP request is made per ticker.
    timeout : float, optional
        Seconds to wait for each request before giving up on that ticker.

    Returns
    -------
    list
        One entry per ticker, in input order: the formatted
        ``recommendationMean`` string from the response, or the sentinel
        ``-1`` when the request fails or the field is missing.
    """
    base_url = 'https://query2.finance.yahoo.com/v10/finance/quoteSummary/'
    query = '?formatted=true&crumb=swg7qs5y9UP&lang=en-US&region=US&' \
            'modules=upgradeDowngradeHistory,recommendationTrend,' \
            'financialData,earningsHistory,earningsTrend,industryTrend&' \
            'corsDomain=finance.yahoo.com'

    recommendations = []

    for ticker in tickers:
        #Sentinel kept unless a rating is successfully parsed below
        recommendation = -1

        try:
            data = requests.get(base_url + ticker + query, timeout=timeout)

            #Only parse the body of a successful response
            if data.ok:
                result = data.json()['quoteSummary']['result'][0]
                recommendation = result['financialData']['recommendationMean']['fmt']
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
            #Network failure, invalid JSON or an unexpected payload shape:
            #keep the sentinel for this ticker and carry on with the rest.
            #KeyboardInterrupt is deliberately NOT caught so the loop can be stopped.
            recommendation = -1

        recommendations.append(recommendation)

    return recommendations


#S&P 500 Tickers
#Ticker symbols come from yahoo_fin; get_recommendations then issues one HTTP
#request per ticker, so this cell is slow and needs network access.
tickers = stock_info.tickers_sp500()
#Same order as `tickers`; an entry is -1 when no rating could be retrieved
recommendations = get_recommendations(tickers)

In [5]:
#Pair every ticker with its rating, cast the ratings to numbers and
#discard the rows that still carry the -1 "no rating" sentinel
columns = ['Company','Recommendations']
data = (
    pd.DataFrame(list(zip(tickers, recommendations)), columns=columns)
    .assign(Recommendations=lambda frame: pd.to_numeric(frame['Recommendations']))
    .loc[lambda frame: frame['Recommendations'] != -1]
)

#Displayed sorted by rating; `data` itself keeps its original row order
data.sort_values(by='Recommendations')

Unnamed: 0,Company,Recommendations
387,PWR,1.5
284,LKQ,1.5
72,BSX,1.6
150,DXCM,1.6
255,J,1.6
...,...,...
1,AAL,3.3
154,ED,3.4
61,BEN,3.4
309,MKC,3.5


In [6]:
#Bucket the rated stocks by recommendation score:
#at most 1.5 is a buy, exactly 3 is a hold, at least 4.5 is a sell
buy = data.loc[data['Recommendations'] <= 1.5]
hold = data.loc[data['Recommendations'] == 3]
sell = data.loc[data['Recommendations'] >= 4.5]

#Stack the three buckets (buy, hold, sell) and expose the old row labels
#as a regular 'index' column
data_lst = [buy, hold, sell]
new_data = pd.concat(data_lst).reset_index(level=0)

new_data

Unnamed: 0,index,Company,Recommendations
0,284,LKQ,1.5
1,387,PWR,1.5
2,28,ALLE,3.0
3,97,CLX,3.0
4,119,CTSH,3.0
5,136,DISCK,3.0
6,258,JKHY,3.0
7,267,KIM,3.0
8,274,KSS,3.0
9,276,L,3.0


In [0]:
#Collects the ticker symbols of the stocks selected from the Yahoo Finance ratings
def get_tickers(data):
  """Return the values of the ``Company`` column of ``data`` as a list, in row order."""
  return [record.Company for _, record in data.iterrows()]

#Function is used to fetch news information of stocks recommended by Yahoo Finance
def fetch_news(tickers, timeout=10):
  """Download the FinViz quote page of each ticker and extract its news table.

  Parameters
  ----------
  tickers : iterable of str
      Ticker symbols to look up; one HTTP request is made per ticker.
  timeout : float, optional
      Seconds to wait for each request before ``requests`` raises.

  Returns
  -------
  dict
      Maps ticker -> the parsed element with ``id='news-table'``. Tickers
      whose page has no such element are omitted, so every value in the
      result can be iterated safely.
  """
  news = {}

  for ticker in tickers:
    url = 'https://finviz.com/quote.ashx?t=' + ticker

    #Scraping FinViz webpage to get news involving current stock
    req = requests.get(url=url, headers={'User-Agent':'my-app/0.0.1'}, timeout=timeout)

    #Name the parser explicitly so the result does not depend on which
    #optional parser libraries happen to be installed
    html_page = BeautifulSoup(req.content, 'html.parser')
    news_table = html_page.find(id='news-table')

    #No news table on the page (unknown ticker, blocked request, layout change):
    #skip it rather than storing None for later code to trip over
    if news_table is None:
      continue

    news[ticker] = news_table

  return news

#Rebinds `tickers` from the full S&P 500 list to just the shortlisted companies in new_data
tickers = get_tickers(new_data)

#One HTTP request per shortlisted ticker; maps ticker -> scraped news table
news = fetch_news(tickers)

In [24]:
#Function is used to parse news info and create a dataframe involving each company and relevant news
def news_data(news):
  """Flatten scraped news tables into one DataFrame of headlines.

  Parameters
  ----------
  news : dict
      Maps a ticker key to its parsed news table (an object exposing
      ``find_all('tr')``). ``None`` values are skipped.

  Returns
  -------
  pandas.DataFrame
      Columns ``Ticker``, ``Date``, ``Time`` and ``Headline``, one row per
      table row that has both a link and a timestamp cell. ``Date`` holds
      ``datetime.date`` values (``NaT`` when no date was seen yet for that
      ticker).
  """
  news_table = []

  for file_name, curr_news in news.items():
    #A ticker whose page had no news table has nothing to parse
    if curr_news is None:
      continue

    #Keep only the part of the key before the first underscore
    ticker = file_name.split('_')[0]

    #Rows that show only a time inherit the date of the closest dated row
    #above them; reset per ticker so a date never leaks between companies
    date = None

    for each in curr_news.find_all('tr'):

      #Rows without a headline link or a timestamp cell carry no news
      if each.a is None or each.td is None:
        continue

      #Scraping News Table to retrieve relevant news and timestamp
      text = each.a.get_text()
      stamp_parts = each.td.text.split()

      if not stamp_parts:
        continue

      if len(stamp_parts) == 1:
        timestamp = stamp_parts[0]
      else:
        date = stamp_parts[0]
        timestamp = stamp_parts[1]

      news_table.append([ticker, date, timestamp, text])

  #Creating a DataFrame with ticker and news information
  columns = ['Ticker','Date','Time','Headline']
  parsed_news = pd.DataFrame(news_table,columns=columns)

  parsed_news['Date'] = pd.to_datetime(parsed_news.Date).dt.date

  return parsed_news

parsed_news = news_data(news)

#Grouping the headlines for each company into one string.
#Headlines are joined with a space so the last word of one headline does not
#fuse with the first word of the next. The former `Inplace = True` argument is
#dropped: it is not an `agg` option and may be forwarded to the aggregation
#function on some pandas versions.
parsed_news = (
    parsed_news
    .groupby(['Ticker'], as_index = False)
    .agg({'Headline': ' '.join})
)
parsed_news

Unnamed: 0,Ticker,Headline
0,ALLE,Why Is Allegion (ALLE) Down 0.9% Since Last Ea...
1,CLX,Should We Waste This Crisis?Here's what boomin...
2,CTSH,8 Stocks Diamond Hill Capital Continues to Buy...
3,DISCK,"John Paulson Adds 2 Stocks to Portfolio, Boost..."
4,JKHY,UNIFY Financial Credit Union Moving its Symita...
5,KIM,Here's Why You Should Hold On to Kimco Realty ...
6,KSS,A Bad Earnings Report for Nordstrom Is Good En...
7,L,Dow futures gain as investors watch for earnin...
8,LEG,Edited Transcript of LEG earnings conference c...
9,LKQ,"Jeff Ubben's ValueAct Sells FedEx, Buys 2 New ..."


### Sentiment Analysis