In [2]:
pip install twython

Collecting twython
  Downloading https://files.pythonhosted.org/packages/24/80/579b96dfaa9b536efde883d4f0df7ea2598a6f3117a6dd572787f4a2bcfb/twython-3.8.2-py3-none-any.whl
Installing collected packages: twython
Successfully installed twython-3.8.2


In [18]:
# Import libraries
import pandas as pd
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
from urllib.request import urlopen
from urllib.request import Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Number of articles per ticker (caps how many headlines are printed for each symbol).
n = 10

# Symbols whose Finviz quote pages are scraped.
tickers = [
    'LEJU', 'SAM', 'VOXX', 'LOOP',
    'ADAP', 'MAT', 'SIM', 'NXGN',
    'HYMC', 'TSLA', 'AAPL', 'MSFT',
    'LLNW', 'TC', 'USX', 'GLBS',
]

In [19]:
# Get Data
# Fetch the Finviz quote page for every ticker, keep the element with
# id="news-table" in `news_tables`, then print the first `n` headlines of each.
finviz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}

for ticker in tickers:
    url = finviz_url + ticker
    req = Request(url=url, headers={'user-agent': 'my-app/0.0.1'})
    # Parse inside the context manager so the HTTP response is always closed.
    with urlopen(req) as resp:
        html = BeautifulSoup(resp, features="lxml")
    news_table = html.find(id='news-table')
    if news_table is None:
        # find() returns None when the page has no such element; storing it
        # would only defer the failure to the first .find_all() call.
        print('No news table found for {}; skipping.'.format(ticker))
        continue
    news_tables[ticker] = news_table

for ticker in tickers:
    news_table = news_tables.get(ticker)
    if news_table is None:
        # Skip just this ticker instead of abandoning the whole preview.
        continue

    print ('\n')
    print ('Recent News Headlines for {}: '.format(ticker))

    # Rows lacking a link or a timestamp cell cannot be rendered as a headline.
    headline_rows = [
        table_row for table_row in news_table.find_all('tr')
        if table_row.a is not None and table_row.td is not None
    ]
    # Slicing caps the preview at n rows.
    for table_row in headline_rows[:n]:
        a_text = table_row.a.text
        td_text = table_row.td.text.strip()
        print(a_text,'(',td_text,')')



Recent News Headlines for LEJU: 
Is Now The Time To Look At Buying Leju Holdings Limited (NYSE:LEJU)? ( Oct-26-20 09:57AM )
A Look Into Leju Holdings Price Over Earnings ( Oct-23-20 04:01PM )
A Trio of Graham-Style Stocks to Consider ( Oct-07-20 01:00PM )
Slammed 28% Leju Holdings Limited (NYSE:LEJU) Screens Well Here But There Might Be A Catch ( Sep-18-20 08:39AM )
Take Care Before Diving Into The Deep End On Leju Holdings Limited (NYSE:LEJU) ( Aug-29-20 10:31AM )
Leju Holdings Ltd (LEJU) Q2 2020 Earnings Call Transcript ( Aug-27-20 01:24PM )
Leju Reports First Half Year 2020 Results ( Aug-20-20 05:51AM )
Leju to Report First Half 2020 Financial Results on August 20, 2020 ( Aug-06-20 11:00PM )
E-House to Become Leju's Majority Shareholder; E-House, Alibaba and Leju to Jointly Build Online Real Estate Platform; Alibaba to Increase Stake in E-House ( Jul-31-20 09:40AM )
Announcing: Leju Holdings (NYSE:LEJU) Stock Increased An Energizing 158% In The Last Year ( Jul-29-20 01:36PM )


Re

In [20]:
# Iterate through the news
# Flatten every scraped news table into [ticker, date, time, headline] rows.
parsed_news = []
for ticker, news_table in news_tables.items():
    # The keys of news_tables are already the ticker symbols.
    if news_table is None:
        continue

    # Reset per ticker: rows that carry only a time reuse the most recent date
    # seen in the same table, and must never inherit one from the previous
    # ticker (or hit an undefined name on the very first row).
    date = None
    for row in news_table.find_all('tr'):
        if row.a is None or row.td is None:
            # Not a headline row (no link or no timestamp cell).
            continue

        text = row.a.get_text()
        date_scrape = row.td.text.split()
        if not date_scrape:
            # Empty timestamp cell: nothing to attribute the headline to.
            continue

        if len(date_scrape) == 1:
            # Only a time is present; keep the date carried from an earlier row.
            time = date_scrape[0]
        else:
            date = date_scrape[0]
            time = date_scrape[1]

        parsed_news.append([ticker, date, time, text])

In [21]:
# Sentiment Analysis
import nltk

# Fetch the VADER lexicon, then build the analyzer.
nltk.download('vader_lexicon')
analyzer = SentimentIntensityAnalyzer()

# One row per scraped headline.
columns = ['Ticker', 'Date', 'Time', 'Headline']
news = pd.DataFrame(parsed_news, columns=columns)

# Score each headline; every result is a dict that becomes one row of score columns.
scores = [analyzer.polarity_scores(headline) for headline in news['Headline']]
df_scores = pd.DataFrame(scores)

# Attach the score columns to the headlines by row position.
news = news.join(df_scores, rsuffix='_right')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [22]:
# View Data
# Show the first scored rows for each ticker, then rank the tickers by the
# mean of their 'compound' sentiment score.
news['Date'] = pd.to_datetime(news.Date).dt.date

unique_ticker = news['Ticker'].unique().tolist()
news_dict = {name: news.loc[news['Ticker'] == name] for name in unique_ticker}

values = []
for ticker in tickers:
    dataframe = news_dict.get(ticker)
    if dataframe is None:
        # No headlines were parsed for this ticker. Record NaN so `values`
        # stays aligned with `tickers` for the zip below.
        print ('\n')
        print ('No headlines available for {}'.format(ticker))
        values.append(float('nan'))
        continue

    dataframe = dataframe.set_index('Ticker')
    dataframe = dataframe.drop(columns = ['Headline'])
    print ('\n')
    print (dataframe.head())

    mean = round(dataframe['compound'].mean(), 2)
    values.append(mean)

df = pd.DataFrame(list(zip(tickers, values)), columns =['Ticker', 'Mean Sentiment'])
df = df.set_index('Ticker')
# Highest mean sentiment first.
df = df.sort_values('Mean Sentiment', ascending=False)
print ('\n')
print (df)



              Date     Time    neg    neu    pos  compound
Ticker                                                    
LEJU    2020-10-26  09:57AM  0.147  0.853  0.000   -0.2263
LEJU    2020-10-23  04:01PM  0.000  1.000  0.000    0.0000
LEJU    2020-10-07  01:00PM  0.000  1.000  0.000    0.0000
LEJU    2020-09-18  08:39AM  0.119  0.750  0.131    0.0516
LEJU    2020-08-29  10:31AM  0.116  0.610  0.274    0.3818


              Date     Time    neg    neu    pos  compound
Ticker                                                    
SAM     2020-10-29  11:55AM  0.000  0.656  0.344    0.4939
SAM     2020-10-28  12:00PM  0.000  0.708  0.292    0.5106
SAM     2020-10-28  08:31AM  0.000  1.000  0.000    0.0000
SAM     2020-10-27  02:13PM  0.000  0.769  0.231    0.2023
SAM     2020-10-26  01:43PM  0.237  0.763  0.000   -0.4767


              Date     Time  neg    neu    pos  compound
Ticker                                                  
VOXX    2020-10-26  04:01PM  0.0  0.610  0.390    0.75