In [None]:
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pandas as pd
import matplotlib.pyplot as plt
import nltk
# Fetch the 'vader_lexicon' resource, the word list the VADER sentiment analyser scores text with.
nltk.downloader.download('vader_lexicon')

In [None]:
# Base address of a finviz quote page; a ticker symbol is appended to it.
finviz_url = 'https://finviz.com/quote.ashx?t='

# Ticker symbols of the companies to analyse.
# NOTE(review): Meta Platforms switched its ticker from FB to META in 2022 —
# confirm finviz still serves a quote page for 'FB'.
Companies = [
    'AMZN',
    'GOOG',
    'FB',
]

# Filled by the scraping step: ticker symbol -> news table taken from finviz.
news_tables = dict()

In [None]:
# Download each company's finviz quote page and keep its news table
# (the element with id 'news-table') for later parsing.
for company in Companies:
    url = finviz_url + company
    # A user-agent header must be sent, otherwise access is denied.
    req = Request(url=url, headers={'user-agent': 'my-app'})
    # The context manager closes the HTTP response once the page is parsed.
    with urlopen(req) as response:
        soup = BeautifulSoup(response, features='html.parser')
    news_table = soup.find(id='news-table')
    if news_table is None:
        # find() returns None when the element is absent; fail here with a
        # clear message instead of a cryptic AttributeError further down.
        raise ValueError(f"No element with id 'news-table' found at {url}")
    news_tables[company] = news_table


# print(news_tables)


In [None]:
# Flatten every scraped news table into [company, date, time, headline] rows.
parsed_data = []

for company, news_table in news_tables.items():
    # Rows that show only a time reuse the date of the closest dated row above
    # them. Reset per company so a date never leaks over from the previous
    # ticker and the name is always bound before it is used.
    date = None
    for row in news_table.find_all('tr'):
        # Skip rows without a headline link or a timestamp cell
        # (presumably ad or spacer rows).
        if row.a is None or row.td is None:
            continue
        comment = row.a.text
        # split() with no argument discards padding, newlines and non-breaking
        # spaces around the timestamp, which split(' ') would keep.
        date_data = row.td.text.split()
        if not date_data:
            continue
        if len(date_data) == 1:
            # Time only: same day as the previous dated row.
            time = date_data[0]
        else:
            date, time = date_data[0], date_data[1]
        parsed_data.append([company, date, time, comment])

        
# print(parsed_data)

In [None]:

# Tabulate the parsed rows, one line per headline.
df = pd.DataFrame(
    data=parsed_data,
    columns=['Company', 'Date', 'Time', 'Comments'],
)

# VADER analyser used to score the headline text.
vader = SentimentIntensityAnalyzer()

# print(df)

In [None]:
def function(x):
    """Return the VADER 'compound' polarity score for the text ``x``."""
    scores = vader.polarity_scores(x)
    return scores['compound']


# Score every headline. The compound score is a single normalised value
# between -1 (most extreme negative) and +1 (most extreme positive); the
# closer to +1, the more positive the text.
df['Compound'] = df['Comments'].apply(function)

# Parse the date strings and keep only the calendar date.
df['Date'] = pd.to_datetime(df['Date']).dt.date

# print(vader.polarity_scores('I hate eating cheese'))
print(df)

In [None]:
# Average the compound score per company and day. Only the numeric 'Compound'
# column is aggregated: taking the mean of the text columns ('Time',
# 'Comments') raises TypeError on pandas >= 2.0.
mean_df = df.groupby(['Company', 'Date'])['Compound'].mean()
#print(mean_df)

# Pivot companies into columns so dates form the x-axis, one bar per company.
mean_df = mean_df.unstack(level='Company')

# Draw on an explicitly created axes so the requested figure size is actually
# used (DataFrame.plot would otherwise open its own figure and leave the
# sized one blank).
fig, ax = plt.subplots(figsize=(6, 8))
mean_df.plot(kind='bar', ax=ax)
ax.set_xlabel('Date')
ax.set_ylabel('Mean compound score')
ax.set_title('Mean headline sentiment per company')
plt.show()
#print(mean_df)