## Importing Libraries

In [1]:
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:


# Base quote URL; the ticker symbol is appended to build each page URL.
finviz_url = 'https://finviz.com/quote.ashx?t='
tickers = ['AAPL', 'GOOGL', 'TSLA']

# Seconds to wait on the network before urlopen gives up, so an unresponsive
# host fails fast instead of hanging the cell.
request_timeout = 30

# Maps each ticker to the parsed element whose id is 'news-table'.
news_tables = {}

for ticker in tickers:
    url = finviz_url + ticker

    # A User-Agent header is sent explicitly; without one the site does not
    # grant access to the page.
    req = Request(url=url, headers={'User-Agent': 'Mozilla/5.0'})

    # The context manager closes the HTTP response once it has been parsed.
    with urlopen(req, timeout=request_timeout) as response:
        html = BeautifulSoup(response, 'html.parser')

    # find() returns None when the page has no element with this id; such a
    # ticker is reported and skipped so later cells never receive a None table.
    news_table = html.find(id='news-table')
    if news_table is None:
        print(f"No 'news-table' element found for {ticker}; skipping it.")
        continue

    news_tables[ticker] = news_table
    



URLError: <urlopen error [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond>

If a User-Agent header is not specified, the website refuses the request and we cannot download the data.

## Manipulating the Finviz Data

In [None]:

# Flat list of [ticker, date, time, title] rows, one per scraped headline.
parsed_data = []

for ticker, news_table in news_tables.items():
    # A ticker whose page had no news table may be stored as None; skip it.
    if news_table is None:
        continue

    # A row whose timestamp cell holds a single token carries only a time and
    # reuses the date of the preceding row. Resetting per ticker keeps a date
    # from leaking out of the previous ticker's table, and avoids a NameError
    # when the very first row has no date.
    date = None

    for row in news_table.find_all('tr'):
        # Rows without a headline link or a timestamp cell cannot be parsed.
        if row.a is None or row.td is None:
            continue

        # Headline text of the anchor tag inside the row.
        title = row.a.get_text()

        # split() with no argument strips the text and collapses any run of
        # whitespace, so repeated spaces or newlines never yield empty tokens.
        date_data = row.td.get_text().split()
        if not date_data:
            continue

        # NOTE(review): the site may label current-day rows with a word such
        # as 'Today' instead of a date -- confirm before parsing dates downstream.
        if len(date_data) == 1:
            time = date_data[0]
        else:
            date = date_data[0]
            time = date_data[1]

        parsed_data.append([ticker, date, time, title])

# Show the row count and a short preview rather than dumping every headline.
print(len(parsed_data), 'headlines parsed')
print(parsed_data[:5])

## Sentiment Analysis

In [None]:
# Tabulate the scraped rows, one headline per row.
df = pd.DataFrame(data=parsed_data, columns=['ticker', 'date', 'time', 'title'])

vader = SentimentIntensityAnalyzer()


def func(x):
    """Return the 'compound' entry of VADER's polarity scores for the text ``x``."""
    return vader.polarity_scores(x)['compound']


# Score every headline and keep the result next to it in a new column.
df['compound'] = df['title'].apply(func)

print(df.head())

## Visualization

In [None]:
# Convert the date column from string to date objects so that headlines are
# grouped by calendar day.
df['date'] = pd.to_datetime(df['date']).dt.date

# Average compound score per (ticker, day). Only the numeric 'compound' column
# is aggregated: taking the mean of the text columns ('time', 'title') raises
# a TypeError on pandas >= 2.0.
daily_mean = df.groupby(['ticker', 'date'])['compound'].mean()

# Wide table kept for inspection: one row per date, one column per ticker.
mean_df = daily_mean.unstack(level='ticker')

# Long form (one row per ticker/day) sorted by date, so the bars appear in
# chronological order along the x axis.
daily_long = daily_mean.reset_index().sort_values('date')

# Trend over time: one group of bars per day, one bar per ticker. Passing the
# wide table directly to barplot would collapse all days into a single bar
# per ticker.
fig, ax = plt.subplots(figsize=(10, 8))
sns.barplot(data=daily_long, x='date', y='compound', hue='ticker', ax=ax)
ax.set(
    title='Mean headline sentiment per day',
    xlabel='Date',
    ylabel='Mean compound score',
)
ax.tick_params(axis='x', rotation=45)
fig.tight_layout()

plt.show()



