### Data Collection for Database
1. Stock Data: Stock List [NASDAQ-100], General Information, Fundamental Data, Yearly Data, Intraday Data, Analyst Ratings
2. Sentiment Data: VADER sentiment score of recent news headlines for each stock

In [28]:
# imports
import yahoo_fin.stock_info as si
import yfinance as yf
import requests
import pandas as pd
from bs4 import BeautifulSoup
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [29]:
# stock list
def fetch_stock_list(timeout=30):
    """Return the ticker symbols listed by the NASDAQ-100 list-type API.

    Sends a GET request (with browser-like headers) to the NASDAQ API and
    collects the ``symbol`` field of every entry under
    ``data -> data -> rows`` in the JSON response.

    Args:
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        list: One symbol per row, in the order the API returned them.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        KeyError / TypeError: If the JSON payload does not have the expected
            ``data -> data -> rows`` structure.
    """
    url = "https://api.nasdaq.com/api/quote/list-type/nasdaq100"
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36', "Upgrade-Insecure-Requests": "1","DNT": "1","Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8","Accept-Language": "en-US,en;q=0.5","Accept-Encoding": "gzip, deflate"}
    # Without a timeout, requests can block forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    rows = response.json()["data"]["data"]["rows"]
    return [row["symbol"] for row in rows]

In [31]:
# name, sector, industry, description
def stock_general_info(ticker, timeout=30):
    """Scrape name, sector, industry and description for one ticker.

    The name, sector and industry are read from the Yahoo Finance quote page;
    the description is read from the MarketWatch stock page.

    Args:
        ticker: Symbol interpolated into both page URLs.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        tuple: ``(name, sector, industry, description)`` as strings; the
        description has surrounding whitespace stripped.

    Raises:
        requests.HTTPError: If either page answers with a 4xx/5xx status.
        AttributeError: If an expected element is not present in the page
            (``soup.find`` returns ``None`` in that case).
    """
    # Send a browser-like User-Agent, as the other scrapers in this notebook do.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}

    # name, sector, industry - yahoo finance
    # NOTE(review): these selectors depend on the current page markup - verify
    # against a live response.
    url = f"https://finance.yahoo.com/quote/{ticker}"
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    name = soup.find("h1", class_="D(ib)").text
    # `string=` is the current name of the bs4 filter formerly called `text=`.
    sector = soup.find("span", string="Sector(s)").find_next_sibling("span").text
    industry = soup.find("span", string="Industry").find_next_sibling("span").text

    # general info - marketwatch
    url = f"https://www.marketwatch.com/investing/stock/{ticker}"
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    description = soup.find("p", class_="description__text").text.strip()

    return (name, sector, industry, description)

In [27]:
# fundamental ratios and data
def fundamental_data(ticker):
    """Look up the quote table for ``ticker``.

    Thin wrapper: the result of ``si.get_quote_table`` is handed back
    unchanged.
    """
    return si.get_quote_table(ticker)

In [32]:
# yearly prices
def yearly_prices(ticker):
    """Download one year of daily adjusted closing prices for ``ticker``.

    Args:
        ticker: A single symbol passed to ``yf.download``.

    Returns:
        pandas.DataFrame: The downloaded index moved into a regular column,
        followed by a single ``'Adj Close'`` column.

    Raises:
        KeyError: If the downloaded frame has no ``'Adj Close'`` column.
    """
    # Request unadjusted OHLC explicitly: the 'Adj Close' column is only
    # produced when auto-adjustment is off, and the library default for this
    # flag is not the same in every yfinance release.
    data = yf.download(ticker, period="1y", interval="1d", auto_adjust=False)
    # Some yfinance releases return (field, ticker) MultiIndex columns even for
    # a single symbol; drop the ticker level so the result keeps flat columns.
    if isinstance(data.columns, pd.MultiIndex):
        data = data.droplevel(1, axis=1)
    return data[['Adj Close']].reset_index()

In [26]:
# intraday prices
def intraday_prices(ticker):
    """Download one day of 2-minute adjusted closing prices for ``ticker``.

    Args:
        ticker: A single symbol passed to ``yf.download``.

    Returns:
        pandas.DataFrame: The downloaded index moved into a regular column,
        followed by a single ``'Adj Close'`` column.

    Raises:
        KeyError: If the downloaded frame has no ``'Adj Close'`` column.
    """
    # Request unadjusted OHLC explicitly: the 'Adj Close' column is only
    # produced when auto-adjustment is off, and the library default for this
    # flag is not the same in every yfinance release.
    data = yf.download(ticker, period="1d", interval="2m", auto_adjust=False)
    # Some yfinance releases return (field, ticker) MultiIndex columns even for
    # a single symbol; drop the ticker level so the result keeps flat columns.
    if isinstance(data.columns, pd.MultiIndex):
        data = data.droplevel(1, axis=1)
    return data[['Adj Close']].reset_index()

In [11]:
# sentiment analysis
def find_sentiment(ticker, timeout=30):
    """Return the mean VADER compound score of a ticker's Finviz headlines.

    Fetches the Finviz quote page for ``ticker``, reads the link text of
    every row in the element with id ``news-table``, scores each headline
    with ``SentimentIntensityAnalyzer`` and averages the ``compound`` values.

    Args:
        ticker: Symbol interpolated into the Finviz quote URL.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        float: Average compound score over all headlines, or ``0.0`` when the
        news table contains no headline links.

    Raises:
        requests.HTTPError: If Finviz answers with a 4xx/5xx status.
        ValueError: If the page has no ``news-table`` element.
    """
    url = f'https://finviz.com/quote.ashx?t={ticker}'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, features='html.parser')
    news_table = soup.find(id='news-table')
    if news_table is None:
        raise ValueError(f"No news table found on the Finviz page for {ticker!r}")

    try:
        analyzer = SentimentIntensityAnalyzer()
    except LookupError:
        # The VADER lexicon is a separate NLTK resource; fetch it on first use
        # so the function also works in a fresh environment.
        nltk.download('vader_lexicon', quiet=True)
        analyzer = SentimentIntensityAnalyzer()

    # Only the headline text is scored. The row's date/time cell was parsed
    # but never used before (and crashed when the first row had no date), so
    # it is no longer read. Rows without a link are skipped.
    sentiment_scores = [
        analyzer.polarity_scores(row.a.text)['compound']
        for row in news_table.find_all('tr')
        if row.a is not None
    ]

    if not sentiment_scores:
        # No headlines: report a neutral score instead of dividing by zero.
        return 0.0
    return sum(sentiment_scores) / len(sentiment_scores)



In [12]:
# Load the saved price history from the CSV in the working directory.
# NOTE(review): the rendered frame shows an 'Unnamed: 0' column, which suggests
# the CSV was written with its index - consider index_col=0 after checking
# that no later cell relies on that column.
df = pd.read_csv(filepath_or_buffer="stock_prices.csv")

In [13]:
# Display the loaded price DataFrame as this cell's output.
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800
1,2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200
2,2014-09-19,424.102997,427.834991,384.532013,394.795990,394.795990,37919700
3,2014-09-20,394.673004,423.295990,389.882996,408.903992,408.903992,36863600
4,2014-09-21,408.084991,412.425995,393.181000,398.821014,398.821014,26580100
...,...,...,...,...,...,...,...
3090,2023-03-04,22362.923828,22405.177734,22198.980469,22353.349609,22353.349609,11166012913
3091,2023-03-05,22354.144531,22613.685547,22307.142578,22435.513672,22435.513672,13317001733
3092,2023-03-06,22436.816406,22584.292969,22331.314453,22429.757812,22429.757812,17353192895
3093,2023-03-07,22428.322266,22527.417969,22011.261719,22219.769531,22219.769531,22765452204
