# Get Stock Market Data for the Top 20 NASDAQ Stocks

This script gets the latest available data for the top 20 NASDAQ stocks and saves it as JSON.

- Outputs the top 100 NASDAQ stocks as a CSV file: `NASDAQ_DD_MM_YYYY.csv`
- Outputs the daily end-of-day (EOD) price data for the top 20 NASDAQ stocks as a JSON file: `tradeData_DD_MM_YYYY.json`


This uses the API provided by Tiingo, which requires an API key.

## 0. Imports

In [24]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import pendulum
import json
import numpy as np
import plotly.graph_objects as go
from datetime import datetime

## 1. Get list of top 100 NASDAQ stocks

Here we scrape the list of the top 100 NASDAQ stocks from www.slickcharts.com.

In [5]:
# Scrape the NASDAQ-100 table from slickcharts into the `topStocks` DataFrame
# with columns Company, Symbol, Weight, Price, Chg and Chg%.
url = 'https://www.slickcharts.com/nasdaq100'

# A browser-like User-Agent is required; the site blocks requests that
# identify themselves as a Python script.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}

# Fail loudly on a blocked/failed request instead of silently parsing an
# error page, and never hang forever on a stalled connection.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
html_content = response.content

soup = BeautifulSoup(html_content, 'html.parser')
res = []
# The first <tr> is skipped (assumed to be the header row).
for entry in soup.find_all('tr')[1:]:
    lines = str(entry).split("\n")
    # Only rows that link to a symbol page are stock rows.
    if not any('a href="/symbol/' in x for x in lines):
        continue

    # Lines 2..7 of the serialised row are taken as the cells for
    # Company, Symbol, Weight, Price, Chg and Chg% (in that order).
    temp = [BeautifulSoup(x, "lxml").text for x in lines[2:8]]
    temp[2] = float(temp[2])
    temp[3] = float(temp[3].replace(",", ""))                   # drop thousands separator
    temp[4] = float(temp[4])
    temp[5] = float(temp[5].replace("(", "").replace("%)", ""))  # "(-2.29%)" -> -2.29
    res.append(temp)

# Build the frame straight from the list of rows: routing it through
# np.array() would coerce the mixed str/float rows to strings and discard
# the numeric conversion above (and fails outright when `res` is empty).
topStocks = pd.DataFrame(res, columns=['Company', 'Symbol', 'Weight', 'Price', 'Chg', 'Chg%'])
topStocks.head(5)

Unnamed: 0,Company,Symbol,Weight,Price,Chg,Chg%
0,Apple Inc,AAPL,13.712,167.6,-3.92,-2.29
1,Microsoft Corp,MSFT,10.571,277.57,-8.58,-3.0
2,Amazon.com Inc,AMZN,6.947,133.05,-5.18,-3.75
3,Tesla Inc,TSLA,4.554,867.0,-23.0,-2.58
4,Alphabet Inc,GOOG,3.657,114.9,-3.22,-2.73


## 2. Get stock data from the past 5 years for the top 20 stocks

### 2.1 Tiingo API function

In [6]:
def getTradeData(symbol, API_key):
    """Fetch daily price history for one ticker from the Tiingo API.

    The request covers the period from five years before "now" up to the
    latest data Tiingo returns.

    Args:
        symbol: Ticker symbol to query, e.g. ``"AAPL"``.
        API_key: Tiingo API token, sent as the ``token`` query parameter.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If Tiingo answers with a 4xx/5xx status
            (for example an invalid token or an exceeded rate limit).
        requests.Timeout: If the server does not respond within 30 seconds.
    """
    startDate = pendulum.now().subtract(years=5).format("YYYY-MM-DD")
    headers = {
        'Content-Type': 'application/json'
    }
    requestResponse = requests.get(
        f"https://api.tiingo.com/tiingo/daily/{symbol}/prices",
        # Let requests build and URL-encode the query string.
        params={'startDate': startDate, 'token': API_key},
        headers=headers,
        timeout=30,
    )
    # Surface API errors here rather than returning an error payload that
    # would be stored and only break later when it is treated as price data.
    requestResponse.raise_for_status()
    return requestResponse.json()

### 2.2 Call API on top 20 symbols

In [16]:
import os

# getTradeData() requires the Tiingo token as its second argument. It is read
# from the environment so the secret is never stored in the notebook.
API_key = os.environ['TIINGO_API_KEY']

# NOTE(review): the section headings say "top 20", but only the first 10 rows
# are requested here -- confirm which count is intended.
selected = topStocks.head(10)

# Maps each symbol to its company name and the raw Tiingo response.
tradeDataJson = {}

for company, symbol in zip(selected.Company, selected.Symbol):
    tradeDataJson[symbol] = {'companyName': company,
                             'data': getTradeData(symbol, API_key)}

In [17]:
# Snapshot the raw API responses to disk so later cells can reload them
# without calling the API again.
# NOTE(review): no explicit encoding is given, so the platform default is
# used; since ensure_ascii=False writes non-ASCII characters as-is, consider
# setting encoding='utf-8' here and in the cell that reloads the file.
with open('tradeData_22_08_2022.json', 'w') as f:
    serialized = json.dumps(tradeDataJson, ensure_ascii=False)
    f.write(serialized)

In [19]:
# Reload the saved snapshot. The context manager guarantees the file handle
# is closed once the JSON has been parsed.
with open('tradeData_22_08_2022.json') as f:
    tradeDataJson = json.load(f)

# Only the adjusted values are needed, so the remaining columns are dropped.
unused_columns = ['close', 'high', 'low', 'open', 'volume', 'divCash', 'splitFactor']

for k in tradeDataJson:
    # Replace each symbol's list of records with a DataFrame indexed by date.
    tradeDataJson[k]['data'] = (
        pd.DataFrame(tradeDataJson[k]['data'])
        .drop(columns=unused_columns)
        .set_index('date')
    )


In [20]:
# Display one entry to check its structure: a dict holding 'companyName'
# and the date-indexed 'data' DataFrame built in the previous cell.
tradeDataJson['AAPL']

{'companyName': 'Apple Inc',
 'data':                             adjClose     adjHigh      adjLow     adjOpen  \
 date                                                                       
 2017-08-22T00:00:00.000Z   37.908339   37.960535   37.490773   37.540596   
 2017-08-23T00:00:00.000Z   37.955790   38.072044   37.694811   37.739889   
 2017-08-24T00:00:00.000Z   37.787340   38.136102   37.616518   38.062554   
 2017-08-25T00:00:00.000Z   37.927319   38.093397   37.787340   37.877496   
 2017-08-28T00:00:00.000Z   38.309297   38.435042   37.943927   37.993750   
 ...                              ...         ...         ...         ...   
 2022-08-15T00:00:00.000Z  173.190000  173.390000  171.345000  171.520000   
 2022-08-16T00:00:00.000Z  173.030000  173.710000  171.661800  172.780000   
 2022-08-17T00:00:00.000Z  174.550000  176.150000  172.570000  172.770000   
 2022-08-18T00:00:00.000Z  174.150000  174.900000  173.120000  173.750000   
 2022-08-19T00:00:00.000Z  171.520000  

## 3. Peek at the data we will be using to predict

In [30]:
# Candlestick chart of the adjusted open/high/low/close prices for AAPL,
# with the frame's date index on the x-axis.
aapl_prices = tradeDataJson['AAPL']['data']
aapl_candles = go.Candlestick(
    x=aapl_prices.index,
    open=aapl_prices['adjOpen'],
    high=aapl_prices['adjHigh'],
    low=aapl_prices['adjLow'],
    close=aapl_prices['adjClose'],
)
fig = go.Figure(data=[aapl_candles])
fig.update_layout(title='AAPL')
fig.show()

In [31]:
# Candlestick chart of the adjusted open/high/low/close prices for MSFT,
# with the frame's date index on the x-axis.
msft_prices = tradeDataJson['MSFT']['data']
msft_candles = go.Candlestick(
    x=msft_prices.index,
    open=msft_prices['adjOpen'],
    high=msft_prices['adjHigh'],
    low=msft_prices['adjLow'],
    close=msft_prices['adjClose'],
)
fig = go.Figure(data=[msft_candles])
fig.update_layout(title='MSFT')
fig.show()