In [1]:
# Turn off Jedi-based tab completion in the IPython completer.
%config Completer.use_jedi = False

In [5]:
from bs4 import BeautifulSoup
import requests
import numpy as np
import pandas as pd
import csv

In [6]:
# Get ticker list

# This is specifically for the small/medium cap Healthcare sector with stock price between 10-50
# on the date of 2/26/2021.

def get_tickers():
    """Scrape ticker symbols from a saved Yahoo Finance screener.

    The screener is requested at offsets 0, 100, ..., 1000 (``count=100``
    results per request, i.e. eleven requests in total). For every page the
    first ``<tr>`` is skipped and the text of the ``aria-label="Symbol"``
    cell of each remaining row is collected.

    Returns:
        list[str]: Symbols in the order they were encountered. Rows without
        a Symbol cell are skipped rather than raising ``IndexError``.
    """
    tickers = []

    for offset in range(0, 1001, 100):
        url = f'https://finance.yahoo.com/screener/unsaved/dc285ff9-d928-4d6f-af8c-91a4f115eb22?count=100&dependentField=sector&dependentValues=Healthcare&offset={offset}'
        html = requests.get(url).text
        soup = BeautifulSoup(html, 'lxml')

        # The first <tr> is dropped (presumably the table header row).
        rows = soup.find_all('tr')[1:]

        for row in rows:
            symbol_cell = row.find('td', attrs={'aria-label': 'Symbol'})
            if symbol_cell is None:
                # Not a screener result row; ignore it instead of crashing.
                continue
            tickers.append(symbol_cell.text)

    return tickers


In [7]:
# Run the screener scrape and keep the resulting symbol list for the cells below.
tickers = get_tickers()

In [8]:
# Wrap the symbols in a Series so they can be written out with pandas.
ticker_series = pd.Series(data=tickers)

In [10]:
# Save the symbols to CSV; index=False leaves the row index out of the file.
ticker_series.to_csv('HealthCareTickers.csv', index=False)

In [118]:
# Add in new date data

def _parse_history_row(cells, tick):
    """Turn the cell texts of one scraped history row into an output record.

    Args:
        cells: List of the row's ``<td>`` texts, in page order.
        tick: Ticker symbol the row belongs to.

    Returns:
        ``[Date, Open, High, Low, Close, Volume, Ticker]`` with the four
        prices as ``float`` and the volume as ``int``, or ``None`` when the
        row should be skipped (missing value, unexpected cell count, or a
        non-numeric price/volume).
    """
    curr_data = list(cells) + [tick]

    # A lone '-' marks a missing value; rows that do not have exactly seven
    # cells (plus the ticker) are not daily price rows.
    if '-' in curr_data or len(curr_data) != 8:
        return None

    # Drop the sixth cell so the row lines up with the seven output columns
    # (presumably the adjusted close, given includeAdjustedClose=true).
    curr_data.pop(5)

    try:
        for j in range(1, 5):
            curr_data[j] = float(curr_data[j].replace(',', ''))
        curr_data[5] = int(curr_data[5].replace(',', ''))
    except ValueError:
        return None

    return curr_data


def get_df(tickers, verbose=1):
    """
    Input a list of tickers, get out the daily stock data scraped from the
    Yahoo Finance history pages for each of them. This includes the date,
    open, high, low, close and volume for each ticker given.

    Three pages are requested per ticker (two fixed ``period1``/``period2``
    windows and the default history page), oldest window first. Each page's
    rows are walked in reverse page order (assuming the page lists the newest
    day first, this yields date-ascending rows), and the data is grouped by
    ticker. Because the windows overlap, a (Date, Ticker) pair that has
    already been stored is skipped, so every trading day appears once.

    Set verbose=1 if you would like an update after every ticker data has been downloaded.
    Otherwise, set verbose=0

    Returns:
        pandas.DataFrame with columns
        ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Ticker'].
    """

    columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Ticker']

    # Rows are collected in a list and turned into a DataFrame once at the
    # end; appending to a DataFrame row by row is quadratic.
    records = []
    # (Date, Ticker) pairs already stored, for O(1) duplicate detection.
    seen = set()
    total = len(tickers)

    for position, tick in enumerate(tickers, start=1):
        urls = [f'https://finance.yahoo.com/quote/{tick}/history?p={tick}',
                f'https://finance.yahoo.com/quote/{tick}/history?period1=1582588800&period2=1601596800&interval=1d&filter=history&frequency=1d&includeAdjustedClose=true',
                f'https://finance.yahoo.com/quote/{tick}/history?period1=1577923200&period2=1589414400&interval=1d&filter=history&frequency=1d&includeAdjustedClose=true',]

        for url in urls[::-1]:
            html = requests.get(url).text
            soup = BeautifulSoup(html, 'lxml')
            rows = soup.find_all('tr', attrs={'class': "BdT Bdc($seperatorColor) Ta(end) Fz(s) Whs(nw)"})

            for row in reversed(rows):
                record = _parse_history_row([x.text for x in row.find_all('td')], tick)
                if record is None:
                    continue

                # The ticker sits at index 6 (index 5 is the volume). Skip
                # only the duplicate row: the rows after it on the same page
                # may still be new.
                key = (record[0], record[6])
                if key in seen:
                    continue
                seen.add(key)
                records.append(record)

        if verbose==1:
            print(f'Ticker: {tick}, {position} out of {total} completed')

    return pd.DataFrame(records, columns=columns)

In [119]:
# Download the price history for every scraped ticker (get_df requests three
# pages per ticker) and print a progress line after each one.
health_care_data = get_df(tickers, verbose=1)

Ticker: ABCL, 1 out of 159 completed
Ticker: MRVI, 2 out of 159 completed
Ticker: ACAD, 3 out of 159 completed
Ticker: NVTA, 4 out of 159 completed
Ticker: SGFY, 5 out of 159 completed
Ticker: RCM, 6 out of 159 completed
Ticker: SHC, 7 out of 159 completed
Ticker: CHNG, 8 out of 159 completed
Ticker: GTBIF, 9 out of 159 completed
Ticker: EXEL, 10 out of 159 completed
Ticker: ONEM, 11 out of 159 completed
Ticker: HALO, 12 out of 159 completed
Ticker: NVST, 13 out of 159 completed
Ticker: TGTX, 14 out of 159 completed
Ticker: PACB, 15 out of 159 completed
Ticker: SANA, 16 out of 159 completed
Ticker: APHA, 17 out of 159 completed
Ticker: AMWL, 18 out of 159 completed
Ticker: PRGO, 19 out of 159 completed
Ticker: IOVA, 20 out of 159 completed
Ticker: CERT, 21 out of 159 completed
Ticker: CVET, 22 out of 159 completed
Ticker: ALLO, 23 out of 159 completed
Ticker: FGEN, 24 out of 159 completed
Ticker: SDC, 25 out of 159 completed
Ticker: PINC, 26 out of 159 completed
Ticker: OCDX, 27 out of

In [121]:
# Save the scraped history to CSV. The index is not disabled here, so pandas
# also writes the DataFrame's row index as the first column of the file.
health_care_data.to_csv('HealthcareStockFeb2021.csv')