In [1]:
import pandas as pd
import io
import requests
import json


_EXCHANGE_LIST = ['nyse', 'nasdaq', 'amex']

_SECTORS_LIST = set(['Consumer Non-Durables', 'Capital Goods', 'Health Care',
       'Energy', 'Technology', 'Basic Industries', 'Finance',
       'Consumer Services', 'Public Utilities', 'Miscellaneous',
       'Consumer Durables', 'Transportation'])


# headers and params used to bypass NASDAQ's anti-scraping mechanism in function __exchange2df
headers = {
    'authority': 'nasdaq.com',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'sec-fetch-site': 'cross-site',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-user': '?1',
    'sec-fetch-dest': 'document',
    'referer': 'https://github.com/shilewenuw/get_all_tickers/issues/2',
    'accept-language': 'en-US,en;q=0.9',
    'cookie': 'AKA_A2=A; NSC_W.TJUFEFGFOEFS.OBTEBR.443=ffffffffc3a0f70e45525d5f4f58455e445a4a42378b',
}

def params(exchange):
    return (
        ('exchange', exchange),
        ('download', 'true'),
        ('tableonly', 'true'),

    )

def params_region(region):
    return (
        ('region', region),
        ('download', 'true'),
        ('tableonly', 'true'),
    )

# I know it's weird to have Sectors as constants, yet the Regions as enums, but
# it makes the most sense to me
class Region:
    AFRICA = 'AFRICA'
    EUROPE = 'EUROPE'
    ASIA = 'ASIA'
    AUSTRALIA_SOUTH_PACIFIC = 'AUSTRALIA+AND+SOUTH+PACIFIC'
    CARIBBEAN = 'CARIBBEAN'
    SOUTH_AMERICA = 'SOUTH+AMERICA'
    MIDDLE_EAST = 'MIDDLE+EAST'
    NORTH_AMERICA = 'NORTH+AMERICA'

class SectorConstants:
    NON_DURABLE_GOODS = 'Consumer Non-Durables'
    CAPITAL_GOODS = 'Capital Goods'
    HEALTH_CARE = 'Health Care'
    ENERGY = 'Energy'
    TECH = 'Technology'
    BASICS = 'Basic Industries'
    FINANCE = 'Finance'
    SERVICES = 'Consumer Services'
    UTILITIES = 'Public Utilities'
    DURABLE_GOODS = 'Consumer Durables'
    TRANSPORT = 'Transportation'


# get tickers from chosen exchanges (default all) as a list
def get_tickers(NYSE=True, NASDAQ=True, AMEX=True):
    tickers_list = []
    if NYSE:
        tickers_list.extend(__exchange2list('nyse'))
    if NASDAQ:
        tickers_list.extend(__exchange2list('nasdaq'))
    if AMEX:
        tickers_list.extend(__exchange2list('amex'))
    return tickers_list


def get_tickers_filtered(mktcap_min=None, mktcap_max=None, sectors=None):
    tickers_list = []
    for exchange in _EXCHANGE_LIST:
        tickers_list.extend(__exchange2list_filtered(exchange, mktcap_min=mktcap_min, mktcap_max=mktcap_max, sectors=sectors))
    return tickers_list


def get_biggest_n_tickers(top_n, sectors=None):
    df = pd.DataFrame()
    for exchange in _EXCHANGE_LIST:
        temp = __exchange2df(exchange)
        df = pd.concat([df, temp])
        
    df = df.dropna(subset={'marketCap'})
    df = df[~df['symbol'].str.contains("\.|\^")]

    if sectors is not None:
        if isinstance(sectors, str):
            sectors = [sectors]
        if not _SECTORS_LIST.issuperset(set(sectors)):
            raise ValueError('Some sectors included are invalid')
        sector_filter = df['sector'].apply(lambda x: x in sectors)
        df = df[sector_filter]

    def cust_filter(mkt_cap):
        if not mkt_cap:
            return float(0.0)
        return float(mkt_cap) / 1e6
    df['marketCap'] = df['marketCap'].apply(cust_filter)

    df = df.sort_values('marketCap', ascending=False)
    if top_n > len(df):
        raise ValueError('Not enough companies, please specify a smaller top_n')

    return df.iloc[:top_n]['symbol'].tolist()


def get_tickers_by_region(region):
    if region is not None:
        response = requests.get('https://api.nasdaq.com/api/screener/stocks', headers=headers, params=params_region(region))
        text_data= response.text
        json_dict= json.loads(text_data)
        columns = list(json_dict['data']['headers'].keys())
        df = pd.DataFrame(json_dict['data']['rows'], columns=columns)
        return df
    else:
        raise ValueError('Please enter a valid region (use a Region.REGION as the argument, e.g. Region.AFRICA)')

def __exchange2df(exchange):
    response = requests.get('https://api.nasdaq.com/api/screener/stocks', headers=headers, params=params(exchange))
    text_data= response.text
    json_dict= json.loads(text_data)
    columns = list(json_dict['data']['headers'].keys())
    df = pd.DataFrame(json_dict['data']['rows'], columns=columns)
    return df

def __exchange2list(exchange):
    df = __exchange2df(exchange)
    # removes weird tickers
    df_filtered = df[~df['symbol'].str.contains("\.|\^")]
    return df['symbol'].tolist()

# market caps are in millions
def __exchange2list_filtered(exchange, mktcap_min=None, mktcap_max=None, sectors=None):
    df = __exchange2df(exchange)
    df = df.dropna(subset={'marketCap'})
    df = df[~df['symbol'].str.contains("\.|\^")]

    if sectors is not None:
        if isinstance(sectors, str):
            sectors = [sectors]
        if not _SECTORS_LIST.issuperset(set(sectors)):
            raise ValueError('Some sectors included are invalid')
        sector_filter = df['sector'].apply(lambda x: x in sectors)
        df = df[sector_filter]

    def cust_filter(mkt_cap):
        if not mkt_cap:
            return float(0.0)
        return float(mkt_cap) / 1e6
    df['marketCap'] = df['marketCap'].apply(cust_filter)
    if mktcap_min is not None:
        df = df[df['marketCap'] > mktcap_min]
    if mktcap_max is not None:
        df = df[df['marketCap'] < mktcap_max]
    return df['symbol'].tolist()


# save the tickers to a CSV
def save_tickers(NYSE=True, NASDAQ=True, AMEX=True, filename='tickers.csv'):
    tickers2save = get_tickers(NYSE, NASDAQ, AMEX)
    df = pd.DataFrame(tickers2save)
    df.to_csv(filename, header=False, index=False)

def save_tickers_by_region(region, filename='tickers_by_region.csv'):
    tickers2save = get_tickers_by_region(region)
    df = pd.DataFrame(tickers2save)
    df.to_csv(filename, header=False, index=False)


if __name__ == '__main__':

    # tickers of all exchanges
    tickers = get_tickers()
    print(tickers[:5])

    # tickers from NYSE and NASDAQ only
    tickers = get_tickers(AMEX=False)

    # default filename is tickers.csv, to specify, add argument filename='yourfilename.csv'
    save_tickers()

    # save tickers from NYSE and AMEX only
    save_tickers(NASDAQ=False)

    # get tickers from Asia
    tickers_asia = get_tickers_by_region(Region.ASIA)
    print(tickers_asia[:5])

    # save tickers from Europe
    save_tickers_by_region(Region.EUROPE, filename='EU_tickers.csv')

    # get tickers filtered by market cap (in millions)
    filtered_tickers = get_tickers_filtered(mktcap_min=500, mktcap_max=2000)
    print(filtered_tickers[:5])

    # not setting max will get stocks with $2000 million market cap and up.
    filtered_tickers = get_tickers_filtered(mktcap_min=2000)
    print(filtered_tickers[:5])

    # get tickers filtered by sector
    filtered_by_sector = get_tickers_filtered(mktcap_min=200e3, sectors=SectorConstants.FINANCE)
    print(filtered_by_sector[:5])

    # get tickers of 5 largest companies by market cap (specify sectors=SECTOR)
    top_5 = get_biggest_n_tickers(5)
    print(top_5)

['A', 'AA', 'AAC', 'AAIC', 'AAIC^B']
  symbol                                               name lastsale  \
0   AACG   ATA Creativity Global American Depositary Shares    $4.36   
1    ACH  Aluminum Corporation of China Limited American...   $10.65   
2   ADAG            Adagene Inc. American Depositary Shares   $17.00   
3   AEHL  Antelope Enterprise Holdings Limited Common St...    $2.81   
4   AGBA            AGBA Acquisition Limited Ordinary Share   $10.58   

  netchange pctchange      marketCap    country ipoyear  volume  \
0     -0.56  -11.382%   136688184.00      China          711232   
1     -0.25   -2.294%  7251658677.00      China    2001  113153   
2     -0.73   -4.117%   717056940.00      China    2021   11062   
3     -0.33   -10.51%    10386479.00      China          551861   
4      0.00     0.00%    63215500.00  Hong Kong    2019      21   

              sector                 industry                           url  
0  Consumer Services  Other Consumer Services  /m

In [24]:
exchange=input('NYSE,NASDAQ,AMEX:')

NYSE,NASDAQ,AMEX:NASDAQ


In [25]:
filtered_tickers = __exchange2list_filtered(exchange, mktcap_min=200, mktcap_max=10000000, sectors=None)

In [26]:
len(filtered_tickers)

2202

In [27]:
df=pd.DataFrame({'Symbols':filtered_tickers})
df

Unnamed: 0,Symbols
0,AACQ
1,AAL
2,AAOI
3,AAON
4,AAPL
...,...
2197,ZNGA
2198,ZNTL
2199,ZS
2200,ZUMZ


In [28]:
#df.to_csv(exchange+'.csv')