Get the symbols from the [SEC](https://www.sec.gov/file/company-tickers)

* Download from the link
* Parse the file
* Save it to `./data/symbols.csv` with the columns [CIK, Ticker, Name]

In [None]:
pip install --upgrade pip

In [None]:
!pip install ipywidgets
!pip install tqdm

In [None]:
import csv
import json
import requests
from pathlib import Path
from tqdm.notebook import tqdm

In [None]:
# Where the ticker list is downloaded from, and the User-Agent sent with the request.
tickers_url = 'https://www.sec.gov/files/company_tickers.json'
# NOTE(review): SEC fair-access guidance asks for a User-Agent that identifies
# the requester -- confirm this value is still accepted.
user_agent = 'TextCorpusLabs/EDGAR'

# Output location; the CSV path is derived from the folder so the two cannot drift apart.
data_folder = Path('./data')
symbol_file = data_folder / 'symbols.csv'

# (key in each downloaded entry, CSV column header) pairs, in output column order.
field_map = [('cik_str', 'CIK'), ('ticker', 'Ticker'), ('title', 'Name')]

# Make sure the output folder exists and remove any CSV left by a previous run.
# `exist_ok` / `missing_ok` avoid the check-then-act race of a separate `exists()` test.
data_folder.mkdir(parents=True, exist_ok=True)
symbol_file.unlink(missing_ok=True)

In [None]:
# Download the ticker list and parse the JSON response into `tickers`.
with requests.Session() as session:
    session.headers['User-Agent'] = user_agent
    # A timeout keeps the cell from hanging indefinitely if the server stalls.
    with session.get(tickers_url, timeout=30) as result:
        # Raise on a 4xx/5xx response so the failure carries the status code
        # and URL, instead of printing and calling the interactive `exit()` helper.
        result.raise_for_status()
        tickers = result.json()

In [None]:

# Write the downloaded entries to `symbol_file`: a header row followed by one
# fully quoted row per entry, with columns in `field_map` order.
source_keys = [source for source, _ in field_map]
with open(symbol_file, mode='w', encoding='utf-8', newline='') as fp:
    writer = csv.writer(fp, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
    writer.writerow([header for _, header in field_map])
    for ticker in tqdm(tickers.values()):
        # A key missing from an entry yields None, which the csv module
        # writes as an empty string.
        writer.writerow([ticker.get(key) for key in source_keys])
