In [1]:
# 1. Import the necessary functions from edgartools
from edgar import *
import requests
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
from tqdm.auto import tqdm
from edgar import Company
import multiprocessing as mp
import swifter
import time


In [2]:
# 2. Tell the SEC who you are: pass a contact identity to edgartools'
#    set_identity before any EDGAR data is requested.
# NOTE(review): a personal e-mail address is hard-coded here; consider reading
# it from an environment variable before committing or sharing the notebook.
set_identity("yishaibz@gmail.com")

# base data

In [3]:
# Load the SIC-code lookup table (columns shown in the output below:
# 'SIC Code', 'Office', 'Industry Title') from a CSV path that is relative to
# the notebook's working directory.
industries = pd.read_csv(r'./edgar api/sic_industry_code.csv')
# Bare last expression so the notebook renders the frame.
industries

Unnamed: 0,SIC Code,Office,Industry Title
0,100,Industrial Applications and Services,AGRICULTURAL PRODUCTION-CROPS
1,200,Industrial Applications and Services,AGRICULTURAL PROD-LIVESTOCK & ANIMAL SPECIALTIES
2,700,Industrial Applications and Services,AGRICULTURAL SERVICES
3,800,Industrial Applications and Services,FORESTRY
4,900,Industrial Applications and Services,"FISHING, HUNTING AND TRAPPING"
...,...,...,...
439,8880,Office of International Corp Fin,AMERICAN DEPOSITARY RECEIPTS
440,8888,Office of International Corp Fin,FOREIGN GOVERNMENTS
441,8900,Office of Trade & Services,"SERVICES-SERVICES, NEC"
442,9721,Office of International Corp Fin,INTERNATIONAL AFFAIRS


In [4]:
# Case-insensitive substring search for "agri" in the industry titles;
# na=False makes rows with a missing title count as non-matching.
industries[industries['Industry Title'].str.contains("agri", case=False, na=False)]

Unnamed: 0,SIC Code,Office,Industry Title
0,100,Industrial Applications and Services,AGRICULTURAL PRODUCTION-CROPS
1,200,Industrial Applications and Services,AGRICULTURAL PROD-LIVESTOCK & ANIMAL SPECIALTIES
2,700,Industrial Applications and Services,AGRICULTURAL SERVICES
95,2870,Industrial Applications and Services,AGRICULTURAL CHEMICALS


In [5]:

# SEC endpoints and headers
TICKER_URL = "https://www.sec.gov/files/company_tickers.json"
# NOTE(review): this User-Agent is still the template placeholder; replace it
# with a real contact identity before sharing or re-running at volume.
HEADERS = {"User-Agent": "Your Name (your.email@example.com)"}

# Step 1: Get the master list of companies.
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() surfaces HTTP errors (e.g. 403/429) right here instead of
# as a confusing JSON decode failure on the next line.
response = requests.get(TICKER_URL, headers=HEADERS, timeout=30)
response.raise_for_status()
companies = response.json()

# Convert JSON to DataFrame: every top-level key of the payload becomes one
# row (columns seen in the output: cik_str, ticker, title).
df_tickers = pd.DataFrame.from_dict(companies, orient='index')
df_tickers

Unnamed: 0,cik_str,ticker,title
0,320193,AAPL,Apple Inc.
1,789019,MSFT,MICROSOFT CORP
2,1045810,NVDA,NVIDIA CORP
3,1018724,AMZN,AMAZON COM INC
4,1652044,GOOGL,Alphabet Inc.
...,...,...,...
9703,1884046,SPKLU,Spark I Acquisition Corp
9704,1884046,SPKLW,Spark I Acquisition Corp
9705,2019804,HONDW,HCM II Acquisition Corp.
9706,2019804,HONDU,HCM II Acquisition Corp.


In [6]:
# Case-insensitive substring search for "nutri" in the company titles;
# na=False makes rows with a missing title count as non-matching.
df_tickers[df_tickers['title'].str.contains("nutri", case=False, na=False)]

Unnamed: 0,cik_str,ticker,title
515,1725964,NTR,Nutrien Ltd.
4803,1676047,NTRB,NutriBand Inc.
6136,1160420,ADIA,"Adia Nutrition, Inc."
8032,1676047,NTRBW,NutriBand Inc.


In [7]:
# Add the industry (SIC code) of each company to the ticker table

In [15]:
# Print the epoch timestamp, block for 10 seconds, then print it again so the
# two values can be compared (the recorded output differs by ~10 s).
print(time.time())
time.sleep(10)
print(time.time())

1741698986.458953
1741698996.4626973


In [20]:
def get_sic(cik):
    """Look up ``cik`` through ``Company`` and return its ``sic`` attribute.

    Each call first pauses for 0.1 s, so back-to-back sequential calls are
    issued at a rate of at most ten per second.
    """
    # Throttle before touching the network: 0.1 s pause => <= 10 calls/second.
    time.sleep(0.1)
    company = Company(cik)
    return company.sic

In [22]:
# Register tqdm's progress_apply on pandas objects.
tqdm.pandas()
# Call get_sic once per row of 'cik_str' (sequentially, with a progress bar)
# and store the results in a new 'industry' column on df_tickers in place.
# NOTE(review): the table lists several tickers per CIK (e.g. units/warrants),
# so the same CIK is fetched more than once; the recorded run was interrupted.
df_tickers['industry'] = df_tickers['cik_str'].progress_apply(get_sic)

  0%|          | 0/9708 [00:00<?, ?it/s]


KeyboardInterrupt



In [25]:
from dask.distributed import Client, Lock
import time

# Start a Dask client backed by 4 worker processes with one thread each.
# NOTE(review): the recorded output ("Perhaps you already have a cluster
# running?") suggests this cell was run while another cluster was still up;
# re-running it presumably creates an additional cluster each time — confirm.
client = Client(processes=True, n_workers=4, threads_per_worker=1)

# Create a named distributed lock ("rate-limit-lock") used by get_sic below to
# pace lookups.
rate_lock = Lock("rate-limit-lock")

def get_sic(cik):
    """Return the ``sic`` of the company identified by ``cik``, pacing calls.

    The shared ``rate_lock`` is held only for the pacing delay, so successive
    lookups *start* at least 1/9 s apart (about 9 per second) no matter which
    worker issues them. The lookup itself runs after the lock is released;
    holding the lock during the network call would force all workers to run
    one at a time and cancel the benefit of having several of them.

    NOTE(review): this paces ``Company(cik)`` constructions; it assumes each
    construction costs about one HTTP request — confirm against edgartools.

    Args:
        cik: Company identifier accepted by ``Company``.

    Returns:
        The ``sic`` attribute of the resolved ``Company``.
    """
    # Only one worker at a time may sit in the pacing delay.
    with rate_lock:
        time.sleep(1/9)  # ~0.111 s between lookup starts => at most ~9/second
    # Lock released: other workers can begin pacing while this lookup runs.
    return Company(cik).sic

# Apply get_sic to every 'cik_str' through swifter's .apply accessor (not
# progress_apply) and store the result in a new 'industry' column in place.
# The recorded output shows swifter displaying both a "Dask Apply" and a
# "Pandas Apply" progress bar for this call.
import swifter
df_tickers['industry'] = df_tickers['cik_str'].swifter.apply(get_sic)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 56718 instead


Dask Apply:   0%|          | 0/17 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/9708 [00:00<?, ?it/s]

In [27]:
# Persist the enriched ticker table (including 'industry') to CSV without the
# index column.
# NOTE(review): the later filters compare 'industry' to the string '2870'; if
# this CSV is reloaded the column may be parsed as integers — verify on reload.
df_tickers.to_csv(r'./edgar api/companies_info.csv',index=False)

In [29]:
# Companies whose 'industry' equals the string '2870' (listed as AGRICULTURAL
# CHEMICALS in the industries table above).
df_tickers[df_tickers['industry']=='2870']

Unnamed: 0,cik_str,ticker,title,industry
515,1725964,NTR,Nutrien Ltd.,2870
768,1324404,CF,"CF Industries Holdings, Inc.",2870
1076,941221,ICL,ICL Group Ltd.,2870
1097,1285785,MOS,MOSAIC CO,2870
1654,825542,SMG,SCOTTS MIRACLE-GRO CO,2870
2754,1425292,UAN,"CVR PARTNERS, LP",2870
3855,1769484,BIOX,Bioceres Crop Solutions Corp.,2870
4317,5981,AVD,AMERICAN VANGUARD CORP,2870
4948,1705843,CBUS,"Cibus, Inc.",2870
5368,1794276,YCQH,YCQH Agricultural Technology Co. Ltd,2870


In [30]:
# Look up a single row by exact ticker symbol.
df_tickers[df_tickers['ticker']=='UAN']

Unnamed: 0,cik_str,ticker,title,industry
2754,1425292,UAN,"CVR PARTNERS, LP",2870


# analyse

https://github.com/dgunning/edgartools/blob/main/docs/quick-guide.md

In [13]:
# 3. Start using the library: fetch filings with edgartools' default
#    arguments, then narrow them with a date filter.
filings = get_filings()
# presumably "2015-01-01:" is an open-ended range starting on that date —
# TODO confirm against the edgartools filter documentation.
filings = filings.filter(date="2015-01-01:")

In [14]:
# Filter the filings by CIK; 1725964 is Nutrien Ltd. (NTR) in the ticker table.
filings.filter(cik=1725964)

[1;38;5;245m╭─[0m[1;38;5;245m─────────────────────────────────────────────────[0m[1;38;5;245m SEC Filings [0m[1;38;5;245m─────────────────────────────────────────────────[0m[1;38;5;245m─╮[0m
[1;38;5;245m│[0m                                                                                                                 [1;38;5;245m│[0m
[1;38;5;245m│[0m   [1m [0m[1mForm   [0m[1m [0m [1m [0m[1m       CIK[0m[1m [0m [1m [0m[1mTicker[0m[1m [0m [1m [0m[1mCompany                               [0m[1m [0m [1m [0m[1mFiling Date[0m[1m [0m [1m [0m[1mAccession Number   [0m[1m [0m  [1;38;5;245m│[0m
[1;38;5;245m│[0m  ─────────────────────────────────────────────────────────────────────────────────────────────────────────────  [1;38;5;245m│[0m
[1;38;5;245m│[0m    6-K      [2m [0m[2m   1725964[0m[2m [0m [33m [0m[33mNTR   [0m[33m [0m [1;32m [0m[1;32mNutrien Ltd.                          [0m[1;32m [0m  2025-02-28   [2m [0m[2m

In [10]:
# IPython introspection: show the signature/docstring of filings.filter.
# NOTE(review): development aid only — consider removing from the final notebook.
?filings.filter

[1;31mSignature:[0m
[0mfilings[0m[1;33m.[0m[0mfilter[0m[1;33m([0m[1;33m
[0m    [1;33m*[0m[1;33m,[0m[1;33m
[0m    [0mform[0m[1;33m:[0m [0mUnion[0m[1;33m[[0m[0mstr[0m[1;33m,[0m [0mList[0m[1;33m[[0m[0mUnion[0m[1;33m[[0m[0mstr[0m[1;33m,[0m [0mint[0m[1;33m][0m[1;33m][0m[1;33m,[0m [0mNoneType[0m[1;33m][0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mamendments[0m[1;33m:[0m [0mbool[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mfiling_date[0m[1;33m:[0m [0mOptional[0m[1;33m[[0m[0mstr[0m[1;33m][0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mdate[0m[1;33m:[0m [0mOptional[0m[1;33m[[0m[0mstr[0m[1;33m][0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mcik[0m[1;33m:[0m [0mUnion[0m[1;33m[[0m[0mstr[0m[1;33m,[0m [0mint[0m[1;33m,[0m [0mList[0m[1;33m[[0m[0mUnion[0m[1;33m[[0m[0mstr[0m[1;33m,[0m [0mint[0m[1;33m][0m[1;33m][0m[1;33m][0

# company

In [11]:
# Resolve a single company by ticker symbol (NTR = Nutrien Ltd.).
company = Company("NTR")


In [12]:
# Display the company's SIC code; the recorded output is the string '2870'.
company.sic

'2870'

The financials property returns a Financials instance. This instance has methods that return the balance sheet, income statement and cash flow statement.

```
from edgar import MultiFinancials

filings = company.latest("10-K", 5)
financials = MultiFinancials(filings)
```

In [7]:
# Request the latest five 10-K filings for `company` and wrap them in
# MultiFinancials, following the snippet quoted in the markdown above.
# NOTE(review): the recorded run raised "TypeError: 'NoneType' object is not
# iterable"; the cause is not visible here — check what company.latest
# returned for this company before passing it on.
ntr_filings = company.latest("10-K", 5)
# NOTE(review): the name is rebound from the filings to the MultiFinancials
# object, so re-running only this line would feed it its own output.
ntr_filings  = MultiFinancials(ntr_filings)
ntr_filings

TypeError: 'NoneType' object is not iterable

In [19]:
# Display the ticker table. The recorded output has no 'industry' column, so
# this cell was run before the column was added (execution count 19).
df_tickers

Unnamed: 0,cik_str,ticker,title
0,320193,AAPL,Apple Inc.
1,789019,MSFT,MICROSOFT CORP
2,1045810,NVDA,NVIDIA CORP
3,1018724,AMZN,AMAZON COM INC
4,1652044,GOOGL,Alphabet Inc.
...,...,...,...
9703,1884046,SPKLU,Spark I Acquisition Corp
9704,1884046,SPKLW,Spark I Acquisition Corp
9705,2019804,HONDW,HCM II Acquisition Corp.
9706,2019804,HONDU,HCM II Acquisition Corp.


## by industry