### IMPORTS

In [1]:
import logging
import os
import random
from collections import Counter
from typing import Tuple

import pandas as pd
import yfinance as yf

import ealib

In [2]:
# EDGAR API requires a request header whose "User-Agent" identifies the caller.
# The contact address can be supplied through the EDGAR_USER_AGENT environment
# variable so a personal e-mail does not have to be edited into the notebook;
# the original address stays as the default when the variable is unset or empty.
req_header = {
    "User-Agent": os.environ.get("EDGAR_USER_AGENT") or "roberto.brera.24@outlook.com"
}

### Logging tests/setup

In [3]:
# Configure the root logger; force=True replaces handlers left over from an earlier run of this cell
log_format = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=log_format, level=logging.INFO, force=True)
# Emit one INFO record to confirm the configuration is active
logging.log(logging.INFO, "logging info")

2024-07-18 13:00:59,375 - INFO - logging info


### COMP TICKERS

In [4]:
# Fetch the SEC company ticker listing as a pandas dataframe (one request using req_header)
tickers_df = ealib.get_tickers_df(req_header)
# Preview the first five rows
tickers_df.head(5)

2024-07-18 13:01:02,234 - INFO - Request to https://www.sec.gov/files/company_tickers.json returned successfully. Response code: 200


Unnamed: 0,cik_str,ticker,title
0,320193,AAPL,Apple Inc.
1,789019,MSFT,MICROSOFT CORP
2,1045810,NVDA,NVIDIA CORP
3,1652044,GOOGL,Alphabet Inc.
4,1018724,AMZN,AMAZON COM INC


In [None]:
# Examples of searches that can be performed on this dataframe.
# A cell renders only its last expression automatically, so every example is
# passed to display() explicitly; otherwise the first two results are discarded.
display(tickers_df.iloc[133])                          # one row, selected by position
display(tickers_df[tickers_df["ticker"] == "NVDA"])    # rows whose ticker equals "NVDA"
display(tickers_df[0:4])                               # first four rows, by slice

In [None]:
# Search func examples: "cik_str" value of the first row returned for ticker "NVDA".
# display() is used because only the last expression of a cell is rendered on its own.
display(ealib.find_ticker(tickers_df, "NVDA")["cik_str"].iloc[0])

# An arbitrary block of companies (positions 97 to 110; a fixed slice, not a random sample)
display(tickers_df.iloc[97:111])

In [None]:
# Look up companies in tickers_df by a substring of their title (here "morgan");
# the result is the cell output.
ealib.find_title_substring(tickers_df, "morgan")

### COMP METADATA

In [None]:
# Find cik for some company given name substring.
# The matching tickers_df entries are stored in query_ticker and displayed,
# so the CIK can be read from the output.
query_substr =  "gamestop"
query_ticker = ealib.find_title_substring(tickers_df, query_substr)
query_ticker

### Filtering: Ticker df (yfinance, e.g. marketCap)

In [None]:
# Stand-in for one step of an iteration over tickers_df:
# take a single row by position and build the yfinance Ticker for its symbol
curr_ticker = tickers_df.iloc[10]
ticker_symbol = curr_ticker["ticker"]
curr_yticker = yf.Ticker(ticker_symbol)

In [None]:
# Search the yfinance info dictionary for particular keys.
# The dictionary is read once and reused, and each result goes through display()
# because a cell renders only its last expression automatically.
yf_info_dict = curr_yticker.info

display(ealib.find_dict_key_substr(yf_info_dict, ["cap"]))
display(ealib.find_dict_key_substr(yf_info_dict, ["currency"]))

# Check other yfinance info, to then compare with SEC
display(ealib.find_keys_containing_all_substrs(yf_info_dict, ["cash", "operating"]))

In [None]:
# Look up single info fields for the current ticker symbol
# (per the original note, NA is returned if the field is not found).
# Both lookups are shown via display(); without it only the last one would render.
display(ealib.yf_info(curr_ticker["ticker"], "marketCap"))
display(ealib.yf_info(curr_ticker["ticker"], "currency"))

In [None]:
# Build a series of market caps for the first ten tickers (one yf_info lookup per symbol)
first_ten_symbols = tickers_df[:10]["ticker"]
marketCap_series = first_ten_symbols.apply(lambda symbol: ealib.yf_info(symbol, "marketCap"))

# Number of symbols for which the lookup produced a missing value
marketCap_series.isna().sum()

In [None]:
# Keep only the entries whose market cap lies below the threshold (15 billion)
market_cap_threshhold = 15 * 10**9
mask = marketCap_series < market_cap_threshhold
filtered_series = marketCap_series.loc[mask]

# Number of entries that pass the filter
mask.sum()

### Filtering: Ticker df req--> Comp mtd (e.g. filing)

#### This functionality is already implemented in download_select_filings

In [None]:
# Stand-in for one step of an iteration over tickers_df: pick a single row by position
ticker_position = 6849
curr_ticker = tickers_df.iloc[ticker_position]
curr_ticker

In [None]:
# REQUEST the company metadata for the iterator ticker
curr_comp_mtd = ealib.get_response_dict(ealib.metadata_url(curr_ticker["cik_str"]), req_header, mrps=1)

# Extract filing information into dataframe.
# When the request failed or the response has no filings -> recent entry, warn and
# fall back to an empty dataframe instead of indexing into the missing data (which
# raised right after the warning). The fallback carries the two columns that the
# filtering cell below reads.
recent_filings = ((curr_comp_mtd or {}).get("filings") or {}).get("recent")
if not recent_filings:
    logging.warning(f'Could not find comp_mtd["filings"]["recent"] dictionary for {curr_ticker["ticker"]}')
    curr_filings_df = pd.DataFrame(columns=["filingDate", "form"])
else:
    curr_filings_df = pd.DataFrame.from_dict(recent_filings)
curr_filings_df

In [None]:
# Filter this ticker's filings for the queried form types, using the
# "filingDate" and "form" columns and a 180-day parameter
query_forms = ["424B5", "S-3"]
max_days = 180
filter_args = ("filingDate", "form", query_forms, max_days)
curr_select_filings = ealib.filter_filings(curr_filings_df, *filter_args)

# An empty result means the company is excluded
curr_select_filings.empty

### Filtering: Ticker df req--> Company Fact (e.g. oper. cash flow)

In [None]:
# Stand-in for one step of an iteration over tickers_df: pick a single row by position
ticker_position = 2098
curr_ticker = tickers_df.iloc[ticker_position]
curr_ticker

In [None]:
# Request the company facts dictionary for the current ticker's CIK.
# TODO: Handle failure for this and other similar requests (handled in company_facts_df)
facts_url = ealib.companyfacts_url(curr_ticker["cik_str"])
comp_facts = ealib.get_response_dict(facts_url, req_header, mrps=1)

In [None]:
# What is the right subdictionary?
# Most common facts entries by far:
#   ["facts"]["ifrs-full"]
#   ["facts"]["us-gaap"]
taxonomy_key = "us-gaap"
comp_facts_subdict = comp_facts["facts"][taxonomy_key]
comp_facts_subdict

In [None]:
# Script to count different company facts subdictionary entries
tot_tickers = 1000
start_ticker = 8080
keys_counter = Counter()  # facts key -> number of companies whose response contains it
index_dict = {}           # facts key -> index of the last company seen with that key
for index, row in tickers_df[start_ticker:start_ticker + tot_tickers].iterrows():
    # IMPORTANT: Always handle None returns i.e. unsuccessful requests
    cfacts = ealib.get_response_dict(ealib.companyfacts_url(row["cik_str"]), req_header, mrps=8)
    if cfacts is None:
        keys_counter["FAILED_REQS"] += 1
        index_dict["FAILED_REQS"] = index
        continue
    # A response without a "facts" entry is tallied separately instead of
    # aborting the whole scan with a KeyError.
    if "facts" not in cfacts:
        keys_counter["NO_FACTS"] += 1
        index_dict["NO_FACTS"] = index
        continue
    for key in cfacts["facts"]:
        keys_counter[key] += 1
        index_dict[key] = index

print(keys_counter)
print(index_dict)
print(f'tot_tickers = {tot_tickers}')
print(f'us-gaap + ifrs + failed reqs = {keys_counter["FAILED_REQS"] + keys_counter["us-gaap"] + keys_counter["ifrs-full"]}')


In [None]:
# Testing of abstracted Overall function to get a company fact dataframe from an arbitrary ticker
num_tests = 1000
none_rets = 0
for _ in range(num_tests):
    # Draw a valid row position. The previous randint(1, 10000) never picked row 0
    # and, being inclusive of 10000, could point past the last row (IndexError).
    random_number = random.randrange(len(tickers_df))
    res = ealib.company_fact_df(
        tickers_df.iloc[random_number],
        ["us-gaap", "ifrs-full"],
        ["NetCashProvidedByUsedInOperatingActivities", "CashFlowsFromUsedInOperatingActivities"],
        True,
        req_header,
        8
    )
    # Unpack defensively: the function can return None
    if res is not None:
        res_units, selected_fact, res_df = res
        logging.info(f'company_fact_df selected fact: {selected_fact}')
        # Uncomment to inspect each result:
        # print(f'res_units = {res_units}')
        # print(res_df)
    else:
        none_rets += 1

print(f'{none_rets} none returns out of {num_tests} tests')

### Calculating cash burn rate

In [None]:
# Stand-in for one step of an iteration over tickers_df: pick a single row by position
ticker_position = 6666
curr_ticker = tickers_df.iloc[ticker_position]
curr_ticker

In [None]:
# Get the dataframe for some metric (operating cash flow, under either taxonomy).
# company_fact_df can return None (other cells in this notebook check for it), and
# unpacking None fails with an opaque TypeError, so check before unpacking and
# raise an error that names the ticker.
ocf_result = ealib.company_fact_df(
    curr_ticker,
    ["us-gaap", "ifrs-full"],
    ["NetCashProvidedByUsedInOperatingActivities", "CashFlowsFromUsedInOperatingActivities"],
    True,
    req_header,
    8
)
if ocf_result is None:
    raise ValueError(f'company_fact_df returned None for {curr_ticker["ticker"]}; pick another ticker')
unit, selected_comp_fact, ocf_df = ocf_result

ocf_df

#### filing date column "filed" vs "end"

In [None]:
# Run the filings filter on the OCF dataframe, keyed on the "filed" date column
# and the "form" column, with a 180-day parameter
max_days = 180
ocf_filter_kwargs = {
    "filing_date_col": "filed",
    "form_col": "form",
    "query_forms": [""],
    "max_days": max_days,
}
ocf_df_filt = ealib.filter_filings(ocf_df, **ocf_filter_kwargs)
ocf_df_filt

In [None]:
# Finally calculate cash burn rate (expected negative).
# Input is the filtered operating-cash-flow dataframe; the result is the cell output.
ealib.ocf_average_daily_burn_rate(ocf_df_filt)

### Converting between currencies

In [None]:
# Build the currency-pair symbol "<FROM><TO>=X" and look up its previous close.
# NOTE(review): both currencies are "USD" here, giving "USDUSD=X" — confirm that
# the lookup returns a usable rate for an identical pair.
from_currency, to_currency = "USD", "USD"
forex_ticker = f"{from_currency}{to_currency}=X"
info_field = "previousClose"

ealib.yf_info(forex_ticker, info_field)

## Overall filtering function

In [5]:
# Main parameter setting and function call.
# All arguments are collected in one mapping and passed by keyword.
screening_params = dict(
    # general parameters:
    req_header=req_header,
    mrps=8,
    tickers_df=tickers_df[:1200],
    root_dir="Selected filings",
    # filtering parameters:
    query_forms=["424B5", "S-3"],
    max_days=180,
    max_market_cap=15 * (10**9),
    max_ocf_daily_burn_rate=0,
    ocf_max_days=180,
    ocf_filing_date_col="filed",
    # download parameters:
    out_df_sort_key="Avg yearly OCF burn / Market Cap",
    write_txt=False,
    write_pdf=True,
)
comp_out_df, missing_data_df = ealib.screen_select_companies(**screening_params)

2024-07-18 13:01:07,617 - INFO - Starting screening procedure for Apple Inc., at index 0
2024-07-18 13:01:08,152 - INFO - Request to https://data.sec.gov/submissions/CIK0000320193.json returned successfully. Response code: 200
2024-07-18 13:01:08,170 - INFO - No filings for Apple Inc. match the specified criteria. Iterating to next company.
2024-07-18 13:01:08,172 - INFO - Starting screening procedure for MICROSOFT CORP, at index 1
2024-07-18 13:01:08,172 - INFO - Exceeded maximum requests per second. Sleeping for 0.10499835014343262 seconds...
2024-07-18 13:01:08,653 - INFO - Request to https://data.sec.gov/submissions/CIK0000789019.json returned successfully. Response code: 200
2024-07-18 13:01:08,665 - INFO - No filings for MICROSOFT CORP match the specified criteria. Iterating to next company.
2024-07-18 13:01:08,667 - INFO - Starting screening procedure for NVIDIA CORP, at index 2
2024-07-18 13:01:08,667 - INFO - Exceeded maximum requests per second. Sleeping for 0.110470056533813

In [None]:
# Debugging box

In [6]:
# Render dataframes: first return value of screen_select_companies.
# NOTE(review): in the saved output every row carries index 0 — presumably the rows
# were concatenated without resetting the index; confirm in ealib.
comp_out_df

Unnamed: 0,Company name,OCF Currency,OCF Name,Avg daily OCF burn,USD Avg daily OCF burn,Market Cap,Market Cap Currency,USD Market Cap,CIQ ticker,Avg yearly OCF burn / Market Cap
0,REINSURANCE GROUP OF AMERICA INC,USD,NetCashProvidedByUsedInOperatingActivities,-11026050.0,-11026050.0,14165017600,USD,14165020000.0,NYSE:RGA,-0.280224
0,"MADRIGAL PHARMACEUTICALS, INC.",USD,NetCashProvidedByUsedInOperatingActivities,-923124.1,-923124.1,5741318144,USD,5741318000.0,Nasdaq:MDGL,-0.057883
0,FRANKLIN RESOURCES INC,USD,NetCashProvidedByUsedInOperatingActivities,-2009782.0,-2009782.0,12531487744,USD,12531490000.0,NYSE:BEN,-0.057736
0,CYTOKINETICS INC,USD,NetCashProvidedByUsedInOperatingActivities,-1033127.0,-1033127.0,6654879232,USD,6654879000.0,Nasdaq:CYTK,-0.055888
0,INSMED Inc,USD,NetCashProvidedByUsedInOperatingActivities,-1452017.0,-1452017.0,12322010112,USD,12322010000.0,Nasdaq:INSM,-0.042422
0,"Revolution Medicines, Inc.",USD,NetCashProvidedByUsedInOperatingActivities,-893791.9,-893791.9,7658649600,USD,7658650000.0,Nasdaq:RVMD,-0.042013
0,"Toll Brothers, Inc.",USD,NetCashProvidedByUsedInOperatingActivities,-1414038.0,-1414038.0,13156649984,USD,13156650000.0,NYSE:TOL,-0.038692
0,ATI INC,USD,NetCashProvidedByUsedInOperatingActivities,-673223.1,-673223.1,7439083008,USD,7439083000.0,NYSE:ATI,-0.032579
0,"Vaxcyte, Inc.",USD,NetCashProvidedByUsedInOperatingActivities,-784282.9,-784282.9,8852648960,USD,8852649000.0,Nasdaq:PCVX,-0.031893
0,AGCO CORP /DE,USD,NetCashProvidedByUsedInOperatingActivities,-644736.7,-644736.7,7669392384,USD,7669392000.0,NYSE:AGCO,-0.030264


In [7]:
# Second return value of screen_select_companies (presumably the companies
# for which some data could not be retrieved, going by the name)
missing_data_df

In [8]:
# Save both result tables to one Excel workbook, one sheet per table
file_name = "Selected filings.xlsx"
sheets = [
    ('Companies with missing data', missing_data_df),
    ('Verified Companies', comp_out_df),
]
with pd.ExcelWriter(file_name, engine='openpyxl') as writer:
    for sheet_name, sheet_df in sheets:
        # index=False: the row index is not written to the sheet
        sheet_df.to_excel(writer, sheet_name=sheet_name, index=False)

### COMPANY FACTS

In [None]:
# Company to inspect and request rate limit. Both are assigned here so the cell
# runs on a fresh kernel; no earlier cell sets them.
# NOTE(review): "GME" (GameStop) follows the company searched in the COMP METADATA
# section — confirm this is the intended company.
query_cik = ealib.find_ticker(tickers_df, "GME")["cik_str"].iloc[0]
mrps = 8

# Request the company facts and list the keys available under the us-gaap entry
comp_facts = ealib.get_response_dict(ealib.companyfacts_url(query_cik), req_header, mrps=mrps)
us_gaap_facts_dict = comp_facts["facts"]["us-gaap"]
us_gaap_facts_dict.keys()

In [None]:
# List the keys available under the "dei" entry of the company facts response
comp_facts["facts"]["dei"].keys()

In [None]:
# Shares outstanding
dei_facts = comp_facts["facts"]["dei"]
pd.DataFrame(dei_facts["EntityCommonStockSharesOutstanding"]["units"]["shares"])

In [None]:

# Market cap (proxy)
public_float_records = comp_facts["facts"]["dei"]["EntityPublicFloat"]["units"]["USD"]
pd.DataFrame(public_float_records)

In [None]:

# Search the us-gaap facts for keys containing a substring.
# find_dict_key_substr is reached through ealib (the bare name is not defined in
# this notebook) and receives a list of substrings, as in the other calls here.
test_qr_substr = "stock"
filtered_keys = ealib.find_dict_key_substr(us_gaap_facts_dict, [test_qr_substr])

# Optional: restrict the dictionary to the matching keys
# sub_dict = {key: us_gaap_facts_dict[key] for key in filtered_keys if key in us_gaap_facts_dict}
filtered_keys

In [None]:
# Raw USD entries recorded for the "CommonStockValue" fact
us_gaap_facts_dict["CommonStockValue"]["units"]["USD"]

In [None]:
# The same USD entries as a pandas dataframe, for tabular viewing
common_stock_usd = us_gaap_facts_dict["CommonStockValue"]["units"]["USD"]
pd.DataFrame(common_stock_usd)

### COMPANY CONCEPT

In [None]:
# Company to query and request rate limit. Both are assigned here so the cell
# runs on a fresh kernel even when no earlier cell has set them.
# NOTE(review): "GME" (GameStop) follows the company searched in the COMP METADATA
# section — confirm this is the intended company and that it reports this concept.
query_cik = ealib.find_ticker(tickers_df, "GME")["cik_str"].iloc[0]
mrps = 8

# Request a single concept (us-gaap Revenues) for the company
rev_concept = ealib.get_response_dict(
    ealib.companyconcept_url(query_cik, "/us-gaap/Revenues"),
    req_header,
    mrps=mrps
)

# Coincides with company facts request
pd.DataFrame(rev_concept["units"]["USD"])