In [1]:
import pandas as pd
from pandas.tseries.offsets import BDay
import numpy as np
from datetime import date

import sys
import os

sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(os.path.realpath("__file__")), ".."))
)

import data_loader.yahoo_finance as yahoo_finance
import utils.downloading_utils as downloading_utils
import utils.sql_utils as sql_utils
import utils.mapping as mapping
import utils.dataframe_utils as dataframe_utils

In [2]:
import warnings

# Hide FutureWarning messages in cell output. Note that this also hides any
# deprecation notice a library emits under the FutureWarning category.
warnings.simplefilter(action="ignore", category=FutureWarning)
# Render every column when a wide DataFrame is displayed.
pd.set_option("display.max_columns", None)

# Download Holdings Files

In [3]:
etf_urls = mapping.etf_urls
# NOTE: "__file__" is a quoted string here, not the module attribute, so
# realpath() resolves it against the current working directory. data_path is
# therefore "<parent of the working directory>/Data".
data_path = os.path.join(
    os.path.abspath(os.path.join(os.path.dirname(os.path.realpath("__file__")), "..")),
    "Data",
)

# Collect one frame per fund and concatenate once after the loop. Calling
# pd.concat inside the loop copies every previously downloaded row again on
# each iteration.
holdings_frames = []
for fund_name, url in etf_urls.items():
    print(fund_name)
    df_temp = downloading_utils.download_holdings(
        fund_name, url, download_folder=data_path
    )
    # pd.concat silently skips None entries; filter them here so a list made
    # only of None results does not raise.
    if df_temp is not None:
        holdings_frames.append(df_temp)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# holdings were collected (the same result the incremental version produced).
all_indices = pd.concat(holdings_frames) if holdings_frames else pd.DataFrame()

S&P 500


KeyboardInterrupt: 

In [None]:
# Persist the downloaded holdings to the "holdings" table.
# NOTE(review): overwrite=False presumably appends to the existing table;
# confirm in sql_utils before re-running for a date that is already stored.
sql_utils.write_sql_table(
    table_name="holdings",
    database_name="CODE_CAPITAL",
    df=all_indices,
    overwrite=False,
)

In [None]:
# query = """
# SELECT *
# FROM holdings
# WHERE 1=1
# -- AND "INDEX" = 'S&P 500'
# -- AND TICKER = 'AAPL'
# AND DATE = '2025-12-15'
# """

# all_indices = sql_utils.read_sql_table(
#     "CODE_CAPITAL",
#     # table_name="holdings",
#     query=query,
# )

# all_indices

Unnamed: 0,DATE,INDEX,TICKER,NAME,MARKET_VALUE,WEIGHT,QUANTITY,PRICE,LOCATION,EXCHANGE,CURRENCY,FX_RATE
0,2025-12-15,S&P 500,A,AGILENT TECHNOLOGIES INC,4.588199e+08,0.000681,3284793.0,139.68,United States,New York Stock Exchange Inc.,USD,1.0
1,2025-12-15,S&P 500,AAPL,APPLE INC,4.776274e+10,0.070906,171635543.0,278.28,United States,NASDAQ,USD,1.0
2,2025-12-15,S&P 500,ABBV,ABBVIE INC,4.562614e+09,0.006773,20430835.0,223.32,United States,New York Stock Exchange Inc.,USD,1.0
3,2025-12-15,S&P 500,ABNB,AIRBNB INC CLASS A,6.370705e+08,0.000946,4961995.0,128.39,United States,NASDAQ,USD,1.0
4,2025-12-15,S&P 500,ABT,ABBOTT LABORATORIES,2.525335e+09,0.003749,20128608.0,125.46,United States,New York Stock Exchange Inc.,USD,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
1502,2025-12-15,Russell 1000,ZG,ZILLOW GROUP INC CLASS A,2.479739e+06,0.000054,34260.0,72.38,United States,NASDAQ,USD,1.0
1503,2025-12-15,Russell 1000,ZION,ZIONS BANCORPORATION,6.334346e+06,0.000138,107380.0,58.99,United States,NASDAQ,USD,1.0
1504,2025-12-15,Russell 1000,ZM,ZOOM COMMUNICATIONS INC CLASS A,1.727861e+07,0.000378,193014.0,89.52,United States,NASDAQ,USD,1.0
1505,2025-12-15,Russell 1000,ZS,ZSCALER INC,1.677163e+07,0.000367,70982.0,236.28,United States,NASDAQ,USD,1.0


In [None]:
# sql_utils.delete_sql_rows(
#     database_name="CODE_CAPITAL",
#     table_name="holdings",
#     where_clause="date is null"
# )

# Store Company Data

## Company Prices

In [None]:
# TODO: check for stock splits
# TODO: re-upload all price data (presumably required once a split is detected; confirm)

In [None]:
def add_missing_tickers(df, ticker_list, default_start_date="2000-01-01"):
    """
    Return ``df`` extended so that every ticker in ``ticker_list`` has a row.

    Tickers already present in ``df["TICKER"]`` are left untouched. Every
    ticker that is absent gets exactly one new row whose ``START_DATE`` is
    ``default_start_date`` (a fixed date, not today's date).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``TICKER`` column; it is not modified.
    ticker_list : iterable
        Tickers that must be present in the result. Repeated entries are
        added only once; first-seen order is kept.
    default_start_date : str or datetime-like, default "2000-01-01"
        Value stored in ``START_DATE`` for the added rows. It is inserted
        as given, so callers whose ``START_DATE`` column holds timestamps may
        prefer to pass a ``pandas.Timestamp`` to keep the column homogeneous.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself when nothing is missing; otherwise a new frame with the
        extra rows appended and a fresh 0..n-1 index.
    """
    existing = set(df["TICKER"])
    # dict.fromkeys drops repeated tickers while preserving their order, so a
    # ticker listed twice cannot produce two rows.
    missing = list(dict.fromkeys(t for t in ticker_list if t not in existing))

    if not missing:
        return df

    # The scalar start date is broadcast to one value per missing ticker.
    new_rows = pd.DataFrame({"TICKER": missing, "START_DATE": default_start_date})
    return pd.concat([df, new_rows], ignore_index=True)

In [None]:
# Most recent stored price date for each ticker.
query = """
SELECT TICKER, MAX(DATE) AS START_DATE
FROM prices
GROUP BY TICKER
"""

max_dates = sql_utils.read_sql_table(query=query, database_name="CODE_CAPITAL")
# Move each date one business day forward, i.e. to the day after the last
# stored row.
max_dates["START_DATE"] = pd.to_datetime(max_dates["START_DATE"]) + BDay(1)

# TICKER -> START_DATE lookup covering every index constituent, including the
# tickers that have no stored prices yet.
start_date_mapping = dataframe_utils.df_to_dict(
    add_missing_tickers(max_dates, all_indices["TICKER"].unique()),
    "TICKER",
    "START_DATE",
)

In [None]:
# Fetch prices for every index constituent, passing the per-ticker start
# dates in start_date_mapping, then store the result in the "prices" table.
client_prices = yahoo_finance.YahooDataClient(
    all_indices["TICKER"].unique(),
    max_workers=10,
)
all_prices = client_prices.get_prices(start_date=start_date_mapping)

sql_utils.write_sql_table(
    database_name="CODE_CAPITAL",
    table_name="prices",
    df=all_prices,
    overwrite=False,
)

2025-12-15 17:27:45 - INFO - Loading Prices


In [None]:
# Spot check: stored price rows for AAPL and MSFT, newest date first.
query = """
SELECT *
FROM prices
WHERE "TICKER" IN ('AAPL', 'MSFT')
ORDER BY DATE DESC, TICKER
"""

# The returned frame is the cell's displayed output; it is not assigned.
sql_utils.read_sql_table("CODE_CAPITAL", query=query)

Unnamed: 0,DATE,OPEN,HIGH,LOW,CLOSE,ADJ_CLOSE,VOLUME,DIVIDENDS,STOCK_SPLITS,TICKER
0,2025-12-15,280.019989,280.049988,272.839996,274.109985,274.109985,49752479.0,0.0,0.0,AAPL
1,2025-12-15,480.100006,480.720490,472.519989,474.820007,474.820007,23099397.0,0.0,0.0,MSFT
2,2025-12-12,277.795013,279.220001,276.820007,278.279999,278.279999,38360082.0,0.0,0.0,AAPL
3,2025-12-12,479.820007,482.450012,476.359985,478.529999,478.529999,20743536.0,0.0,0.0,MSFT
4,2025-12-11,279.095001,279.579987,273.809998,278.029999,278.029999,30285187.0,0.0,0.0,AAPL
...,...,...,...,...,...,...,...,...,...,...
13051,2000-01-05,55.562500,58.187500,54.687500,56.906250,34.883835,64059600.0,0.0,0.0,MSFT
13052,2000-01-04,0.966518,0.987723,0.903460,0.915179,0.770139,512377600.0,0.0,0.0,AAPL
13053,2000-01-04,56.781250,58.562500,56.125000,56.312500,34.519875,54119000.0,0.0,0.0,MSFT
13054,2000-01-03,0.936384,1.004464,0.907924,0.999442,0.841048,535796800.0,0.0,0.0,AAPL


## Financials

In [None]:
def find_missing_tickers(df, ticker_list):
    """
    List the tickers from ticker_list that do not occur in df["TICKER"].

    The order (and any repetition) of ticker_list is kept, and df is not
    modified.
    """

    # Set lookup keeps the membership test cheap for long ticker lists
    known = set(df["TICKER"])

    absent = []
    for ticker in ticker_list:
        if ticker not in known:
            absent.append(ticker)

    return absent


# Tickers recorded in company_info on the most recent DATE that has more than
# 10 rows (the subquery picks that date).
query = """
SELECT DISTINCT TICKER
FROM company_info
WHERE DATE = (
    SELECT DATE
    FROM company_info
    GROUP BY DATE
    HAVING COUNT(*) > 10
    ORDER BY DATE DESC
    LIMIT 1
)
"""

t = sql_utils.read_sql_table(query=query, database_name="CODE_CAPITAL")

# Index constituents that have no company_info row for that date.
missing = find_missing_tickers(t, all_indices["TICKER"].unique())

# Shown as the cell output.
missing

['A']

In [None]:
# weekday() == 0 is Monday: on Mondays every index constituent is refreshed.
# On any other day only the tickers in `missing` are requested, and the
# download is skipped entirely when that list is empty.
today = date.today()
if today.weekday() != 0:
    client = yahoo_finance.YahooDataClient(missing, max_workers=10)
    run = bool(missing)
else:
    client = yahoo_finance.YahooDataClient(
        all_indices["TICKER"].unique(),
        max_workers=10,
    )
    run = True

In [None]:
if run:
    # All eight downloads happen before the first write.
    all_financial_annual = client.get_financials(
        statement_type="financial", annual=True
    )
    all_financial_quarterly = client.get_financials(
        statement_type="financial", annual=False
    )
    all_balancesheet_annual = client.get_financials(
        statement_type="balance_sheet", annual=True
    )
    all_balancesheet_quarterly = client.get_financials(
        statement_type="balance_sheet", annual=False
    )
    all_income_annual = client.get_financials(
        statement_type="income_statement", annual=True
    )
    all_income_quarterly = client.get_financials(
        statement_type="income_statement", annual=False
    )
    all_cashflow_annual = client.get_financials(
        statement_type="cashflow", annual=True
    )
    all_cashflow_quarterly = client.get_financials(
        statement_type="cashflow", annual=False
    )

    # (destination table, frame) pairs, written in this order. Note that the
    # income statement tables use the "incomestatement_" prefix.
    financial_tables = (
        ("financial_annual", all_financial_annual),
        ("financial_quarterly", all_financial_quarterly),
        ("balancesheet_annual", all_balancesheet_annual),
        ("balancesheet_quarterly", all_balancesheet_quarterly),
        ("incomestatement_annual", all_income_annual),
        ("incomestatement_quarterly", all_income_quarterly),
        ("cashflow_annual", all_cashflow_annual),
        ("cashflow_quarterly", all_cashflow_quarterly),
    )
    for financial_table, financial_df in financial_tables:
        sql_utils.write_sql_table(
            table_name=financial_table,
            database_name="CODE_CAPITAL",
            df=financial_df,
            overwrite=False,
        )

2025-12-15 17:28:22 - INFO - Loading Company Financial
2025-12-15 17:28:48 - INFO - Loading Company Financial
2025-12-15 17:29:14 - INFO - Loading Company Balance Sheet
2025-12-15 17:29:36 - INFO - Loading Company Balance Sheet
2025-12-15 17:29:59 - INFO - Loading Company Income Statement
2025-12-15 17:30:00 - INFO - Loading Company Income Statement
2025-12-15 17:30:02 - INFO - Loading Company Cashflow
2025-12-15 17:30:25 - INFO - Loading Company Cashflow


In [None]:
# Spot check: stored quarterly cash-flow rows for AAPL, newest DATE first.
query = """
SELECT *
FROM cashflow_quarterly
WHERE "TICKER" IN ('AAPL')
ORDER BY DATE DESC, TICKER
"""

# The returned frame is the cell's displayed output; it is not assigned.
sql_utils.read_sql_table("CODE_CAPITAL", query=query)

Unnamed: 0,DATE,TICKER,REPORT_DATE,FREE_CASH_FLOW,REPURCHASE_OF_CAPITAL_STOCK,REPAYMENT_OF_DEBT,CAPITAL_EXPENDITURE,INCOME_TAX_PAID_SUPPLEMENTAL_DATA,END_CASH_POSITION,BEGINNING_CASH_POSITION,CHANGES_IN_CASH,FINANCING_CASH_FLOW,CASH_FLOW_FROM_CONTINUING_FINANCING_ACTIVITIES,NET_OTHER_FINANCING_CHARGES,PROCEEDS_FROM_STOCK_OPTION_EXERCISED,CASH_DIVIDENDS_PAID,COMMON_STOCK_DIVIDEND_PAID,NET_COMMON_STOCK_ISSUANCE,COMMON_STOCK_PAYMENTS,NET_ISSUANCE_PAYMENTS_OF_DEBT,NET_LONG_TERM_DEBT_ISSUANCE,LONG_TERM_DEBT_PAYMENTS,INVESTING_CASH_FLOW,CASH_FLOW_FROM_CONTINUING_INVESTING_ACTIVITIES,NET_INVESTMENT_PURCHASE_AND_SALE,SALE_OF_INVESTMENT,PURCHASE_OF_INVESTMENT,NET_BUSINESS_PURCHASE_AND_SALE,SALE_OF_BUSINESS,PURCHASE_OF_BUSINESS,NET_PPE_PURCHASE_AND_SALE,PURCHASE_OF_PPE,OPERATING_CASH_FLOW,CASH_FLOW_FROM_CONTINUING_OPERATING_ACTIVITIES,CHANGE_IN_WORKING_CAPITAL,CHANGE_IN_OTHER_CURRENT_LIABILITIES,CHANGE_IN_PAYABLES_AND_ACCRUED_EXPENSE,CHANGE_IN_ACCRUED_EXPENSE,CHANGE_IN_PAYABLE,CHANGE_IN_ACCOUNT_PAYABLE,CHANGE_IN_PREPAID_ASSETS,CHANGE_IN_INVENTORY,CHANGE_IN_RECEIVABLES,CHANGES_IN_ACCOUNT_RECEIVABLES,OTHER_NON_CASH_ITEMS,STOCK_BASED_COMPENSATION,DEFERRED_TAX,DEFERRED_INCOME_TAX,DEPRECIATION_AMORTIZATION_DEPLETION,DEPRECIATION_AND_AMORTIZATION,OPERATING_GAINS_LOSSES,GAIN_LOSS_ON_INVESTMENT_SECURITIES,GAIN_LOSS_ON_SALE_OF_BUSINESS,NET_INCOME_FROM_CONTINUING_OPERATIONS,ISSUANCE_OF_DEBT,NET_SHORT_TERM_DEBT_ISSUANCE,SHORT_TERM_DEBT_PAYMENTS,LONG_TERM_DEBT_ISSUANCE,NET_OTHER_INVESTING_CHANGES,CHANGE_IN_OTHER_CURRENT_ASSETS,ISSUANCE_OF_CAPITAL_STOCK,EFFECT_OF_EXCHANGE_RATE_CHANGES,COMMON_STOCK_ISSUANCE,CHANGE_IN_OTHER_WORKING_CAPITAL,CHANGE_IN_TAX_PAYABLE,CHANGE_IN_INCOME_TAX_PAYABLE,UNREALIZED_GAIN_LOSS_ON_INVESTMENT_SECURITIES,ASSET_IMPAIRMENT_CHARGE,DEPRECIATION,INTEREST_PAID_SUPPLEMENTAL_DATA,SHORT_TERM_DEBT_ISSUANCE,SALE_OF_PPE,AMORTIZATION_CASH_FLOW,AMORTIZATION_OF_INTANGIBLES,NET_FOREIGN_CURRENCY_EXCHANGE_GAIN_LOSS,GAIN_LOSS_ON_SALE_OF_PPE,AMORTIZATION_OF_SECURITIES,NET_INTANGI
BLES_PURCHASE_AND_SALE,SALE_OF_INTANGIBLES,PURCHASE_OF_INTANGIBLES,NET_PREFERRED_STOCK_ISSUANCE,PREFERRED_STOCK_PAYMENTS,PREFERRED_STOCK_ISSUANCE,EARNINGS_LOSSES_FROM_EQUITY_INVESTMENTS,PROVISIONAND_WRITE_OFFOF_ASSETS,CAPITAL_EXPENDITURE_REPORTED,CASH_FROM_DISCONTINUED_INVESTING_ACTIVITIES,CASH_FROM_DISCONTINUED_OPERATING_ACTIVITIES,CASH_FLOW_FROM_DISCONTINUED_OPERATION,CASH_FROM_DISCONTINUED_FINANCING_ACTIVITIES,TAXES_REFUND_PAID,PENSION_AND_EMPLOYEE_BENEFIT_EXPENSE,DIVIDEND_RECEIVED_CFO,DEPLETION,OTHER_CASH_ADJUSTMENT_OUTSIDE_CHANGEIN_CASH,PREFERRED_STOCK_DIVIDEND_PAID,DIVIDENDS_RECEIVED_CFI,NET_INVESTMENT_PROPERTIES_PURCHASE_AND_SALE,SALE_OF_INVESTMENT_PROPERTIES,PURCHASE_OF_INVESTMENT_PROPERTIES,DIVIDEND_PAID_CFO,OTHER_CASH_ADJUSTMENT_INSIDE_CHANGEIN_CASH,CHANGE_IN_INTEREST_PAYABLE,INTEREST_RECEIVED_CFI,INTEREST_PAID_CFF,EXCESS_TAX_BENEFIT_FROM_STOCK_BASED_COMPENSATION,INTEREST_PAID_CFO,CASH_FLOWSFROMUSEDIN_OPERATING_ACTIVITIES_DIRECT,CLASSESOF_CASH_PAYMENTS,OTHER_CASH_PAYMENTSFROM_OPERATING_ACTIVITIES,PAYMENTSON_BEHALFOF_EMPLOYEES,CLASSESOF_CASH_RECEIPTSFROM_OPERATING_ACTIVITIES,OTHER_CASH_RECEIPTSFROM_OPERATING_ACTIVITIES,INTEREST_RECEIVED_CFO,CHANGE_IN_DIVIDEND_PAYABLE
0,2025-12-15,AAPL,2025-09-30 00:00:00,26486000000.0,-20132000000.0,-1185000000.0,-3242000000.0,6037000000.0,35934000000.0,36269000000.0,-335000000.0,-27476000000.0,-27476000000.0,-265000000.0,,-3862000000.0,-3862000000.0,-20132000000.0,-20132000000.0,-3217000000.0,-1250000000.0,-1250000000.0,-2587000000.0,-2587000000.0,1160000000.0,7976000000.0,-6816000000.0,,,,-3242000000.0,-3242000000.0,29728000000.0,29728000000.0,-5707000000.0,4085000000.0,19381000000.0,,19381000000.0,19381000000.0,,177000000.0,-26269000000.0,-12367000000.0,1659000000.0,3183000000.0,,,3127000000.0,3127000000.0,,,,27466000000.0,0.0,-1967000000.0,,0.0,-505000000.0,-3081000000.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2025-12-15,AAPL,2025-06-30 00:00:00,24405000000.0,-21075000000.0,-1770000000.0,-3462000000.0,5649000000.0,36269000000.0,28162000000.0,8107000000.0,-24833000000.0,-24833000000.0,-2524000000.0,,-3945000000.0,-3945000000.0,-21075000000.0,-21075000000.0,2711000000.0,-1192000000.0,-5673000000.0,5073000000.0,5073000000.0,8875000000.0,14024000000.0,-5149000000.0,,,,-3462000000.0,-3462000000.0,27867000000.0,27867000000.0,-2034000000.0,418000000.0,-3875000000.0,,-3875000000.0,-3875000000.0,,365000000.0,2803000000.0,-1581000000.0,469000000.0,3168000000.0,,,2830000000.0,2830000000.0,,,,23434000000.0,,3903000000.0,3903000000.0,,-340000000.0,-1745000000.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2025-12-15,AAPL,2025-03-31 00:00:00,20881000000.0,-25898000000.0,976000000.0,-3071000000.0,13032000000.0,28162000000.0,30299000000.0,-2137000000.0,-29006000000.0,-29006000000.0,-326000000.0,,-3758000000.0,-3758000000.0,-25898000000.0,-25898000000.0,976000000.0,-3000000000.0,-3000000000.0,2917000000.0,2917000000.0,6020000000.0,12338000000.0,-6318000000.0,,,,-3071000000.0,-3071000000.0,23952000000.0,23952000000.0,-6507000000.0,-3581000000.0,-7933000000.0,,-7933000000.0,-7933000000.0,,643000000.0,9674000000.0,3669000000.0,-208000000.0,3226000000.0,,,2661000000.0,2661000000.0,,,,24780000000.0,,3976000000.0,3976000000.0,,-32000000.0,-5310000000.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,2025-12-15,AAPL,2024-12-31 00:00:00,26995000000.0,-23606000000.0,-8953000000.0,-2940000000.0,18651000000.0,30299000000.0,29943000000.0,356000000.0,-39371000000.0,-39371000000.0,-2956000000.0,,-3856000000.0,-3856000000.0,-23606000000.0,-23606000000.0,-8953000000.0,-1009000000.0,-1009000000.0,9792000000.0,9792000000.0,13335000000.0,19459000000.0,-6124000000.0,,,,-2940000000.0,-2940000000.0,29935000000.0,29935000000.0,-10752000000.0,-11998000000.0,-6671000000.0,,-6671000000.0,-6671000000.0,,215000000.0,6763000000.0,3597000000.0,-2009000000.0,3286000000.0,,,3080000000.0,3080000000.0,,,,36330000000.0,,-7944000000.0,-7944000000.0,,-603000000.0,939000000.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2025-12-15,AAPL,2024-09-30 00:00:00,23903000000.0,-25083000000.0,427000000.0,-2908000000.0,6872000000.0,29943000000.0,26635000000.0,3308000000.0,-24948000000.0,-24948000000.0,-448000000.0,,-3804000000.0,-3804000000.0,-25083000000.0,-25083000000.0,4387000000.0,-2558000000.0,-2558000000.0,1445000000.0,1445000000.0,4544000000.0,15126000000.0,-10582000000.0,,,,-2908000000.0,-2908000000.0,26811000000.0,26811000000.0,6608000000.0,15550000000.0,21191000000.0,,21191000000.0,21191000000.0,,-1087000000.0,-22941000000.0,-10485000000.0,-302000000.0,2858000000.0,,,2911000000.0,2911000000.0,,,,14736000000.0,0.0,6945000000.0,,0.0,-191000000.0,-6105000000.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,2025-12-15,AAPL,2024-06-30 00:00:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,997000000.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,2025-12-15,AAPL,2024-03-31 00:00:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2000000.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,2025-12-08,AAPL,2025-09-30 00:00:00,26486000000.0,-20132000000.0,-1185000000.0,-3242000000.0,6037000000.0,35934000000.0,36269000000.0,-335000000.0,-27476000000.0,-27476000000.0,-265000000.0,,-3862000000.0,-3862000000.0,-20132000000.0,-20132000000.0,-3217000000.0,-1250000000.0,-1250000000.0,-2587000000.0,-2587000000.0,1160000000.0,7976000000.0,-6816000000.0,,,,-3242000000.0,-3242000000.0,29728000000.0,29728000000.0,-5707000000.0,4085000000.0,19381000000.0,,19381000000.0,19381000000.0,,177000000.0,-26269000000.0,-12367000000.0,1659000000.0,3183000000.0,,,3127000000.0,3127000000.0,,,,27466000000.0,0.0,-1967000000.0,,0.0,-505000000.0,-3081000000.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,2025-12-08,AAPL,2025-06-30 00:00:00,24405000000.0,-21075000000.0,-1770000000.0,-3462000000.0,5649000000.0,36269000000.0,28162000000.0,8107000000.0,-24833000000.0,-24833000000.0,-2524000000.0,,-3945000000.0,-3945000000.0,-21075000000.0,-21075000000.0,2711000000.0,-1192000000.0,-5673000000.0,5073000000.0,5073000000.0,8875000000.0,14024000000.0,-5149000000.0,,,,-3462000000.0,-3462000000.0,27867000000.0,27867000000.0,-2034000000.0,418000000.0,-3875000000.0,,-3875000000.0,-3875000000.0,,365000000.0,2803000000.0,-1581000000.0,469000000.0,3168000000.0,,,2830000000.0,2830000000.0,,,,23434000000.0,,3903000000.0,3903000000.0,,-340000000.0,-1745000000.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,2025-12-08,AAPL,2025-03-31 00:00:00,20881000000.0,-25898000000.0,976000000.0,-3071000000.0,13032000000.0,28162000000.0,30299000000.0,-2137000000.0,-29006000000.0,-29006000000.0,-326000000.0,,-3758000000.0,-3758000000.0,-25898000000.0,-25898000000.0,976000000.0,-3000000000.0,-3000000000.0,2917000000.0,2917000000.0,6020000000.0,12338000000.0,-6318000000.0,,,,-3071000000.0,-3071000000.0,23952000000.0,23952000000.0,-6507000000.0,-3581000000.0,-7933000000.0,,-7933000000.0,-7933000000.0,,643000000.0,9674000000.0,3669000000.0,-208000000.0,3226000000.0,,,2661000000.0,2661000000.0,,,,24780000000.0,,3976000000.0,3976000000.0,,-32000000.0,-5310000000.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


## Company Info

In [None]:
# Same selection rule as for the financial statements: weekday() == 0 is
# Monday, when every index constituent is refreshed. On other days only the
# tickers in `missing` are requested, and nothing runs when the list is empty.
today = date.today()
if today.weekday() != 0:
    client = yahoo_finance.YahooDataClient(missing, max_workers=10)
    run = bool(missing)
else:
    client = yahoo_finance.YahooDataClient(
        all_indices["TICKER"].unique(),
        max_workers=10,
    )
    run = True

In [None]:
if run:
    all_info = client.get_company_info()
    all_officers = client.get_officer_info()
    # Convert every company_info cell to its string form before writing.
    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map, so use the new name when this pandas provides it and fall
    # back to applymap on older versions.
    all_info = (
        all_info.map(str) if hasattr(all_info, "map") else all_info.applymap(str)
    )
    sql_utils.write_sql_table(
        table_name="company_info",
        database_name="CODE_CAPITAL",
        df=all_info,
        overwrite=False,
    )
    sql_utils.write_sql_table(
        table_name="officers",
        database_name="CODE_CAPITAL",
        df=all_officers,
        overwrite=False,
    )

2025-12-15 17:46:06 - INFO - Loading Company Information
2025-12-15 17:46:07 - INFO - Loading Officer Information


In [3]:
# Load the company_info rows for a single snapshot date.
# NOTE: the date below is hard-coded; update it when running on another day.
query = """
SELECT *
FROM company_info
WHERE 1=1
-- AND "TICKER" IN ('A')
AND DATE = '2025-12-15'
"""

info_df = sql_utils.read_sql_table("CODE_CAPITAL", query=query)

In [4]:
# Rows already stored in the "address" table; passed to the geocoder below as its cache
cachedf = sql_utils.read_sql_table("CODE_CAPITAL", table_name="address")

# Keep only the companies whose ADDRESS1 does not appear in that table
missing_address = info_df.loc[
    ~info_df["ADDRESS1"].isin(cachedf["ADDRESS1"].unique())
]

missing_address

Unnamed: 0,DATE,TICKER,ADDRESS1,CITY,STATE,ZIP,COUNTRY,PHONE,WEBSITE,INDUSTRY,INDUSTRYKEY,INDUSTRYDISP,SECTOR,SECTORKEY,SECTORDISP,LONGBUSINESSSUMMARY,FULLTIMEEMPLOYEES,AUDITRISK,BOARDRISK,COMPENSATIONRISK,SHAREHOLDERRIGHTSRISK,OVERALLRISK,GOVERNANCEEPOCHDATE,COMPENSATIONASOFEPOCHDATE,IRWEBSITE,EXECUTIVETEAM,MAXAGE,PRICEHINT,PREVIOUSCLOSE,OPEN,DAYLOW,DAYHIGH,REGULARMARKETPREVIOUSCLOSE,REGULARMARKETOPEN,REGULARMARKETDAYLOW,REGULARMARKETDAYHIGH,DIVIDENDRATE,DIVIDENDYIELD,EXDIVIDENDDATE,PAYOUTRATIO,FIVEYEARAVGDIVIDENDYIELD,BETA,TRAILINGPE,FORWARDPE,VOLUME,REGULARMARKETVOLUME,AVERAGEVOLUME,AVERAGEVOLUME10DAYS,AVERAGEDAILYVOLUME10DAY,BID,ASK,BIDSIZE,ASKSIZE,MARKETCAP,FIFTYTWOWEEKLOW,FIFTYTWOWEEKHIGH,ALLTIMEHIGH,ALLTIMELOW,PRICETOSALESTRAILING12MONTHS,FIFTYDAYAVERAGE,TWOHUNDREDDAYAVERAGE,TRAILINGANNUALDIVIDENDRATE,TRAILINGANNUALDIVIDENDYIELD,CURRENCY,TRADEABLE,ENTERPRISEVALUE,PROFITMARGINS,FLOATSHARES,SHARESOUTSTANDING,SHARESSHORT,SHARESSHORTPRIORMONTH,SHARESSHORTPREVIOUSMONTHDATE,DATESHORTINTEREST,SHARESPERCENTSHARESOUT,HELDPERCENTINSIDERS,HELDPERCENTINSTITUTIONS,SHORTRATIO,SHORTPERCENTOFFLOAT,IMPLIEDSHARESOUTSTANDING,BOOKVALUE,PRICETOBOOK,LASTFISCALYEAREND,NEXTFISCALYEAREND,MOSTRECENTQUARTER,EARNINGSQUARTERLYGROWTH,NETINCOMETOCOMMON,TRAILINGEPS,FORWARDEPS,LASTSPLITFACTOR,LASTSPLITDATE,ENTERPRISETOREVENUE,ENTERPRISETOEBITDA,52WEEKCHANGE,SANDP52WEEKCHANGE,LASTDIVIDENDVALUE,LASTDIVIDENDDATE,QUOTETYPE,CURRENTPRICE,TARGETHIGHPRICE,TARGETLOWPRICE,TARGETMEANPRICE,TARGETMEDIANPRICE,RECOMMENDATIONMEAN,RECOMMENDATIONKEY,NUMBEROFANALYSTOPINIONS,TOTALCASH,TOTALCASHPERSHARE,EBITDA,TOTALDEBT,QUICKRATIO,CURRENTRATIO,TOTALREVENUE,DEBTTOEQUITY,REVENUEPERSHARE,RETURNONASSETS,RETURNONEQUITY,GROSSPROFITS,FREECASHFLOW,OPERATINGCASHFLOW,EARNINGSGROWTH,REVENUEGROWTH,GROSSMARGINS,EBITDAMARGINS,OPERATINGMARGINS,FINANCIALCURRENCY,SYMBOL,LANGUAGE,REGION,TYPEDISP,QUOTESOURCENAME,TRIGGERABLE,CUSTOMPRICEALERTCONFIDENCE,SHORTNAME,LONGNAME,MARKETSTATE,ESGPOPULATED,CORPORATEACTIONS,POSTMARKETTIME,REGUL
ARMARKETTIME,EXCHANGE,MESSAGEBOARDID,EXCHANGETIMEZONENAME,EXCHANGETIMEZONESHORTNAME,GMTOFFSETMILLISECONDS,MARKET,FULLEXCHANGENAME,AVERAGEDAILYVOLUME3MONTH,FIFTYTWOWEEKLOWCHANGE,FIFTYTWOWEEKLOWCHANGEPERCENT,FIFTYTWOWEEKRANGE,FIFTYTWOWEEKHIGHCHANGE,FIFTYTWOWEEKHIGHCHANGEPERCENT,FIFTYTWOWEEKCHANGEPERCENT,DIVIDENDDATE,EARNINGSTIMESTAMP,EARNINGSTIMESTAMPSTART,EARNINGSTIMESTAMPEND,EARNINGSCALLTIMESTAMPSTART,EARNINGSCALLTIMESTAMPEND,ISEARNINGSDATEESTIMATE,EPSTRAILINGTWELVEMONTHS,EPSFORWARD,EPSCURRENTYEAR,PRICEEPSCURRENTYEAR,FIFTYDAYAVERAGECHANGE,FIFTYDAYAVERAGECHANGEPERCENT,TWOHUNDREDDAYAVERAGECHANGE,TWOHUNDREDDAYAVERAGECHANGEPERCENT,SOURCEINTERVAL,EXCHANGEDATADELAYEDBY,PREVNAME,NAMECHANGEDATE,AVERAGEANALYSTRATING,CRYPTOTRADEABLE,HASPREPOSTMARKETDATA,FIRSTTRADEDATEMILLISECONDS,POSTMARKETCHANGEPERCENT,POSTMARKETPRICE,POSTMARKETCHANGE,REGULARMARKETCHANGE,REGULARMARKETDAYRANGE,REGULARMARKETCHANGEPERCENT,REGULARMARKETPRICE,DISPLAYNAME,TRAILINGPEGRATIO,IPOEXPECTEDDATE,ADDRESS2,FAX,INDUSTRYSYMBOL,PEGRATIO


In [5]:
from utils import geo

# Geocode the addresses not yet in the cache, keep only the address and
# coordinate columns, and drop the rows that have no LAT value.
df = (
    geo.geocode_dataframe(missing_address, cache_df=cachedf, delay=0.5)
    .loc[:, ["ADDRESS1", "CITY", "COUNTRY", "LAT", "LON"]]
    .dropna(subset="LAT")
)

df

Geocoding: 0it [00:00, ?it/s]


Unnamed: 0,ADDRESS1,CITY,COUNTRY,LAT,LON


In [None]:
# Store the newly geocoded addresses in the "address" table.
sql_utils.write_sql_table(
    table_name="address",
    database_name="CODE_CAPITAL",
    df=df,
    overwrite=False,
)