In [1]:
import sys
import os
import warnings

sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(os.path.realpath("__file__")), ".."))
)

warnings.simplefilter(action="ignore", category=FutureWarning)

In [2]:
import pandas as pd
from datetime import date
import time

import data_loader.yahoo_finance as yahoo_finance
import utils.downloading_utils as downloading_utils
import utils.azure_utils as azure_utils
import utils.mapping as mapping

import handyman.holdings as holdings
import handyman.prices as prices
import handyman.company_info as company_info
from utils.yfinance_utils import (
    pull_prices,
    pull_financials,
    create_client,
    pull_officers,
    pull_info,
)
from utils.database_utils import find_missing_tickers

In [3]:
engine = azure_utils.get_azure_engine(configs_path="../configs/configs.json")

# Download Holdings Files

In [4]:
etf_urls = mapping.etf_urls
data_path = os.path.join(
    os.path.abspath(os.path.join(os.path.dirname(os.path.realpath("__file__")), "..")),
    "Data",
)

all_indices = pd.DataFrame()
for fund_name, url in etf_urls.items():
    print(fund_name)
    df_temp = downloading_utils.download_holdings(
        fund_name, url, download_folder=data_path
    )
    all_indices = pd.concat([all_indices, df_temp])

S&P 500
Russell 1000


In [6]:
azure_utils.write_sql_table(
    engine=engine, table_name="holdings", df=all_indices, overwrite=False
)

In [None]:
# holdings.get_index_holdings(
#     # indices=['S&P 500'],
#     # tickers=["AAPL"],
#     start_date="2025-12-30",
# )

In [None]:
# all_indices = holdings.get_index_holdings(
#     # indices=['S&P 500'],
#     # tickers=["AAPL"],
#     start_date="2025-12-30",
# )

In [None]:
# azure_utils.delete_sql_rows(
#     engine=engine,
#     table_name="HOLDINGS",
#     where_clause="date is '2025-12-29 00:00:00'"
# )

# Store Company Data

## Company Prices

In [None]:
all_prices = pull_prices(tickers=all_indices["TICKER"].unique())

azure_utils.write_sql_table(
    engine=engine, table_name="prices", overwrite=False, df=all_prices
)

2025-12-31 16:14:04 - INFO - Loading Prices


In [13]:
adjusted = list(
    all_prices[(all_prices["DIVIDENDS"] > 0) | (all_prices["STOCK_SPLITS"] > 0)][
        "TICKER"
    ].unique()
)
print(adjusted)

if adjusted:
    adj_secs = "' ,'".join(adjusted)
    azure_utils.delete_sql_rows(
        engine=engine,
        table_name="prices",
        where_clause=f"ticker in ('{adj_secs}')",
    )
    adjusted_prices = pull_prices(tickers=adjusted)

    azure_utils.write_sql_table(
        table_name="prices",
        engine=engine,
        df=adjusted_prices,
        overwrite=False,
    )

['AMT', 'BEN', 'GE', 'MKC', 'MU', 'XEL', 'ZBH', 'MTN', 'SEIC']


2025-12-31 16:17:33 - INFO - Loading Prices


In [15]:
prices.get_prices(tickers=["AMT", "MSFT"]).sort_index(ascending=False)

TICKER,AMT,MSFT
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-12-31,175.570007,483.619995
2025-12-30,176.720001,487.480011
2025-12-29,175.830002,487.100006
2025-12-26,174.710007,487.709991
2025-12-24,173.115524,488.019989
...,...,...
2000-01-07,22.803617,34.036129
2000-01-06,21.615452,33.597080
2000-01-05,21.843945,34.761539
2000-01-04,21.204170,34.398819


## Financials

In [16]:
run = False

today = date.today()
if today.weekday() == 0:
    all_financial_annual = pull_financials(
        tickers=all_indices["TICKER"].unique(), annual=True, statement_type="financial"
    )
    time.sleep(60 * 5)
    all_financial_quarterly = pull_financials(
        tickers=all_indices["TICKER"].unique(), annual=False, statement_type="financial"
    )
    time.sleep(60 * 5)
    all_balancesheet_annual = pull_financials(
        tickers=all_indices["TICKER"].unique(),
        annual=True,
        statement_type="balance_sheet",
    )
    time.sleep(60 * 5)
    all_balancesheet_quarterly = pull_financials(
        tickers=all_indices["TICKER"].unique(),
        annual=False,
        statement_type="balance_sheet",
    )
    time.sleep(60 * 5)
    all_income_annual = pull_financials(
        tickers=all_indices["TICKER"].unique(),
        annual=True,
        statement_type="income_statement",
    )
    time.sleep(60 * 5)
    all_income_quarterly = pull_financials(
        tickers=all_indices["TICKER"].unique(),
        annual=False,
        statement_type="income_statement",
    )
    time.sleep(60 * 5)
    all_cashflow_annual = pull_financials(
        tickers=all_indices["TICKER"].unique(), annual=True, statement_type="cashflow"
    )
    time.sleep(60 * 5)
    all_cashflow_quarterly = pull_financials(
        tickers=all_indices["TICKER"].unique(), annual=False, statement_type="cashflow"
    )
    run = True

else:
    missing = find_missing_tickers("financial_annual", all_indices["TICKER"].unique())
    print(missing)
    if missing:
        all_financial_annual = pull_financials(
            tickers=missing, annual=True, statement_type="financial"
        )
        time.sleep(60 * 5)
        all_financial_quarterly = pull_financials(
            tickers=missing, annual=False, statement_type="financial"
        )
        time.sleep(60 * 5)
        all_balancesheet_annual = pull_financials(
            tickers=missing, annual=True, statement_type="balance_sheet"
        )
        time.sleep(60 * 5)
        all_balancesheet_quarterly = pull_financials(
            tickers=missing, annual=False, statement_type="balance_sheet"
        )
        time.sleep(60 * 5)
        all_income_annual = pull_financials(
            tickers=missing, annual=True, statement_type="income_statement"
        )
        time.sleep(60 * 5)
        all_income_quarterly = pull_financials(
            tickers=missing, annual=False, statement_type="income_statement"
        )
        time.sleep(60 * 5)
        all_cashflow_annual = pull_financials(
            tickers=missing, annual=True, statement_type="cashflow"
        )
        time.sleep(60 * 5)
        all_cashflow_quarterly = pull_financials(
            tickers=missing, annual=False, statement_type="cashflow"
        )
        run = True

2025-12-31 16:19:31 - INFO - Loading Company Financial


['FRMI']


ValueError: No objects to concatenate

In [7]:
if run:
    azure_utils.write_sql_table(
        table_name="financial_annual",
        engine=engine,
        df=all_financial_annual,
        overwrite=False,
    )
    azure_utils.write_sql_table(
        table_name="financial_quarterly",
        engine=engine,
        df=all_financial_quarterly,
        overwrite=False,
    )
    azure_utils.write_sql_table(
        table_name="balancesheet_annual",
        engine=engine,
        df=all_balancesheet_annual,
        overwrite=False,
    )
    azure_utils.write_sql_table(
        table_name="balancesheet_quarterly",
        engine=engine,
        df=all_balancesheet_quarterly,
        overwrite=False,
    )
    azure_utils.write_sql_table(
        table_name="incomestatement_annual",
        engine=engine,
        df=all_income_annual,
        overwrite=False,
    )
    azure_utils.write_sql_table(
        table_name="incomestatement_quarterly",
        engine=engine,
        df=all_income_quarterly,
        overwrite=False,
    )
    azure_utils.write_sql_table(
        table_name="cashflow_annual",
        engine=engine,
        df=all_cashflow_annual,
        overwrite=False,
    )
    azure_utils.write_sql_table(
        table_name="cashflow_quarterly",
        engine=engine,
        df=all_cashflow_quarterly,
        overwrite=False,
    )

## Company Info

In [17]:
run = False

today = date.today()
if today.weekday() == 0:
    client = create_client(tickers=all_indices["TICKER"].unique())
    all_info = pull_info(tickers=all_indices["TICKER"].unique(), client=client)
    all_officers = pull_officers(tickers=all_indices["TICKER"].unique(), client=client)
    run = True
else:
    missing = find_missing_tickers("company_info", all_indices["TICKER"].unique())
    print(missing)
    if missing:
        client = create_client(tickers=missing)
        all_info = pull_info(tickers=missing, client=client)
        all_officers = pull_officers(tickers=missing, client=client)
        run = True

[]


In [8]:
if run:
    azure_utils.write_sql_table(
        table_name="company_info",
        engine=engine,
        df=all_info,
        overwrite=False,
    )
    azure_utils.write_sql_table(
        table_name="officers",
        engine=engine,
        df=all_officers,
        overwrite=False,
    )

In [18]:
info_df = company_info.get_company_info(tickers=None)
cachedf = azure_utils.read_sql_table(engine=engine, table_name="address")

missing_address = info_df[~(info_df["ADDRESS1"].isin(cachedf["ADDRESS1"].unique()))]
missing_address = missing_address.dropna(subset="ADDRESS1")

missing_address

Unnamed: 0,DATE,TICKER,ADDRESS1,CITY,STATE,ZIP,COUNTRY,PHONE,WEBSITE,INDUSTRY,...,REGULARMARKETDAYRANGE,REGULARMARKETCHANGEPERCENT,REGULARMARKETPRICE,DISPLAYNAME,TRAILINGPEGRATIO,IPOEXPECTEDDATE,ADDRESS2,FAX,INDUSTRYSYMBOL,PEGRATIO


In [15]:
from utils import geo

df = geo.geocode_dataframe(missing_address, cache_df=cachedf, delay=0.5)

df = df[["ADDRESS1", "CITY", "COUNTRY", "LAT", "LON"]].dropna(subset="LAT")

df

Geocoding: 0it [00:00, ?it/s]


Unnamed: 0,ADDRESS1,CITY,COUNTRY,LAT,LON


In [16]:
azure_utils.write_sql_table(engine=engine, table_name="address", df=df, overwrite=False)