# Fetching

## Import useful libraries

In [54]:
import os
import requests
import pandas as pd
from dotenv import load_dotenv

## Declare constants and read the API key

In [55]:
load_dotenv()
API_KEY = os.getenv("FMP_API_KEY")

BASE_URL = "https://financialmodelingprep.com/api/v3"
LIMIT = 80  # Number of periods (80 quarters = 20 years)
PERIOD = "quarter"  # Fetch quarterly data
OUTPUT_DIR = "data"
OUTPUT_FILENAME = "balance_sheets_quarterly.csv"
EXCHANGE_SHORT_NAME = "WSE"  # Warsaw Stock Exchange short name

## Useful functions for data fetching

In [56]:
def fetch_all_symbols():
    url = f"{BASE_URL}/stock/list?apikey={API_KEY}"
    response = requests.get(url)
    data = response.json()
    return data


def filter_wse_symbols(all_symbols):
    wse_symbols = [
        stock["symbol"]
        for stock in all_symbols
        if stock["exchangeShortName"] == EXCHANGE_SHORT_NAME
    ]
    return wse_symbols


def fetch_balance_sheet(symbol):
    url = f"{BASE_URL}/balance-sheet-statement/{symbol}?apikey={API_KEY}&limit={LIMIT}&period={PERIOD}"
    response = requests.get(url)
    data = response.json()
    return data


def fetch_historical_stock_prices(symbol, from_date, to_date):
    # Fetch historical prices for the exact date range of balance sheets
    url = f"{BASE_URL}/historical-price-full/{symbol}?from={from_date}&to={to_date}&apikey={API_KEY}"
    response = requests.get(url)
    data = response.json()
    if "historical" in data:
        return data["historical"]
    return []


def get_closest_price(stock_prices, date):
    # Find the closest stock price to the balance sheet date
    for price_record in stock_prices:
        if price_record["date"] <= date:
            return price_record["close"]
    return None


def create_dataframe(symbols):
    all_data = []

    for symbol in symbols:
        balance_sheet_data = fetch_balance_sheet(symbol)

        if not balance_sheet_data:
            continue

        # Get the earliest and latest dates from the balance sheet data
        balance_sheet_dates = [record["date"] for record in balance_sheet_data]
        from_date = min(balance_sheet_dates)
        to_date = max(balance_sheet_dates)

        # Fetch historical prices for the date range
        stock_prices = fetch_historical_stock_prices(symbol, from_date, to_date)

        for record in balance_sheet_data:
            date = record["date"]
            stock_price = get_closest_price(
                stock_prices, date
            )  # Find the closest price to the balance sheet date
            shares_outstanding = record.get(
                "commonStock", None
            )  # Get the number of shares outstanding

            if stock_price is not None and shares_outstanding is not None:
                # Calculate market cap as stock price * shares outstanding
                market_cap = stock_price * shares_outstanding
            else:
                market_cap = None

            record["symbol"] = symbol
            record["marketCap"] = market_cap
            all_data.append(record)

    df = pd.DataFrame(all_data)
    return df

## Fetch data and create a DataFrame

In [57]:
all_symbols = fetch_all_symbols()
all_symbols

[{'symbol': 'PMGOLD.AX',
  'name': 'Perth Mint Gold',
  'price': 17.94,
  'exchange': 'Australian Securities Exchange',
  'exchangeShortName': 'ASX',
  'type': 'etf'},
 {'symbol': 'CVS',
  'name': 'CVS Health Corporation',
  'price': 57.67,
  'exchange': 'New York Stock Exchange',
  'exchangeShortName': 'NYSE',
  'type': 'stock'},
 {'symbol': 'KVHI',
  'name': 'KVH Industries, Inc.',
  'price': 4.615,
  'exchange': 'NASDAQ Global Select',
  'exchangeShortName': 'NASDAQ',
  'type': 'stock'},
 {'symbol': 'HIMS',
  'name': 'Hims & Hers Health, Inc.',
  'price': 14.31,
  'exchange': 'New York Stock Exchange',
  'exchangeShortName': 'NYSE',
  'type': 'stock'},
 {'symbol': 'CMPX',
  'name': 'Compass Therapeutics, Inc.',
  'price': 1.5,
  'exchange': 'NASDAQ Capital Market',
  'exchangeShortName': 'NASDAQ',
  'type': 'stock'},
 {'symbol': 'FNF',
  'name': 'Fidelity National Financial, Inc.',
  'price': 59.05,
  'exchange': 'New York Stock Exchange',
  'exchangeShortName': 'NYSE',
  'type': 's

In [58]:
wse_symbols = filter_wse_symbols(all_symbols)
wse_symbols

['PKP.WA',
 'KGH.WA',
 'NTC.WA',
 'PZU.WA',
 'PCO.WA',
 'ELT.WA',
 'DBC.WA',
 'JSW.WA',
 'CPS.WA',
 'PUR.WA',
 'ATT.WA',
 'ECH.WA',
 'GMT.WA',
 'ALR.WA',
 'MIL.WA',
 'TEN.WA',
 'QRS.WA',
 'BDX.WA',
 'ENA.WA',
 'LPP.WA',
 'MRB.WA',
 'DNP.WA',
 'LBW.WA',
 'SNK.WA',
 'MRC.WA',
 'XTB.WA',
 'SVE.WA',
 '11B.WA',
 'RPC.WA',
 'ETFBCASH.WA',
 'ETFBW20ST.WA',
 'ETFBSPXPL.WA',
 'ETFBW20LV.WA',
 'DOM.WA',
 'GEA.WA',
 'ACT.WA',
 'ALL.WA',
 'ENG.WA',
 'SHO.WA',
 'BRS.WA',
 'PKN.WA',
 'CLN.WA',
 'NNG.WA',
 'PEO.WA',
 'CLC.WA',
 'GRX.WA',
 'WLT.WA',
 'APR.WA',
 'SIM.WA',
 'RNK.WA',
 'PXM.WA',
 'MBK.WA',
 'CDR.WA',
 'RFK.WA',
 'AMB.WA',
 'MSZ.WA',
 'VRC.WA',
 'EUR.WA',
 'CRJ.WA',
 'GPP.WA',
 'CAR.WA',
 'CRI.WA',
 'IBS.WA',
 'MBR.WA',
 'UNT.WA',
 'PEP.WA',
 'VRG.WA',
 'CLE.WA',
 'DBE.WA',
 'GRN.WA',
 'APN.WA',
 'BDZ.WA',
 'BIO.WA',
 'BBD.WA',
 'BCS.WA',
 'FON.WA',
 'KGL.WA',
 'KGN.WA',
 'KPL.WA',
 'OPN.WA',
 'PLW.WA',
 'SNX.WA',
 'ULG.WA',
 'AMC.WA',
 'OTS.WA',
 'ART.WA',
 'IZO.WA',
 'CTX.WA',
 'ATR.WA'

In [59]:
df = create_dataframe(wse_symbols)

In [60]:
df.head()

Unnamed: 0,date,symbol,reportedCurrency,cik,fillingDate,acceptedDate,calendarYear,period,cashAndCashEquivalents,shortTermInvestments,...,totalEquity,totalLiabilitiesAndStockholdersEquity,minorityInterest,totalLiabilitiesAndTotalEquity,totalInvestments,totalDebt,netDebt,link,finalLink,marketCap
0,2024-03-31,PKP.WA,PLN,0,2024-03-31,2024-03-29 20:00:00,2024,Q1,176600000.0,0.0,...,3064900000.0,8250300000.0,0.0,8250300000.0,51500000.0,2895400000.0,2718800000.0,,,29290040000.0
1,2023-12-31,PKP.WA,PLN,0,2023-12-31,2023-12-30 19:00:00,2023,Q4,226200000.0,-10100000.0,...,3202500000.0,8320000000.0,0.0,8320000000.0,42700000.0,2893800000.0,2667600000.0,,,32648990000.0
2,2023-09-30,PKP.WA,PLN,0,2023-09-30,2023-09-29 20:00:00,2023,Q3,216800000.0,-16900000.0,...,3286400000.0,8290200000.0,0.0,8290200000.0,41100000.0,2653000000.0,2436200000.0,,,32962500000.0
3,2023-06-30,PKP.WA,PLN,0,2023-06-30,2023-06-29 20:00:00,2023,Q2,174600000.0,-16900000.0,...,3303200000.0,8107900000.0,0.0,8107900000.0,40500000.0,2650400000.0,2475800000.0,,,38426390000.0
4,2023-03-31,PKP.WA,PLN,0,2023-03-31,2023-03-30 20:00:00,2023,Q1,230300000.0,-16300000.0,...,3357600000.0,8048000000.0,0.0,8048000000.0,42700000.0,2675800000.0,2445500000.0,,,35851190000.0


In [61]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20360 entries, 0 to 20359
Data columns (total 55 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   date                                     20360 non-null  object 
 1   symbol                                   20360 non-null  object 
 2   reportedCurrency                         20360 non-null  object 
 3   cik                                      20360 non-null  object 
 4   fillingDate                              20360 non-null  object 
 5   acceptedDate                             20128 non-null  object 
 6   calendarYear                             20360 non-null  object 
 7   period                                   20360 non-null  object 
 8   cashAndCashEquivalents                   20360 non-null  float64
 9   shortTermInvestments                     20360 non-null  float64
 10  cashAndShortTermInvestments              20360

In [62]:
# Ensure the output directory exists
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# Save DataFrame to a CSV file in the output directory
output_path = os.path.join(OUTPUT_DIR, OUTPUT_FILENAME)
df.to_csv(output_path, index=False)

print(f"Data has been saved to {output_path}")

Data has been saved to data\balance_sheets_quarterly.csv
