In [8]:
# Import necessary libraries
import pandas as pd
import requests
from datetime import datetime
import yfinance as yf

import os

import dotenv
dotenv.load_dotenv()

True

In [9]:
# Resolve the "data" folder that sits beside the working directory (<cwd>/../data)
current_dir = os.getcwd()
# os.path.split returns (head, tail); the head is the directory one level up
parent_dir = os.path.split(current_dir)[0]
data_dir = os.path.join(parent_dir, 'data')

# Extract

## Income Statement

In [35]:
def get_income_statement_quarterly(tickers):
    """
    Fetch quarterly income statement data for a list of tickers using yfinance.

    Each ticker's frame is kept in the layout yfinance returns it in: one row per
    line item (the index) and one column per reporting period, plus a 'ticker'
    column added here. The per-ticker frames are stacked with pd.concat, so
    tickers with different period columns produce NaN where a period does not apply.

    Args:
    tickers (str or list): A single stock ticker symbol or a list of stock ticker symbols.

    Returns:
    pandas.DataFrame: A DataFrame containing the quarterly income statement data for all tickers.
        Empty when no ticker returned any data.
    """

    # Ensure tickers is a list
    if isinstance(tickers, str):
        tickers = [tickers]

    all_data = []

    for ticker_symbol in tickers:
        # Read the property once; every access goes back through yfinance
        statement = yf.Ticker(ticker_symbol).quarterly_financials

        # Skip symbols for which nothing was returned
        if statement is None or statement.empty:
            continue

        # Tag a copy so the object handed out by yfinance is left untouched
        statement = statement.copy()
        statement['ticker'] = ticker_symbol
        all_data.append(statement)

    # Concatenate all DataFrames into a single DataFrame
    if not all_data:
        return pd.DataFrame()

    return pd.concat(all_data)

In [210]:
def get_balance_sheet(tickers, api_key):
    """
    Fetch balance sheet data for a list of tickers using the Financial Modeling Prep API.

    Args:
    tickers (str or list): A single stock ticker symbol or a list of stock ticker symbols.
    api_key (str): Your API key for Financial Modeling Prep.

    Returns:
    pandas.DataFrame: A DataFrame containing the balance sheet data for all tickers,
        with a 'ticker' column identifying the requested symbol. Empty when no
        ticker returned any rows.

    Raises:
    ValueError: If the API answers with something other than a list of records
        (for example an error object instead of data).
    """
    # A bare string would otherwise be iterated character by character
    if isinstance(tickers, str):
        tickers = [tickers]

    all_data = []

    for ticker in tickers:
        url = f'https://financialmodelingprep.com/api/v3/balance-sheet-statement/{ticker}'
        # Query values go through `params` so they are URL-encoded; the timeout
        # keeps a stalled connection from hanging the notebook indefinitely.
        response = requests.get(url, params={'limit': 120, 'apikey': api_key}, timeout=30)
        data = response.json()

        # Anything that is not a list of records cannot be turned into rows
        if not isinstance(data, list):
            raise ValueError(f"Unexpected balance sheet response for ticker {ticker!r}: {data!r}")

        # No records for this symbol: nothing to add
        if not data:
            continue

        df = pd.DataFrame(data)
        df['ticker'] = ticker
        all_data.append(df)

    # pd.concat raises on an empty list, so return an empty frame instead
    if not all_data:
        return pd.DataFrame()

    return pd.concat(all_data, ignore_index=True)

In [211]:
def get_cash_flow_statement(tickers, api_key):
    """
    Fetch cash flow statement data for a list of tickers using the Financial Modeling Prep API.

    Args:
    tickers (str or list): A single stock ticker symbol or a list of stock ticker symbols.
    api_key (str): Your API key for Financial Modeling Prep.

    Returns:
    pandas.DataFrame: A DataFrame containing the cash flow statement data for all tickers,
        with a 'ticker' column identifying the requested symbol. Empty when no
        ticker returned any rows.

    Raises:
    ValueError: If the API answers with something other than a list of records
        (for example an error object instead of data).
    """
    # A bare string would otherwise be iterated character by character
    if isinstance(tickers, str):
        tickers = [tickers]

    all_data = []

    for ticker in tickers:
        url = f'https://financialmodelingprep.com/api/v3/cash-flow-statement/{ticker}'
        # Query values go through `params` so they are URL-encoded; the timeout
        # keeps a stalled connection from hanging the notebook indefinitely.
        response = requests.get(url, params={'limit': 120, 'apikey': api_key}, timeout=30)
        data = response.json()

        # Anything that is not a list of records cannot be turned into rows
        if not isinstance(data, list):
            raise ValueError(f"Unexpected cash flow response for ticker {ticker!r}: {data!r}")

        # No records for this symbol: nothing to add
        if not data:
            continue

        df = pd.DataFrame(data)
        df['ticker'] = ticker
        all_data.append(df)

    # pd.concat raises on an empty list, so return an empty frame instead
    if not all_data:
        return pd.DataFrame()

    return pd.concat(all_data, ignore_index=True)


In [212]:

def get_key_metrics(tickers, api_key):
    """
    Fetch key metrics data for a list of tickers using the Financial Modeling Prep API.

    Args:
    tickers (str or list): A single stock ticker symbol or a list of stock ticker symbols.
    api_key (str): Your API key for Financial Modeling Prep.

    Returns:
    pandas.DataFrame: A DataFrame containing the key metrics data for all tickers,
        with a 'ticker' column identifying the requested symbol. Empty when no
        ticker returned any rows.

    Raises:
    ValueError: If the API answers with something other than a list of records
        (for example an error object instead of data).
    """
    # A bare string would otherwise be iterated character by character
    if isinstance(tickers, str):
        tickers = [tickers]

    all_data = []

    for ticker in tickers:
        url = f'https://financialmodelingprep.com/api/v3/key-metrics/{ticker}'
        # Query values go through `params` so they are URL-encoded; the timeout
        # keeps a stalled connection from hanging the notebook indefinitely.
        response = requests.get(url, params={'limit': 120, 'apikey': api_key}, timeout=30)
        data = response.json()

        # Anything that is not a list of records cannot be turned into rows
        if not isinstance(data, list):
            raise ValueError(f"Unexpected key metrics response for ticker {ticker!r}: {data!r}")

        # No records for this symbol: nothing to add
        if not data:
            continue

        df = pd.DataFrame(data)
        df['ticker'] = ticker
        all_data.append(df)

    # pd.concat raises on an empty list, so return an empty frame instead
    if not all_data:
        return pd.DataFrame()

    return pd.concat(all_data, ignore_index=True)

In [11]:
# Usage example:
tickers = [
    "ALC",   # Alcon Inc.
    "RXST",  # RxSight, Inc.
    "BLCO",  # Bausch + Lomb Corporation
    "COO",   # The Cooper Companies, Inc.
    # Further candidates, currently left out of the run:
    # "ABT",   # Abbott Laboratories
    # "AFXXF", # Carl Zeiss Meditec AG (OTC)
    # "MDT",   # Medtronic PLC
    # "SYK",   # Stryker Corporation
    # "BSX",   # Boston Scientific Corporation
    # "NVS",   # Novartis AG
]

In [30]:
# Quarterly income statements for every symbol in `tickers`, stacked into one frame
quarterly_income_statement_df = get_income_statement_quarterly(tickers)
quarterly_income_statement_df

Unnamed: 0,2024-06-30 00:00:00,2024-03-31 00:00:00,2023-12-31 00:00:00,2023-09-30 00:00:00,2023-06-30 00:00:00,2023-03-31 00:00:00,ticker,2024-07-31 00:00:00,2024-04-30 00:00:00,2024-01-31 00:00:00,2023-10-31 00:00:00,2023-07-31 00:00:00,2023-04-30 00:00:00
Tax Effect Of Unusual Items,0.0,0.0,0.0,0.0,0.0,,ALC,,,,,,
Tax Rate For Calcs,0.204,0.26,0.085,0.143,0.207,,ALC,,,,,,
Normalized EBITDA,641000000.0,676000000.0,215000000.0,285000000.0,558000000.0,,ALC,,,,,,
Net Income From Continuing Operation Net Minority Interest,223000000.0,248000000.0,427000000.0,204000000.0,169000000.0,,ALC,,,,,,
Reconciled Depreciation,311000000.0,296000000.0,,,297000000.0,302000000.0,ALC,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
Salaries And Wages,,,,,,,COO,,,,,13700000.0,13400000.0
Gross Profit,,,,,,,COO,663000000.0,631200000.0,623800000.0,606500000.0,610000000.0,
Cost Of Revenue,,,,,,,COO,339800000.0,311400000.0,307800000.0,320600000.0,320200000.0,
Total Revenue,,,,,,,COO,1002800000.0,942600000.0,931600000.0,927100000.0,930200000.0,


In [31]:
# Inspect the column labels of the combined quarterly income statement frame
quarterly_income_statement_df.columns


Index([2024-06-30 00:00:00, 2024-03-31 00:00:00, 2023-12-31 00:00:00,
       2023-09-30 00:00:00, 2023-06-30 00:00:00, 2023-03-31 00:00:00,
                  'ticker', 2024-07-31 00:00:00, 2024-04-30 00:00:00,
       2024-01-31 00:00:00, 2023-10-31 00:00:00, 2023-07-31 00:00:00,
       2023-04-30 00:00:00],
      dtype='object')

In [224]:
# NOTE(review): neither get_quarterly_financial_reports nor api_key is defined in the
# cells visible in this notebook - confirm both are created earlier, otherwise this
# cell fails on Restart & Run All. The recorded output below reports
# "No data or invalid data format received" for each listed ALC period.
quarterly_financial_reports_df = get_quarterly_financial_reports(tickers = tickers, api_key = api_key)
quarterly_financial_reports_df.head()


No data or invalid data format received for ticker: ALC, year: 2022, period: FY
No data or invalid data format received for ticker: ALC, year: 2022, period: Q1
No data or invalid data format received for ticker: ALC, year: 2022, period: Q2
No data or invalid data format received for ticker: ALC, year: 2022, period: Q3
No data or invalid data format received for ticker: ALC, year: 2022, period: Q4
No data or invalid data format received for ticker: ALC, year: 2023, period: FY
No data or invalid data format received for ticker: ALC, year: 2023, period: Q1
No data or invalid data format received for ticker: ALC, year: 2023, period: Q2
No data or invalid data format received for ticker: ALC, year: 2023, period: Q3
No data or invalid data format received for ticker: ALC, year: 2023, period: Q4
No data or invalid data format received for ticker: ALC, year: 2024, period: FY
No data or invalid data format received for ticker: ALC, year: 2024, period: Q1
No data or invalid data format received 

In [195]:
# Balance sheets for the selected tickers; preview the first rows
balance_sheet_df = get_balance_sheet(tickers=tickers, api_key=api_key)
balance_sheet_df.head()

Unnamed: 0,date,symbol,reportedCurrency,cik,fillingDate,acceptedDate,calendarYear,period,cashAndCashEquivalents,shortTermInvestments,cashAndShortTermInvestments,netReceivables,inventory,otherCurrentAssets,totalCurrentAssets,propertyPlantEquipmentNet,goodwill,intangibleAssets,goodwillAndIntangibleAssets,longTermInvestments,taxAssets,otherNonCurrentAssets,totalNonCurrentAssets,otherAssets,totalAssets,accountPayables,shortTermDebt,taxPayables,deferredRevenue,otherCurrentLiabilities,totalCurrentLiabilities,longTermDebt,deferredRevenueNonCurrent,deferredTaxLiabilitiesNonCurrent,otherNonCurrentLiabilities,totalNonCurrentLiabilities,otherLiabilities,capitalLeaseObligations,totalLiabilities,preferredStock,commonStock,retainedEarnings,accumulatedOtherComprehensiveIncomeLoss,othertotalStockholdersEquity,totalStockholdersEquity,totalEquity,totalLiabilitiesAndStockholdersEquity,minorityInterest,totalLiabilitiesAndTotalEquity,totalInvestments,totalDebt,netDebt,link,finalLink,ticker
0,2023-12-31,ALC,USD,1167379,2024-02-27,2024-02-27 16:38:27,2023,FY,1093629539,6997629,1100627169,1981328837,2321213703,241918051,5645087762,4721400655,8922977397,9060000000,17986000000,273907215,442849987,534726221,23958884078,0,29603971840,810725371,215926856,113961396,77973586,1374534387,2479160200,4592444338,334886561,796730112,783734514,6507795525,0,334886559,8986955725,0,19993227,0,20597022888,0,20617016115,20617016115,29603971840,0,29603971840,280904845,5143257753,4049628214,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
1,2022-12-31,ALC,USD,1167379,2023-02-27,2023-02-27 16:35:50,2022,FY,980000000,0,980000000,1989000000,2109000000,115000000,5193000000,4416000000,8970000000,9689000000,18659000000,108000000,411000000,422000000,24016000000,0,29209000000,861000000,168000000,317000000,89000000,1664000000,2782000000,4900000000,556000000,1064000000,230000000,6750000000,0,430000000,9532000000,0,20000000,0,19657000000,0,19677000000,19677000000,29209000000,0,29209000000,108000000,5068000000,4088000000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
2,2021-12-31,ALC,USD,1167379,2022-02-15,2022-02-15 16:38:34,2021,FY,1575000000,3000000,1578000000,1814000000,1899000000,95000000,5386000000,4083000000,8905000000,8765000000,17670000000,52000000,409000000,399000000,22613000000,0,27999000000,903000000,174000000,280000000,108000000,1287000000,2472000000,4305000000,772000000,1026000000,168000000,6271000000,0,406000000,8743000000,0,20000000,0,19236000000,0,19256000000,19256000000,27999000000,0,27999000000,55000000,4479000000,2904000000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
3,2020-12-31,ALC,USD,1167379,2021-02-23,2021-02-23 16:25:19,2020,FY,1557000000,12000000,1569000000,1678000000,1644000000,96000000,4987000000,3783000000,8905000000,9097000000,18002000000,40000000,399000000,389000000,22613000000,0,27600000000,876000000,232000000,259000000,110000000,1040000000,2258000000,4264000000,823000000,1196000000,237000000,6520000000,0,385000000,8778000000,0,20000000,0,18802000000,0,18822000000,18822000000,27600000000,0,27600000000,52000000,4496000000,2939000000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
4,2019-12-31,ALC,USD,1167379,2020-02-25,2020-02-25 16:42:17,2019,FY,822000000,33000000,855000000,1786000000,1505000000,90000000,4236000000,3437000000,8905000000,10231000000,19136000000,59000000,354000000,433000000,23419000000,0,27655000000,833000000,306000000,188000000,97000000,1064000000,2300000000,3498000000,854000000,1386000000,314000000,6052000000,0,341000000,8352000000,527000000,20000000,-547000000,19283000000,20000000,19303000000,19303000000,27655000000,0,27655000000,59000000,3804000000,2982000000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC


In [196]:
# Cash flow statements for the selected tickers; preview the first rows
cash_flow_statement_df = get_cash_flow_statement(tickers=tickers, api_key=api_key)
cash_flow_statement_df.head()


Unnamed: 0,date,symbol,reportedCurrency,cik,fillingDate,acceptedDate,calendarYear,period,netIncome,depreciationAndAmortization,deferredIncomeTax,stockBasedCompensation,changeInWorkingCapital,accountsReceivables,inventory,accountsPayables,otherWorkingCapital,otherNonCashItems,netCashProvidedByOperatingActivities,investmentsInPropertyPlantAndEquipment,acquisitionsNet,purchasesOfInvestments,salesMaturitiesOfInvestments,otherInvestingActivites,netCashUsedForInvestingActivites,debtRepayment,commonStockIssued,commonStockRepurchased,dividendsPaid,otherFinancingActivites,netCashUsedProvidedByFinancingActivities,effectOfForexChangesOnCash,netChangeInCash,cashAtEndOfPeriod,cashAtBeginningOfPeriod,operatingCashFlow,capitalExpenditure,freeCashFlow,link,finalLink,ticker
0,2023-12-31,ALC,USD,1167379,2024-02-27,2024-02-27 16:38:27,2023,FY,1039613150,1303252214,-142000000,144000000,-431215310,-117410109,-289255814,-54435596,29886209,-432147947,1481502107,-908327300,-2134729,-248695958,2134729,-10673647,-1167696905,-76850222,0,-49000000,-123814297,-24549418,-225213937,31000000,113128485,1093629539,980501054,1481502107,-908327300,573174807,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
1,2022-12-31,ALC,USD,1167379,2023-02-27,2023-02-27 16:35:50,2022,FY,335000000,1123000000,128000000,140000000,-522000000,-164000000,-217000000,-48000000,-93000000,13000000,1217000000,-745000000,-666000000,-50000000,81000000,-485000000,-1865000000,-2336000000,0,-50000000,-103280741,2428000000,-8000000,61000000,-595000000,980000000,1575000000,1217000000,-745000000,472000000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
2,2021-12-31,ALC,USD,1167379,2022-02-15,2022-02-15 16:38:34,2021,FY,376000000,1219000000,42000000,138000000,-314000000,-198000000,-326000000,60000000,150000000,-116000000,1345000000,-1180000000,480000000,-19000000,1000000,-480000000,-1198000000,-72000000,0,-22000000,-54114880,3000000,-123000000,-6000000,18000000,1575000000,1557000000,1345000000,-1180000000,165000000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
3,2020-12-31,ALC,USD,1167379,2021-02-23,2021-02-23 16:25:19,2020,FY,-531000000,1623000000,-104000000,105000000,-45000000,43000000,-159000000,-21000000,92000000,-225000000,823000000,-567000000,6371042,-11000000,81628958,-82000000,-572000000,-69000000,0,-16000000,0,535000000,466000000,18000000,735000000,1557000000,822000000,823000000,-567000000,256000000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
4,2019-12-31,ALC,USD,1167379,2020-02-25,2020-02-25 16:42:17,2019,FY,-656000000,1425000000,324000000,83000000,-48000000,-115000000,-108000000,84000000,91000000,-208000000,920000000,-676000000,-283000000,-59000000,8000000,-1000000,-1011000000,-2061000000,0,0,0,2720000000,659000000,27000000,595000000,822000000,227000000,920000000,-676000000,244000000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC


In [197]:
# Key metrics for the selected tickers; preview the first rows
key_metrics_df = get_key_metrics(tickers=tickers, api_key=api_key)
key_metrics_df.head()

Unnamed: 0,symbol,date,calendarYear,period,revenuePerShare,netIncomePerShare,operatingCashFlowPerShare,freeCashFlowPerShare,cashPerShare,bookValuePerShare,tangibleBookValuePerShare,shareholdersEquityPerShare,interestDebtPerShare,marketCap,enterpriseValue,peRatio,priceToSalesRatio,pocfratio,pfcfRatio,pbRatio,ptbRatio,evToSales,enterpriseValueOverEBITDA,evToOperatingCashFlow,evToFreeCashFlow,earningsYield,freeCashFlowYield,debtToEquity,debtToAssets,netDebtToEBITDA,currentRatio,interestCoverage,incomeQuality,dividendYield,payoutRatio,salesGeneralAndAdministrativeToRevenue,researchAndDdevelopementToRevenue,intangiblesToTotalAssets,capexToOperatingCashFlow,capexToRevenue,capexToDepreciation,stockBasedCompensationToRevenue,grahamNumber,roic,returnOnTangibleAssets,grahamNetNet,workingCapital,tangibleAssetValue,netCurrentAssetValue,investedCapital,averageReceivables,averagePayables,averageInventory,daysSalesOutstanding,daysPayablesOutstanding,daysOfInventoryOnHand,receivablesTurnover,payablesTurnover,inventoryTurnover,roe,capexPerShare,ticker
0,ALC,2023-12-31,2023,FY,20.470451,1.975659,3.005075,1.162626,2.232509,41.819505,5.336747,41.819505,10.162481,38513160000.0,42562788214,39.541232,3.816232,25.996021,67.192695,1.868028,1.868028,4.217506,16.601383,28.729482,74.257954,0.02529,0.014883,0.233223,0.162423,1.579535,2.277016,6.248677,1.425051,0.003215,0.127119,0.0,0.087573,0.607554,0.613112,0.090005,0.69697,0.014269,43.115827,0.05804,0.083836,-10.628246,3165927562,2631016115,-3341867963,25873328217,1985164000.0,835862685.5,2215107000.0,71.659716,65.993497,188.948088,5.093517,5.530848,1.931748,0.047243,1.842449,ALC
1,ALC,2022-12-31,2022,FY,17.739113,0.681726,2.476597,0.960521,1.994302,40.042735,2.071632,40.042735,10.586081,33685470000.0,37773470000,100.553642,3.864342,27.679104,71.367521,1.711921,1.711921,4.333311,21.185345,31.038184,80.028538,0.009945,0.014012,0.25756,0.173508,2.292765,1.866643,5.014925,3.632836,0.003066,0.308301,0.0,0.080532,0.63881,0.612161,0.085465,0.663402,0.016061,24.783233,0.019649,0.031754,-12.221713,2411000000,1018000000,-4339000000,25486000000,1901500000.0,882000000.0,2004000000.0,83.283813,79.179894,193.949358,4.382604,4.609756,1.881935,0.017025,1.516077,ALC
2,ALC,2021-12-31,2021,FY,16.920408,0.767347,2.744898,0.336735,3.220408,39.297959,3.236735,39.297959,9.385714,42688800000.0,45592800000,113.534043,5.148812,31.738885,258.72,2.216909,2.216909,5.499071,25.527884,33.897993,276.32,0.008808,0.003865,0.232603,0.15997,1.62598,2.178803,4.833333,3.577128,0.001268,0.143923,0.0,0.101556,0.631094,0.877323,0.142323,0.968007,0.016645,26.047865,0.021981,0.036402,-9.908163,2914000000,1586000000,-3357000000,24667000000,1746000000.0,889500000.0,1771500000.0,79.858883,90.57296,190.474031,4.570562,4.0299,1.916272,0.019526,2.408163,ALC
3,ALC,2020-12-31,2020,FY,13.973415,-1.08589,1.683027,0.523517,3.208589,38.490798,1.676892,38.490798,9.447853,32264220000.0,35203220000,-60.761243,4.721824,39.203183,126.032109,1.714176,1.714176,5.151942,31.043404,42.774265,137.512578,-0.016458,0.007934,0.238869,0.162899,2.591711,2.208592,-3.887097,-1.549906,0.0,0.0,0.0,0.098493,0.652246,0.688943,0.08298,0.349353,0.015367,30.666382,-0.017285,-0.055324,-10.48773,2729000000,820000000,-3791000000,24514000000,1732000000.0,854500000.0,1574500000.0,89.634128,82.132032,154.138197,4.07211,4.444064,2.368005,-0.028212,1.159509,ALC
4,ALC,2019-12-31,2019,FY,15.378943,-1.343712,1.884474,0.499795,1.751331,39.539123,0.342073,39.539123,8.023351,27617470000.0,30599474000,-42.099808,3.678406,30.018993,113.186369,1.430735,1.430735,4.075583,24.958788,33.260298,125.40768,-0.023753,0.008835,0.197068,0.137552,2.4323,1.841739,-1.654867,-1.402439,0.0,0.0,0.379196,0.087373,0.691954,0.734783,0.090037,0.474386,0.011055,34.574651,-0.015991,-0.077004,-11.071282,1936000000,167000000,-4116000000,24509000000,1720500000.0,748000000.0,1472500000.0,86.826052,79.054862,142.830213,4.203807,4.617047,2.555482,-0.033984,1.384678,ALC


# Transform 

In [198]:
def rename_columns_for_business(df):
    """
    Rename DataFrame columns for business use by adding spaces and capitalizing each word.

    Names are split on underscores, whitespace and capital letters
    ('cashAndCashEquivalents' -> 'Cash And Cash Equivalents'), and 'EBITDA' is
    kept as a single upper-case word. Because existing spaces are treated as
    separators, renaming an already-renamed frame leaves the names unchanged.
    Column labels that are not strings (for example date labels) and empty
    names are passed through untouched.

    Args:
    df (pandas.DataFrame): The input DataFrame with original column names.

    Returns:
    pandas.DataFrame: A new DataFrame with renamed columns; the input is not modified.
    """
    def split_into_words(col):
        # Names spelled with a lowercase 'ebitda' are title-cased as a whole
        if 'ebitda' in col:
            return col.replace('_', ' ').title().replace('Ebitda', 'EBITDA')

        words = []
        current_word = ''
        for char in col:
            if char == '_' or char.isspace():
                # Separator: close the current word and drop the separator itself
                words.append(current_word)
                current_word = ''
            elif char.isupper():
                # A capital letter starts a new word
                words.append(current_word)
                current_word = char
            else:
                current_word += char
        words.append(current_word)

        # Capitalize each word and join with spaces (empty fragments are skipped)
        return ' '.join(word.capitalize() for word in words if word)

    def format_column_name(col):
        # Only non-empty strings can be split; leave any other label as it is
        if not isinstance(col, str) or not col:
            return col
        # An all-caps 'EBITDA' is split letter by letter above; stitch it back together
        return split_into_words(col).replace("E B I T D A", "EBITDA")

    # DataFrame.rename applies the function to every column label and returns a new frame
    return df.rename(columns=format_column_name)

In [199]:
# Give each fetched frame business-friendly column names, exactly once per frame:
# a repeated pass over already-renamed columns is unnecessary and can distort the labels.
balance_sheet_df = rename_columns_for_business(balance_sheet_df)
cash_flow_statement_df = rename_columns_for_business(cash_flow_statement_df)
key_metrics_df = rename_columns_for_business(key_metrics_df)


# Load

In [200]:
def load_data(df, data_dir, file_name, index=False):
    """
    Load a DataFrame to a CSV file in the specified data directory.

    The directory is created first if it does not exist yet, and an existing
    file with the same name is overwritten.

    Args:
    df (pandas.DataFrame): The DataFrame to be saved.
    data_dir (str): The directory where the CSV file will be saved.
    file_name (str): The name of the file to be saved (without extension).
    index (bool): Whether to write the DataFrame index as the first column.
        Defaults to False; pass True for frames whose index carries data.
    """

    # Make sure the target directory exists so to_csv does not fail on a fresh checkout
    os.makedirs(data_dir, exist_ok=True)

    # Create the full file path
    file_path = os.path.join(data_dir, f"{file_name}.csv")

    # Save the DataFrame to a CSV file
    df.to_csv(file_path, index=index)


In [202]:
# Load the income statement data
# The only income statement frame built in this notebook is quarterly_income_statement_df.
# Its line-item names live in the index, and load_data is called without the index,
# so move them into a regular column before saving.
income_statement_out = quarterly_income_statement_df.rename_axis('line_item').reset_index()
load_data(income_statement_out, data_dir, 'income_statement')

# Load the balance sheet data
load_data(balance_sheet_df, data_dir, 'balance_sheet')

# Load the cash flow statement data
load_data(cash_flow_statement_df, data_dir, 'cash_flow_statement')

# Load the key metrics data
load_data(key_metrics_df, data_dir, 'key_metrics')