In [66]:
# Import necessary libraries
import pandas as pd
import requests
from datetime import datetime

import os

import dotenv
dotenv.load_dotenv()

True

In [67]:
# Build the path of the project's data folder, which is expected to sit next to
# the folder the notebook runs from. Only path strings are computed here; the
# working directory itself is not changed.
current_dir = os.getcwd()
# os.path.split returns (head, tail); the head is the parent folder.
parent_dir = os.path.split(current_dir)[0]
data_dir = os.path.join(parent_dir, 'data')

# Extract

## Financial Statements and Key Metrics

In [53]:
def get_income_statement(tickers, api_key, timeout=30):
    """
    Fetch income statement data for a list of tickers using the Financial Modeling Prep API.

    One GET request is sent per ticker, asking for up to 120 records
    (``limit=120``). Each response is turned into a DataFrame, tagged with a
    ``ticker`` column, and all per-ticker frames are concatenated.

    Args:
    tickers (list or str): Stock ticker symbols. A single symbol passed as a
        string is treated as a one-element list.
    api_key (str): Your API key for Financial Modeling Prep.
    timeout (float): Seconds to wait for each HTTP request. Defaults to 30.

    Returns:
    pandas.DataFrame: A DataFrame containing the income statement data for all
        tickers, or an empty DataFrame when no tickers are given.

    Raises:
    ValueError: If ``api_key`` is empty or a response body is not a JSON list.
    requests.HTTPError: If the API answers with an HTTP error status.
    """
    if not api_key:
        raise ValueError('api_key must be a non-empty string')

    # A bare string would otherwise be iterated character by character.
    if isinstance(tickers, str):
        tickers = [tickers]

    all_data = []

    for ticker in tickers:
        url = f'https://financialmodelingprep.com/api/v3/income-statement/{ticker}'

        # The query string goes through `params` so values are URL-encoded, and
        # the timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(
            url,
            params={'limit': 120, 'apikey': api_key},
            timeout=timeout,
        )

        # Build the error message by hand: the default raise_for_status() text
        # embeds the full URL, which would print the API key in the output.
        if not response.ok:
            raise requests.HTTPError(
                f'HTTP {response.status_code} while fetching income statement for {ticker}',
                response=response,
            )

        data = response.json()

        # Anything other than a list of records (for example an error object)
        # cannot be turned into rows, so fail with a readable message.
        if not isinstance(data, list):
            raise ValueError(f'Unexpected income statement response for {ticker}: {data!r}')

        # Convert the records to a DataFrame and tag them with the requested ticker
        df = pd.DataFrame(data)
        df['ticker'] = ticker
        all_data.append(df)

    # pd.concat raises on an empty list, so return an empty frame instead.
    if not all_data:
        return pd.DataFrame()

    return pd.concat(all_data, ignore_index=True)

In [54]:
def get_balance_sheet(tickers, api_key, timeout=30):
    """
    Fetch balance sheet data for a list of tickers using the Financial Modeling Prep API.

    One GET request is sent per ticker, asking for up to 120 records
    (``limit=120``). Each response is turned into a DataFrame, tagged with a
    ``ticker`` column, and all per-ticker frames are concatenated.

    Args:
    tickers (list or str): Stock ticker symbols. A single symbol passed as a
        string is treated as a one-element list.
    api_key (str): Your API key for Financial Modeling Prep.
    timeout (float): Seconds to wait for each HTTP request. Defaults to 30.

    Returns:
    pandas.DataFrame: A DataFrame containing the balance sheet data for all
        tickers, or an empty DataFrame when no tickers are given.

    Raises:
    ValueError: If ``api_key`` is empty or a response body is not a JSON list.
    requests.HTTPError: If the API answers with an HTTP error status.
    """
    if not api_key:
        raise ValueError('api_key must be a non-empty string')

    # A bare string would otherwise be iterated character by character.
    if isinstance(tickers, str):
        tickers = [tickers]

    all_data = []

    for ticker in tickers:
        url = f'https://financialmodelingprep.com/api/v3/balance-sheet-statement/{ticker}'

        # The query string goes through `params` so values are URL-encoded, and
        # the timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(
            url,
            params={'limit': 120, 'apikey': api_key},
            timeout=timeout,
        )

        # Build the error message by hand: the default raise_for_status() text
        # embeds the full URL, which would print the API key in the output.
        if not response.ok:
            raise requests.HTTPError(
                f'HTTP {response.status_code} while fetching balance sheet for {ticker}',
                response=response,
            )

        data = response.json()

        # Anything other than a list of records (for example an error object)
        # cannot be turned into rows, so fail with a readable message.
        if not isinstance(data, list):
            raise ValueError(f'Unexpected balance sheet response for {ticker}: {data!r}')

        df = pd.DataFrame(data)
        df['ticker'] = ticker
        all_data.append(df)

    # pd.concat raises on an empty list, so return an empty frame instead.
    if not all_data:
        return pd.DataFrame()

    return pd.concat(all_data, ignore_index=True)

In [55]:
def get_cash_flow_statement(tickers, api_key, timeout=30):
    """
    Fetch cash flow statement data for a list of tickers using the Financial Modeling Prep API.

    One GET request is sent per ticker, asking for up to 120 records
    (``limit=120``). Each response is turned into a DataFrame, tagged with a
    ``ticker`` column, and all per-ticker frames are concatenated.

    Args:
    tickers (list or str): Stock ticker symbols. A single symbol passed as a
        string is treated as a one-element list.
    api_key (str): Your API key for Financial Modeling Prep.
    timeout (float): Seconds to wait for each HTTP request. Defaults to 30.

    Returns:
    pandas.DataFrame: A DataFrame containing the cash flow statement data for
        all tickers, or an empty DataFrame when no tickers are given.

    Raises:
    ValueError: If ``api_key`` is empty or a response body is not a JSON list.
    requests.HTTPError: If the API answers with an HTTP error status.
    """
    if not api_key:
        raise ValueError('api_key must be a non-empty string')

    # A bare string would otherwise be iterated character by character.
    if isinstance(tickers, str):
        tickers = [tickers]

    all_data = []

    for ticker in tickers:
        url = f'https://financialmodelingprep.com/api/v3/cash-flow-statement/{ticker}'

        # The query string goes through `params` so values are URL-encoded, and
        # the timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(
            url,
            params={'limit': 120, 'apikey': api_key},
            timeout=timeout,
        )

        # Build the error message by hand: the default raise_for_status() text
        # embeds the full URL, which would print the API key in the output.
        if not response.ok:
            raise requests.HTTPError(
                f'HTTP {response.status_code} while fetching cash flow statement for {ticker}',
                response=response,
            )

        data = response.json()

        # Anything other than a list of records (for example an error object)
        # cannot be turned into rows, so fail with a readable message.
        if not isinstance(data, list):
            raise ValueError(f'Unexpected cash flow statement response for {ticker}: {data!r}')

        df = pd.DataFrame(data)
        df['ticker'] = ticker
        all_data.append(df)

    # pd.concat raises on an empty list, so return an empty frame instead.
    if not all_data:
        return pd.DataFrame()

    return pd.concat(all_data, ignore_index=True)


In [56]:

def get_key_metrics(tickers, api_key, timeout=30):
    """
    Fetch key metrics data for a list of tickers using the Financial Modeling Prep API.

    One GET request is sent per ticker, asking for up to 120 records
    (``limit=120``). Each response is turned into a DataFrame, tagged with a
    ``ticker`` column, and all per-ticker frames are concatenated.

    Args:
    tickers (list or str): Stock ticker symbols. A single symbol passed as a
        string is treated as a one-element list.
    api_key (str): Your API key for Financial Modeling Prep.
    timeout (float): Seconds to wait for each HTTP request. Defaults to 30.

    Returns:
    pandas.DataFrame: A DataFrame containing the key metrics data for all
        tickers, or an empty DataFrame when no tickers are given.

    Raises:
    ValueError: If ``api_key`` is empty or a response body is not a JSON list.
    requests.HTTPError: If the API answers with an HTTP error status.
    """
    if not api_key:
        raise ValueError('api_key must be a non-empty string')

    # A bare string would otherwise be iterated character by character.
    if isinstance(tickers, str):
        tickers = [tickers]

    all_data = []

    for ticker in tickers:
        url = f'https://financialmodelingprep.com/api/v3/key-metrics/{ticker}'

        # The query string goes through `params` so values are URL-encoded, and
        # the timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(
            url,
            params={'limit': 120, 'apikey': api_key},
            timeout=timeout,
        )

        # Build the error message by hand: the default raise_for_status() text
        # embeds the full URL, which would print the API key in the output.
        if not response.ok:
            raise requests.HTTPError(
                f'HTTP {response.status_code} while fetching key metrics for {ticker}',
                response=response,
            )

        data = response.json()

        # Anything other than a list of records (for example an error object)
        # cannot be turned into rows, so fail with a readable message.
        if not isinstance(data, list):
            raise ValueError(f'Unexpected key metrics response for {ticker}: {data!r}')

        df = pd.DataFrame(data)
        df['ticker'] = ticker
        all_data.append(df)

    # pd.concat raises on an empty list, so return an empty frame instead.
    if not all_data:
        return pd.DataFrame()

    return pd.concat(all_data, ignore_index=True)

In [57]:
# Usage example: the symbols to download, plus the API key read from the
# environment (populated from a .env file by dotenv at the top of the notebook).
tickers = [
    "ALC",   # Alcon Inc.
    "RXST",  # RxSight, Inc.
    "BLCO",  # Bausch + Lomb Corporation
    "COO",   # The Cooper Companies, Inc.
    # Further candidates, currently disabled:
    # "ABT",   # Abbott Laboratories
    # "AFXXF", # Carl Zeiss Meditec AG (OTC)
    # "MDT",   # Medtronic PLC
    # "SYK",   # Stryker Corporation
    # "BSX",   # Boston Scientific Corporation
    # "NVS",   # Novartis AG
]

# None when FMP_API_KEY is not defined.
api_key = os.getenv('FMP_API_KEY')

In [58]:
# Download the income statements for the configured tickers and preview the first rows.
income_statement_df = get_income_statement(tickers=tickers, api_key=api_key)
income_statement_df.head()

Unnamed: 0,date,symbol,reportedCurrency,cik,fillingDate,acceptedDate,calendarYear,period,revenue,costOfRevenue,...,incomeTaxExpense,netIncome,netIncomeRatio,eps,epsdiluted,weightedAverageShsOut,weightedAverageShsOutDil,link,finalLink,ticker
0,2023-12-31,ALC,USD,1167379,2024-02-27,2024-02-27 16:38:27,2023,FY,10091932583,4483998813,...,-151565777,974000000,0.096513,2.11,2.09,493000000,496500000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
1,2022-12-31,ALC,USD,1167379,2023-02-27,2023-02-27 16:35:50,2022,FY,8717000000,3969000000,...,128000000,335000000,0.038431,0.7,0.7,491400000,494400000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
2,2021-12-31,ALC,USD,1167379,2022-02-15,2022-02-15 16:38:34,2021,FY,8291000000,3639000000,...,42000000,376000000,0.04535,0.77,0.76,490000000,493400000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
3,2020-12-31,ALC,USD,1167379,2021-02-23,2021-02-23 16:25:19,2020,FY,6833000000,3893000000,...,-104000000,-531000000,-0.077711,-1.09,-1.09,489000000,489000000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
4,2019-12-31,ALC,USD,1167379,2020-02-25,2020-02-25 16:42:17,2019,FY,7508000000,3846000000,...,324000000,-656000000,-0.087373,-1.34,-1.34,488200000,488200000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC


In [59]:
# Download the balance sheets for the configured tickers and preview the first rows.
balance_sheet_df = get_balance_sheet(tickers=tickers, api_key=api_key)
balance_sheet_df.head()

Unnamed: 0,date,symbol,reportedCurrency,cik,fillingDate,acceptedDate,calendarYear,period,cashAndCashEquivalents,shortTermInvestments,...,totalEquity,totalLiabilitiesAndStockholdersEquity,minorityInterest,totalLiabilitiesAndTotalEquity,totalInvestments,totalDebt,netDebt,link,finalLink,ticker
0,2023-12-31,ALC,USD,1167379,2024-02-27,2024-02-27 16:38:27,2023,FY,1093629539,6997629,...,20617016115,29603971840,0,29603971840,280904845,5143257753,4049628214,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
1,2022-12-31,ALC,USD,1167379,2023-02-27,2023-02-27 16:35:50,2022,FY,980000000,0,...,19677000000,29209000000,0,29209000000,108000000,5068000000,4088000000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
2,2021-12-31,ALC,USD,1167379,2022-02-15,2022-02-15 16:38:34,2021,FY,1575000000,3000000,...,19256000000,27999000000,0,27999000000,55000000,4479000000,2904000000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
3,2020-12-31,ALC,USD,1167379,2021-02-23,2021-02-23 16:25:19,2020,FY,1557000000,12000000,...,18822000000,27600000000,0,27600000000,52000000,4496000000,2939000000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
4,2019-12-31,ALC,USD,1167379,2020-02-25,2020-02-25 16:42:17,2019,FY,822000000,33000000,...,19303000000,27655000000,0,27655000000,59000000,3804000000,2982000000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC


In [60]:
# Download the cash flow statements for the configured tickers and preview the first rows.
cash_flow_statement_df = get_cash_flow_statement(tickers=tickers, api_key=api_key)
cash_flow_statement_df.head()


Unnamed: 0,date,symbol,reportedCurrency,cik,fillingDate,acceptedDate,calendarYear,period,netIncome,depreciationAndAmortization,...,effectOfForexChangesOnCash,netChangeInCash,cashAtEndOfPeriod,cashAtBeginningOfPeriod,operatingCashFlow,capitalExpenditure,freeCashFlow,link,finalLink,ticker
0,2023-12-31,ALC,USD,1167379,2024-02-27,2024-02-27 16:38:27,2023,FY,1039613150,1303252214,...,31000000,113128485,1093629539,980501054,1481502107,-908327300,573174807,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
1,2022-12-31,ALC,USD,1167379,2023-02-27,2023-02-27 16:35:50,2022,FY,335000000,1123000000,...,61000000,-595000000,980000000,1575000000,1217000000,-745000000,472000000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
2,2021-12-31,ALC,USD,1167379,2022-02-15,2022-02-15 16:38:34,2021,FY,376000000,1219000000,...,-6000000,18000000,1575000000,1557000000,1345000000,-1180000000,165000000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
3,2020-12-31,ALC,USD,1167379,2021-02-23,2021-02-23 16:25:19,2020,FY,-531000000,1623000000,...,18000000,735000000,1557000000,822000000,823000000,-567000000,256000000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC
4,2019-12-31,ALC,USD,1167379,2020-02-25,2020-02-25 16:42:17,2019,FY,-656000000,1425000000,...,27000000,595000000,822000000,227000000,920000000,-676000000,244000000,https://www.sec.gov/Archives/edgar/data/116737...,https://www.sec.gov/Archives/edgar/data/116737...,ALC


In [28]:
# Download the key metrics for the configured tickers and preview the first rows.
key_metrics_df = get_key_metrics(tickers=tickers, api_key=api_key)
key_metrics_df.head()

Unnamed: 0,symbol,date,calendarYear,period,revenuePerShare,netIncomePerShare,operatingCashFlowPerShare,freeCashFlowPerShare,cashPerShare,bookValuePerShare,...,averageInventory,daysSalesOutstanding,daysPayablesOutstanding,daysOfInventoryOnHand,receivablesTurnover,payablesTurnover,inventoryTurnover,roe,capexPerShare,ticker
0,ALC,2023-12-31,2023,FY,20.470451,1.975659,3.005075,1.162626,2.232509,41.819505,...,2215107000.0,71.659716,65.993497,188.948088,5.093517,5.530848,1.931748,0.047243,1.842449,ALC
1,ALC,2022-12-31,2022,FY,17.739113,0.681726,2.476597,0.960521,1.994302,40.042735,...,2004000000.0,83.283813,79.179894,193.949358,4.382604,4.609756,1.881935,0.017025,1.516077,ALC
2,ALC,2021-12-31,2021,FY,16.920408,0.767347,2.744898,0.336735,3.220408,39.297959,...,1771500000.0,79.858883,90.57296,190.474031,4.570562,4.0299,1.916272,0.019526,2.408163,ALC
3,ALC,2020-12-31,2020,FY,13.973415,-1.08589,1.683027,0.523517,3.208589,38.490798,...,1574500000.0,89.634128,82.132032,154.138197,4.07211,4.444064,2.368005,-0.028212,1.159509,ALC
4,ALC,2019-12-31,2019,FY,15.378943,-1.343712,1.884474,0.499795,1.751331,39.539123,...,1472500000.0,86.826052,79.054862,142.830213,4.203807,4.617047,2.555482,-0.033984,1.384678,ALC


# Transform 

In [61]:
def rename_columns_for_business(df):
    """
    Rename DataFrame columns for business use by adding a single space between words and capitalizing each word.

    Words are separated at underscores, at whitespace and in front of every
    capital letter, so ``cashAndCashEquivalents`` becomes
    ``Cash And Cash Equivalents``. The acronym EBITDA (in any casing) is kept
    as one upper-case word wherever it appears, so ``netDebtToEBITDA`` becomes
    ``Net Debt To EBITDA`` and ``ebitdaratio`` becomes ``EBITDA Ratio``.
    Other runs of capitals are still split letter by letter. Names that are
    already formatted come back unchanged, so calling this twice is harmless.

    Args:
    df (pandas.DataFrame): The input DataFrame with original column names.

    Returns:
    pandas.DataFrame: A new DataFrame with renamed columns; ``df`` itself is
        not modified. Column labels that are not strings are left as they are.
    """
    acronym = 'ebitda'

    def split_words(fragment):
        # Break a fragment into capitalized words at '_', whitespace and capitals.
        words = []
        current_word = ''
        for char in fragment:
            if char == '_' or char.isspace():
                # Separators end the current word and are dropped.
                if current_word:
                    words.append(current_word)
                current_word = ''
            elif char.isupper():
                # A capital letter starts a new word.
                if current_word:
                    words.append(current_word)
                current_word = char
            else:
                current_word += char
        if current_word:
            words.append(current_word)
        return [word.capitalize() for word in words]

    def format_column_name(col):
        # Only string labels can be split into words.
        if not isinstance(col, str):
            return col

        words = []
        start = 0
        pos = 0
        # Scan for the acronym so the text on either side of it still goes
        # through the normal camelCase / underscore splitting.
        while pos < len(col):
            if col[pos:pos + len(acronym)].lower() == acronym:
                words.extend(split_words(col[start:pos]))
                words.append('EBITDA')
                pos += len(acronym)
                start = pos
            else:
                pos += 1
        words.extend(split_words(col[start:]))

        # Words contain no whitespace, so joining yields single spaces only.
        return ' '.join(words)

    # Create a dictionary of old column names to new column names
    column_mapping = {col: format_column_name(col) for col in df.columns}

    # rename() returns a new DataFrame and leaves the input untouched
    return df.rename(columns=column_mapping)

In [62]:
# Give all four tables business-friendly headers. The income statement is
# included so that every exported file uses the same naming style.
income_statement_df = rename_columns_for_business(income_statement_df)
balance_sheet_df = rename_columns_for_business(balance_sheet_df)
cash_flow_statement_df = rename_columns_for_business(cash_flow_statement_df)
key_metrics_df = rename_columns_for_business(key_metrics_df)


In [63]:
# Show the balance sheet column labels after renaming, as a quick visual check.
balance_sheet_df.columns

Index(['Date', 'Symbol', 'Reported Currency', 'Cik', 'Filling Date',
       'Accepted Date', 'Calendar Year', 'Period', 'Cash And Cash Equivalents',
       'Short Term Investments', 'Cash And Short Term Investments',
       'Net Receivables', 'Inventory', 'Other Current Assets',
       'Total Current Assets', 'Property Plant Equipment Net', 'Goodwill',
       'Intangible Assets', 'Goodwill And Intangible Assets',
       'Long Term Investments', 'Tax Assets', 'Other Non Current Assets',
       'Total Non Current Assets', 'Other Assets', 'Total Assets',
       'Account Payables', 'Short Term Debt', 'Tax Payables',
       'Deferred Revenue', 'Other Current Liabilities',
       'Total Current Liabilities', 'Long Term Debt',
       'Deferred Revenue Non Current', 'Deferred Tax Liabilities Non Current',
       'Other Non Current Liabilities', 'Total Non Current Liabilities',
       'Other Liabilities', 'Capital Lease Obligations', 'Total Liabilities',
       'Preferred Stock', 'Common Stock'

# Load

In [64]:
def load_data(df, data_dir, file_name):
    """
    Load a DataFrame to a CSV file in the specified data directory.

    The directory is created first when it does not exist yet, so a fresh
    checkout without a data folder does not fail on the first export. An
    existing file with the same name is overwritten.

    Args:
    df (pandas.DataFrame): The DataFrame to be saved.
    data_dir (str): The directory where the CSV file will be saved.
    file_name (str): The name of the file to be saved (without extension).
    """
    # An empty data_dir means "current directory"; makedirs('') would raise.
    if data_dir:
        os.makedirs(data_dir, exist_ok=True)

    # Create the full file path
    file_path = os.path.join(data_dir, f"{file_name}.csv")

    # Save the DataFrame to a CSV file, leaving out the row index
    df.to_csv(file_path, index=False)


In [65]:
# Write each prepared table to its own CSV file inside data_dir.

# Income statement -> income_statement.csv
load_data(df=income_statement_df, data_dir=data_dir, file_name='income_statement')

# Balance sheet -> balance_sheet.csv
load_data(df=balance_sheet_df, data_dir=data_dir, file_name='balance_sheet')

# Cash flow statement -> cash_flow_statement.csv
load_data(df=cash_flow_statement_df, data_dir=data_dir, file_name='cash_flow_statement')

# Key metrics -> key_metrics.csv
load_data(df=key_metrics_df, data_dir=data_dir, file_name='key_metrics')