# OpenBB Database

In [1]:
from openbb import obb
import pandas as pd

# Ratio names of interest, grouped by the aspect of the business they describe.
# NOTE(review): this list is not referenced by any other cell visible here.
ratios = [
    # Profitability
    "gross_margin",
    "operating_margin",
    "net_margin",
    "roe",
    "roce",
    # Efficiency
    "asset_turnover",
    "inventory_turnover",
    "receivables_turnover",
    "operating_cycle",
    # Liquidity and solvency
    "current_ratio",
    "quick_ratio",
    "debt_to_equity",
    "interest_coverage",
    # Valuation
    "price_to_fcf",
]


In [2]:
# Query the SEC provider with an empty search string and count the returned
# entries (9708 in the recorded run).
all_companies = obb.equity.search("", provider="sec")
len(all_companies.results)

9708

In [3]:
# Convert the search result into a DataFrame for inspection and slicing.
sec_df = all_companies.to_df()

In [4]:
# Preview the first rows; the recorded output shows symbol, name and cik columns.
sec_df.head()

Unnamed: 0,symbol,name,cik
0,AAPL,Apple Inc.,320193
1,MSFT,MICROSOFT CORP,789019
2,NVDA,NVIDIA CORP,1045810
3,AMZN,AMAZON COM INC,1018724
4,GOOGL,Alphabet Inc.,1652044


In [5]:
# Take the symbols at positions 350-599 of the SEC listing as this run's batch.
stocks = sec_df['symbol'].tolist()[350:600]

In [6]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment.
# `load_dotenv` was imported but never invoked, so a token stored only in
# .env was invisible to os.getenv below. Variables that are already set in
# the environment are left untouched.
load_dotenv()

# Access token handed to obb.account.login, read from the environment so it
# is never written into the notebook itself.
key = os.getenv("PAT_OBB")
if not key:
    # Fail with a clear message instead of attempting a login without a token.
    raise RuntimeError(
        "PAT_OBB is not set; define it in the environment or in a .env file."
    )
obb.account.login(pat=key)

In [7]:
from pandas.errors import EmptyDataError

# Fetch the FMP ratio history for every selected symbol, one frame per symbol.
# In the recorded run DIA and MDY returned no data and were reported by the
# generic `Exception` handler.
ratio_list = []

for stock in stocks:
    try:
        # limit=10 asks for ten years of history; the free plan only allows five.
        ratios_result = obb.equity.fundamental.ratios(stock, provider="fmp", limit=10)
        df = ratios_result.to_df()
        df['Stock'] = stock  # tag every row with the ticker it belongs to
    except EmptyDataError:
        print(f"No data found for the symbol {stock}")
    except Exception as e:
        print(f"Error with {stock}: {e}")
    else:
        ratio_list.append(df)


# Stack the per-symbol frames; fall back to an empty frame when nothing was fetched.
ratio_df = pd.concat(ratio_list, ignore_index=True) if ratio_list else pd.DataFrame()


Error with DIA: 
[Empty] -> No data found for the symbol DIA.
Error with MDY: 
[Empty] -> No data found for the symbol MDY.


In [8]:
# `ratio_df` is already assembled by the fetch cell, which also handles the
# case where nothing was downloaded. Repeating pd.concat here without that
# guard would raise "No objects to concatenate" on an empty `ratio_list`,
# so this cell only displays the frame.
ratio_df

Unnamed: 0,period_ending,fiscal_period,fiscal_year,current_ratio,quick_ratio,cash_ratio,days_of_sales_outstanding,days_of_inventory_outstanding,operating_cycle,days_of_payables_outstanding,...,price_earnings_ratio,price_to_free_cash_flows_ratio,price_to_operating_cash_flows_ratio,price_cash_flow_ratio,price_earnings_to_growth_ratio,price_sales_ratio,dividend_yield,enterprise_value_multiple,price_fair_value,Stock
0,2023-12-31,FY,2023,0.907530,0.721215,0.194971,53.248279,42.755701,96.003980,72.709917,...,16.633520,13.009065,9.893566,9.893566,1.663352,1.516848,0.030294,13.099044,3.480610,CCEP
1,2022-12-31,FY,2022,0.894708,0.706003,0.189662,56.920612,45.394737,102.315349,73.059211,...,15.656598,10.137462,8.052575,8.052575,0.292710,1.363173,0.031735,11.628671,3.170424,CCEP
2,2021-12-31,FY,2021,0.945347,0.755457,0.230921,64.921892,49.088109,114.010001,71.744159,...,22.839397,12.685683,10.594373,10.594373,0.234858,1.629607,0.029581,14.743758,3.189007,CCEP
3,2020-12-31,FY,2020,0.984541,0.820048,0.367874,54.684612,36.175957,90.860569,59.708922,...,37.250527,17.144882,12.450176,12.450176,-0.699949,1.749082,0.019386,15.703477,3.078965,CCEP
4,2019-12-31,FY,2019,0.754070,0.578372,0.076792,54.069797,35.321180,89.390978,55.595415,...,19.452923,17.437242,11.663194,11.663194,0.967658,1.764474,0.027071,12.218512,3.434417,CCEP
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1232,2024-12-31,FY,2024,2.525519,1.077664,0.274095,1.128478,77.115461,78.243939,24.088760,...,11.533646,-754.318998,9.611673,9.611673,-0.351502,1.010739,0.015941,8.089440,1.984351,STLD
1233,2023-12-31,FY,2023,2.910362,1.669254,0.600647,31.232891,71.632630,102.865521,26.932591,...,8.025597,10.563667,5.588123,5.588123,-0.266592,1.046526,0.013794,5.750544,2.218398,STLD
1234,2022-12-31,FY,2022,3.758320,2.218207,0.801270,33.712153,70.770049,104.482201,23.000259,...,4.638625,5.045049,4.017013,4.017013,0.134856,0.804891,0.013236,3.521980,2.203777,STLD
1235,2021-12-31,FY,2021,3.098285,1.512948,0.558447,37.997942,98.790462,136.788404,35.826101,...,3.961178,10.628199,5.776181,5.776181,0.007916,0.691596,0.016728,3.162920,2.019384,STLD


In [26]:
# List the distinct tickers present in the downloaded ratio data.
ratio_df['Stock'].unique()

array(['DUK', 'MCO', 'ELV', 'RELX', 'SHW', 'BAM', 'BNPQY', 'EQIX', 'ABNB',
       'AON', 'GEV', 'AJG', 'BTI', 'IBKR', 'BN', 'MDLZ', 'CI', 'PBR',
       'BP', 'CTAS', 'DASH', 'PH', 'MMM', 'RACE', 'APO', 'FTNT', 'INFY',
       'CVS', 'TRI', 'MCK', 'ORLY', 'BBVA', 'APH', 'TT', 'GSK', 'HCA',
       'ITW', 'TDG', 'ECL', 'MAR', 'ZTS', 'REGN', 'PNC', 'CL', 'MRVL',
       'COF', 'RSG', 'BMO', 'CMG', 'CEG', 'EPD', 'TEAM', 'USB', 'SE',
       'MFG', 'CP', 'MSI', 'WMB', 'PYPL', 'SCCO', 'APD', 'SNPS', 'GD',
       'NOC', 'WDAY', 'CRH', 'EOG', 'TOELY', 'DELL', 'CDNS', 'EMR',
       'MSTR', 'BDX', 'ET', 'RCL', 'BAESY', 'HLT', 'BK', 'ROP', 'CNI',
       'NTES', 'FDX', 'MBGYY', 'BNS', 'EQNR', 'JD', 'DEO', 'OKE', 'KMI',
       'CSX', 'NGG', 'AFL', 'TFC', 'ADSK', 'SHECY', 'TRV', 'AZO', 'MET',
       'SNOW', 'AEP', 'BCS', 'ING', 'CNQ', 'CM', 'TGT', 'LYG', 'PAYX',
       'MPLX', 'PCAR', 'NSC', 'CARR', 'JCI', 'NXPI', 'PSA', 'PTCAY',
       'SLB', 'ARES', 'MNST', 'HWM', 'AMP', 'ALL', 'CHTR', 'CPRT', 'DLR',


In [27]:
# Inspect the column names of the downloaded ratio data.
ratio_df.columns

Index(['period_ending', 'fiscal_period', 'fiscal_year', 'current_ratio',
       'quick_ratio', 'cash_ratio', 'days_of_sales_outstanding',
       'days_of_inventory_outstanding', 'operating_cycle',
       'days_of_payables_outstanding', 'cash_conversion_cycle',
       'gross_profit_margin', 'operating_profit_margin',
       'pretax_profit_margin', 'net_profit_margin', 'effective_tax_rate',
       'return_on_assets', 'return_on_equity', 'return_on_capital_employed',
       'net_income_per_ebt', 'ebt_per_ebit', 'ebit_per_revenue', 'debt_ratio',
       'debt_equity_ratio', 'long_term_debt_to_capitalization',
       'total_debt_to_capitalization', 'interest_coverage',
       'cash_flow_to_debt_ratio', 'company_equity_multiplier',
       'receivables_turnover', 'payables_turnover', 'inventory_turnover',
       'fixed_asset_turnover', 'asset_turnover',
       'operating_cash_flow_per_share', 'free_cash_flow_per_share',
       'cash_per_share', 'payout_ratio', 'operating_cash_flow_sales_ratio'

In [9]:
# Load the ratios saved so far, then restrict the fresh download to the saved
# file's columns, in the saved file's order.
df = pd.read_csv("data/stocks_ratios.csv")

ratio_df = ratio_df.loc[:, df.columns]

In [10]:
# Keep only rows whose ticker is not already present in the saved file.
already_saved = ratio_df['Stock'].isin(df['Stock'])
ratio_df = ratio_df.loc[~already_saved]

ratio_df.head()

Unnamed: 0,period_ending,fiscal_period,fiscal_year,current_ratio,quick_ratio,cash_ratio,days_of_sales_outstanding,days_of_inventory_outstanding,operating_cycle,days_of_payables_outstanding,...,price_to_free_cash_flows_ratio,price_to_operating_cash_flows_ratio,price_cash_flow_ratio,price_earnings_to_growth_ratio,price_sales_ratio,dividend_yield,enterprise_value_multiple,price_fair_value,interest_coverage,Stock
0,2023-12-31,FY,2023,0.90753,0.721215,0.194971,53.248279,42.755701,96.00398,72.709917,...,13.009065,9.893566,9.893566,1.663352,1.516848,0.030294,13.099044,3.48061,14.438272,CCEP
1,2022-12-31,FY,2022,0.894708,0.706003,0.189662,56.920612,45.394737,102.315349,73.059211,...,10.137462,8.052575,8.052575,0.29271,1.363173,0.031735,11.628671,3.170424,15.923664,CCEP
2,2021-12-31,FY,2021,0.945347,0.755457,0.230921,64.921892,49.088109,114.010001,71.744159,...,12.685683,10.594373,10.594373,0.234858,1.629607,0.029581,14.743758,3.189007,12.755396,CCEP
3,2020-12-31,FY,2020,0.984541,0.820048,0.367874,54.684612,36.175957,90.860569,59.708922,...,17.144882,12.450176,12.450176,-0.699949,1.749082,0.019386,15.703477,3.078965,6.775,CCEP
4,2019-12-31,FY,2019,0.75407,0.578372,0.076792,54.069797,35.32118,89.390978,55.595415,...,17.437242,11.663194,11.663194,0.967658,1.764474,0.027071,12.218512,3.434417,12.052584,CCEP


In [11]:
# Saved tickers plus newly fetched tickers; dropna=False counts a missing
# value as one entry, exactly as len(Series.unique()) does.
print(df['Stock'].nunique(dropna=False) + ratio_df['Stock'].nunique(dropna=False))

593


In [12]:
# Append the newly fetched rows to the rows loaded from the CSV, renumbering the index.
updated_df = pd.concat([df, ratio_df], ignore_index=True)

In [13]:
# Number of distinct tickers in the combined table (a missing value counts once).
updated_df['Stock'].nunique(dropna=False)

593

In [14]:
# (rows, columns) of the combined table.
updated_df.shape

(2959, 58)

In [15]:
# Write the combined table back to the same path that was read into `df`,
# replacing the previous file; index=False leaves the row index out of the CSV.
updated_df.to_csv("data/stocks_ratios.csv", index=False)

In [43]:
# Rebind `ratio_df` to the contents of the saved CSV; it no longer refers to
# the frame built from the fetched batch.
ratio_df = pd.read_csv("data/stocks_ratios.csv")

In [48]:
# Parse the period-end strings, then list the distinct period ends that fall in 2024.
ratio_df['period_ending'] = pd.to_datetime(ratio_df['period_ending'])
in_2024 = ratio_df['period_ending'].dt.year == 2024
ratio_df.loc[in_2024, 'period_ending'].unique()

<DatetimeArray>
['2024-09-28 00:00:00', '2024-06-30 00:00:00', '2024-01-28 00:00:00',
 '2024-12-31 00:00:00', '2024-11-03 00:00:00', '2024-01-31 00:00:00',
 '2024-09-30 00:00:00', '2024-09-01 00:00:00', '2024-05-31 00:00:00',
 '2024-12-29 00:00:00', '2024-03-31 00:00:00', '2024-07-27 00:00:00',
 '2024-08-31 00:00:00', '2024-12-28 00:00:00', '2024-11-29 00:00:00',
 '2024-09-29 00:00:00', '2024-07-31 00:00:00', '2024-10-31 00:00:00',
 '2024-02-03 00:00:00', '2024-02-02 00:00:00', '2024-10-27 00:00:00',
 '2024-04-26 00:00:00', '2024-11-02 00:00:00', '2024-08-29 00:00:00',
 '2024-09-27 00:00:00']
Length: 25, dtype: datetime64[ns]

In [46]:
# Distinct tickers in order of first appearance, as a plain Python list.
ids = ratio_df['Stock'].drop_duplicates().tolist()

In [62]:
# Download the price history from 2020-01-01 onwards for every ticker in `ids`,
# collecting one frame per ticker.
df_list = []

# The loop variable used to be called `id`, which shadowed the built-in id()
# for the rest of the kernel session; `symbol` avoids that.
for symbol in ids:
    try:
        price = obb.equity.price.historical(symbol=symbol, start_date="2020-01-01", provider="yfinance").to_df()
        price['Stock'] = symbol  # tag every row with the ticker it belongs to
        df_list.append(price)

    except EmptyDataError:
        print(f"No data found for the symbol {symbol}")

    except Exception as e:
        # In the recorded run BHP returned no results and was reported here.
        print(f"Error with {symbol}: {e}")


# ignore_index=False keeps each frame's own index in the stacked result.
if df_list:
    price_df = pd.concat(df_list, ignore_index=False)
else:
    price_df = pd.DataFrame()

Could not get exchangeTimezoneName for ticker 'BHP' reason: 'chart'

1 Failed download:
['BHP']: YFTzMissingError('possibly delisted; no timezone found')


Error with BHP: 
[Empty] -> No results found. Try adjusting the query parameters.


In [69]:
# Move the current index into a regular column (presumably the `date` column
# that is selected afterwards — confirm).
# NOTE(review): running this cell a second time would add an extra `index` column.
price_df = price_df.reset_index()

In [83]:
# Keep only the columns needed for the join. `.copy()` makes the result an
# independent frame, so assigning to one of its columns afterwards does not
# trigger pandas' SettingWithCopyWarning.
price_df = price_df[['date', 'Stock', 'close']].copy()

In [84]:
# Both join keys are converted to datetimes so they can be compared.
ratio_df['period_ending'] = pd.to_datetime(ratio_df['period_ending'])
# `price_df` comes from a column selection, so writing into it in place
# triggered SettingWithCopyWarning; assign() builds a new frame instead.
price_df = price_df.assign(date=pd.to_datetime(price_df['date']))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  price_df['date'] = pd.to_datetime(price_df['date'])


In [92]:
# Fiscal period ends often fall on non-trading days (2023-12-31 and
# 2024-09-28 both appear in the data and are weekend dates), so an exact
# date match leaves `close` empty for those rows. Attach the most recent
# close on or before each period end instead.
MAX_PRICE_LAG = pd.Timedelta(days=7)  # furthest back a close may be taken from

# merge_asof needs both frames sorted by their key and rejects missing keys.
# NOTE(review): assumes `period_ending` and `date` contain no nulls — confirm.
ratios_by_date = (
    ratio_df
    .assign(_row_order=range(len(ratio_df)))  # remember the original row order
    .sort_values('period_ending')
)
prices_by_date = price_df.sort_values('date')

# One output row per ratio row, matched within the same ticker; rows with no
# close inside the allowed lag keep NaN in `date` and `close`, as a left join would.
df = (
    pd.merge_asof(
        ratios_by_date,
        prices_by_date,
        left_on='period_ending',
        right_on='date',
        by='Stock',
        direction='backward',
        tolerance=MAX_PRICE_LAG,
    )
    .sort_values('_row_order')      # restore the order of `ratio_df`
    .drop(columns='_row_order')
    .reset_index(drop=True)
)