In [1]:
import sys
sys.path.append("..")  

from openbb_terminal.sdk import openbb
from backend.financialmodelingprep import fmp_stocks
import yaml
from datetime import datetime
import pandas as pd
import os
from backend.db import LogAvailableTickers, LogIngestedTickers,Balance,  bulk_insert_data_from_dataframe, read_table_data, engine

from dotenv import load_dotenv

# Pull the settings defined in the local .env file into the process environment.
load_dotenv()

# Database location as configured in the environment (None when the variable is unset).
database_path = os.environ.get("DATABASE_PATH")

# FinancialModelingPrep API key (None when the variable is unset).
fmp_api_aky = os.environ.get("FMP_SECRET_KEY")

In [2]:
# Enable IPython's autoreload extension so that edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2


In [3]:
# Register the FMP key with OpenBB, then download the list of available companies.
# NOTE(review): persist=True presumably stores the key for later sessions - confirm in the OpenBB docs.
openbb.keys.fmp(key=fmp_api_aky, persist=True)
companies = fmp_stocks.available_companies(fmp_api_aky)

# Keep only plain stocks, split by the exchange they are listed on.
companies_nasdaq = companies.loc[
    companies['exchangeShortName'].eq("NASDAQ") & companies['type'].eq("stock")
]
companies_milan = companies.loc[
    companies['exchangeShortName'].eq("MIL") & companies['type'].eq("stock")
]

# Plain Python lists of the symbols on each exchange.
list_ticker_nasdaq = companies_nasdaq['symbol'].to_list()
list_ticker_mil = companies_milan['symbol'].to_list()

In [4]:
# Select the Milan-listed symbols; this is the list whose length is logged as
# 'num_tickers' in the availability snapshot.
# NOTE: `list_ticker` is reassigned to a hard-coded list further down, before
# the ingestion loop runs.
list_ticker = list_ticker_mil

In [5]:

# One-row snapshot of what is currently available: when it was taken, how many
# companies were listed and how many tickers are selected.
df_log = pd.DataFrame([
    dict(
        timestamp=datetime.now(),
        num_companies=len(companies),
        num_tickers=len(list_ticker),
    )
])

In [6]:
# Store the availability snapshot (timestamp, company count, ticker count).
# NOTE(review): presumably writes the rows of df_log to the table mapped by
# LogAvailableTickers - confirm in backend.db.
bulk_insert_data_from_dataframe(LogAvailableTickers, df_log)

In [7]:

# Start from a reduced list of tickers and iterate over 100 quarters. There is surely a more optimal way to do this, but for now we leave it as it is.
# NOTE: this replaces the `list_ticker` value assigned in an earlier cell.
list_ticker = ['AAPL', "PLTR","AMZN","META","NIO","RGTI","FFIEW","NVDA","NFLX","BABA"]

# Settings read by process_ticker and forwarded to openbb.stocks.fa.balance.
source="FinancialModelingPrep"
quarterly=True
limit=100

# Read the data that has already been stored
df = read_table_data("fa_balance")   

In [8]:
# Display the rows read from "fa_balance".
df

Unnamed: 0,fiscal_date_ending,reported_currency,cik,filling_date,accepted_date,calendar_year,period,cash_and_cash_equivalents,short_term_investments,cash_and_short_term_investments,...,total_liabilities_and_stockholders_equity,minority_interest,total_liabilities_and_total_equity,total_investments,total_debt,net_debt,link,final_link,ticker,current_date


In [10]:
def _ingestion_log(ticker, num_new_rows, num_old_rows_diff_dates, num_columns):
    """Build the one-row log frame describing a single ticker ingestion."""
    return pd.DataFrame([{
        'ingestion_timestamp': datetime.now(),
        'ticker': ticker,
        'num_new_rows': num_new_rows,
        'num_old_rows_diff_dates': num_old_rows_diff_dates,
        'num_columns': num_columns
    }])


def process_ticker(ticker, df):
    """Download one ticker's balance sheets and keep only the rows worth storing.

    The statements are fetched through ``openbb.stocks.fa.balance`` using the
    notebook-level ``source``, ``quarterly`` and ``limit`` settings, reshaped
    to one row per reporting period, and compared with the stored rows.

    Parameters
    ----------
    ticker : str
        Symbol to download.
    df : pandas.DataFrame
        Rows already stored. The frame is only read: the ``calendar_year``
        conversion needed for the comparison is applied to a copy.

    Returns
    -------
    tuple
        ``(rows, log)``.

        * ``rows`` - when ``df`` is empty, every downloaded row. Otherwise the
          downloaded rows whose ``(cik, calendar_year, period)`` key is not in
          ``df``, followed by the rows whose key is stored but whose
          ``filling_date`` or ``accepted_date`` differs, restricted to the
          columns of ``df``.
        * ``log`` - one-row frame with the ingestion timestamp, the ticker,
          the two row counts and the number of downloaded columns.

        Both items are ``None`` when the download comes back empty.
    """
    df_quarterly = openbb.stocks.fa.balance(symbol=ticker, source=source, quarterly=quarterly, limit=limit)

    if df_quarterly.empty:
        print(f"Ticker {ticker} not found")
        # Nothing to store. Returning `df` here would hand the already-stored
        # rows back to the caller, which would queue them for insertion again.
        return None, None

    # After transposing, each reporting period is a row and each field a column.
    df_quarterly = df_quarterly.T.reset_index()
    df_quarterly['ticker'] = ticker
    # Lower-case 's': the upper-case 'S' alias is deprecated in recent pandas.
    df_quarterly['current_date'] = pd.Timestamp.now().floor('s')
    df_quarterly.columns = [col.lower().replace(" ", "_") for col in df_quarterly.columns]

    # The merge key must have the same dtype on both sides.
    df_quarterly['calendar_year'] = pd.to_datetime(df_quarterly['calendar_year'])

    if df.empty:
        # Nothing stored yet: every downloaded row is new.
        return df_quarterly, _ingestion_log(ticker, len(df_quarterly), 0, len(df_quarterly.columns))

    stored = df
    if 'calendar_year' in df.columns:
        # assign() returns a new frame, so the caller's `df` is left untouched.
        stored = df.assign(calendar_year=pd.to_datetime(df['calendar_year']))

    # Left merge with an indicator: 'left_only' rows are new, 'both' rows are
    # already stored (their stored values carry the '_y' suffix).
    merged = df_quarterly.merge(stored, how='left', indicator=True, on=['cik', 'calendar_year', 'period'], suffixes=('', '_y'))
    df_new_rows = merged[merged['_merge'] == 'left_only']
    df_old_rows = merged[merged['_merge'] == 'both']

    dates_changed = (
        (df_old_rows['filling_date'] != df_old_rows['filling_date_y'])
        | (df_old_rows['accepted_date'] != df_old_rows['accepted_date_y'])
    )
    df_old_rows_diff_dates = df_old_rows[dates_changed]

    df_log_process = _ingestion_log(
        ticker,
        len(df_new_rows),
        len(df_old_rows_diff_dates),
        len(df_quarterly.columns),
    )

    return pd.concat([df_new_rows[df.columns], df_old_rows_diff_dates[df.columns]]), df_log_process

In [12]:
# Main script
# Collect, ticker by ticker, the rows that still have to be written to the
# balance table, and log every ingestion as it happens.
dataframes = []

for ticker in list_ticker:
    # A dedicated name keeps the availability snapshot held in `df_log` intact.
    df_processed, df_log_ingest = process_ticker(ticker, df)
    # Skip results with nothing to insert: None, an empty frame, or the stored
    # frame `df` itself handed back unchanged (appending it would queue every
    # already-stored row for insertion again).
    if df_processed is not None and df_processed is not df and not df_processed.empty:
        dataframes.append(df_processed)
    if df_log_ingest is not None:
        bulk_insert_data_from_dataframe(LogIngestedTickers, df_log_ingest)

date_columns = ['filling_date', 'accepted_date', 'current_date']
if dataframes:
    df_final = pd.concat(dataframes, ignore_index=True)
    # Normalise the date columns to datetime before the rows are inserted.
    for col in date_columns:
        df_final[col] = pd.to_datetime(df_final[col])
else:
    # pd.concat raises ValueError on an empty list; fall back to an empty
    # frame with the stored table's columns so later cells still find df_final.
    df_final = pd.DataFrame(columns=df.columns)

In [13]:
# Display the rows collected by the ingestion loop.
df_final

Unnamed: 0,fiscal_date_ending,reported_currency,cik,filling_date,accepted_date,calendar_year,period,cash_and_cash_equivalents,short_term_investments,cash_and_short_term_investments,...,total_liabilities_and_stockholders_equity,minority_interest,total_liabilities_and_total_equity,total_investments,total_debt,net_debt,link,final_link,ticker,current_date
0,2023-07,USD,0000320193,2023-08-04,2023-08-03 18:04:43,2023-01-01,Q3,28408000000,34074000000,62482000000,...,335038000000,0,335038000000,34074000000,109280000000,80872000000,https://www.sec.gov/Archives/edgar/data/320193...,https://www.sec.gov/Archives/edgar/data/320193...,AAPL,2023-08-11 15:19:27
1,2023-04,USD,0000320193,2023-05-05,2023-05-04 18:03:52,2023-01-01,Q2,24687000000,31185000000,55872000000,...,332160000000,0,332160000000,31185000000,109615000000,84928000000,https://www.sec.gov/Archives/edgar/data/320193...,https://www.sec.gov/Archives/edgar/data/320193...,AAPL,2023-08-11 15:19:27
2,2022-12,USD,0000320193,2023-02-03,2023-02-02 18:01:30,2023-01-01,Q1,20535000000,30820000000,51355000000,...,346747000000,0,346747000000,144915000000,111110000000,90575000000,https://www.sec.gov/Archives/edgar/data/320193...,https://www.sec.gov/Archives/edgar/data/320193...,AAPL,2023-08-11 15:19:27
3,2022-09,USD,0000320193,2022-10-28,2022-10-27 18:01:14,2022-01-01,Q4,23646000000,24658000000,48304000000,...,352755000000,0,352755000000,145463000000,120069000000,96423000000,https://www.sec.gov/Archives/edgar/data/320193...,https://www.sec.gov/Archives/edgar/data/320193...,AAPL,2023-08-11 15:19:27
4,2022-06,USD,0000320193,2022-07-29,2022-07-28 18:06:56,2022-01-01,Q3,27502000000,20729000000,48231000000,...,336309000000,0,336309000000,151806000000,119691000000,92189000000,https://www.sec.gov/Archives/edgar/data/320193...,https://www.sec.gov/Archives/edgar/data/320193...,AAPL,2023-08-11 15:19:27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
520,2011-12,CNY,0001577552,2011-12-31,2011-12-31 00:00:00,2012-01-01,Q3,3433048000,8218752000,11651800000,...,15295468000,102392000,15295468000,8453315000,1286489000,-2146559000,,,BABA,2023-08-11 15:19:37
521,2011-06,CNY,0001577552,2011-06-30,2011-06-30 00:00:00,2012-01-01,Q1,2904518000,7685823000,10590341000,...,13335807000,86534000,13335807000,7917937000,819535000,-2084983000,,,BABA,2023-08-11 15:19:37
522,2010-12,CNY,0001577552,2010-12-31,2010-12-31 00:00:00,2011-01-01,Q3,3254344000,6802508000,10056852000,...,12705140000,49816000,12705140000,6866571000,92718000,-3161626000,,,BABA,2023-08-11 15:19:37
523,2010-06,CNY,0001577552,2010-06-30,2010-06-30 00:00:00,2011-01-01,Q1,2078811000,6083695000,8162506000,...,10443440000,41690000,10443440000,6107966000,0,-2078811000,,,BABA,2023-08-11 15:19:37


In [14]:
# Store the collected balance rows.
# NOTE(review): presumably writes the rows of df_final to the table mapped by
# Balance - confirm in backend.db.
bulk_insert_data_from_dataframe(Balance, df_final)