In [1]:
import sys
sys.path.append("..")  

from openbb_terminal.sdk import openbb
from backend.financialmodelingprep import fmp_stocks
import yaml
from datetime import datetime
import pandas as pd
from backend.db import LogAvailableTickers, LogIngestedTickers, insert_data, read_table_data, engine

# Load the project configuration (it holds the API keys read as config['api'][...]).
# The encoding is set explicitly so that parsing does not depend on the
# platform's default locale encoding.
with open('../backend/credentials.yaml', 'r', encoding='utf-8') as file:
    config = yaml.safe_load(file)

In [2]:
# Register the FMP API key with OpenBB, then download the list of available companies.
fmp_api_key = config['api']['fmp']
openbb.keys.fmp(key=fmp_api_key, persist=True)
companies = fmp_stocks.available_companies(fmp_api_key)

# Keep only entries typed as "stock", split by exchange short name.
is_stock = companies['type'] == "stock"
on_nasdaq = companies['exchangeShortName'] == "NASDAQ"
on_milan = companies['exchangeShortName'] == "MIL"
companies_nasdaq = companies[on_nasdaq & is_stock]
companies_milan = companies[on_milan & is_stock]

# Plain Python lists of ticker symbols for each exchange.
list_ticker_nasdaq = companies_nasdaq['symbol'].tolist()
list_ticker_mil = companies_milan['symbol'].tolist()

In [3]:
# Use the Milan (MIL) tickers as the working list; its length is what the
# availability log record reports as 'num_tickers'.
# NOTE: a later cell reassigns list_ticker to a short hard-coded list.
list_ticker = list_ticker_mil

In [4]:
# Snapshot of how many companies were downloaded and how many tickers were selected.
record = dict(
    timestamp=datetime.now(),
    num_companies=len(companies),
    num_tickers=len(list_ticker),
)

In [5]:

# Hand the availability snapshot to insert_data, targeting LogAvailableTickers
# (both come from backend.db; presumably this writes one log row — confirm there).
insert_data(LogAvailableTickers, record)

In [7]:

# Start from a reduced ticker list and request up to 100 quarters per ticker.
# There is surely a more efficient way to do this, but it is left as is for now.
list_ticker = [
    "AAPL",
    "PLTR",
    "AMZN",
    "META",
    "NIO",
    "RGTI",
    "FFIEW",
    "NVDA",
    "NFLX",
]

# Parameters read by process_ticker when it queries OpenBB.
source = "FinancialModelingPrep"
quarterly = True
limit = 100

# Read the data that has already been stored.
df = read_table_data(engine, "fa_balance")

The table fa_balance does not exist in the database.


In [8]:
def _dates_differ(merged, column):
    """Return a boolean mask of rows whose fresh and stored `column` values differ."""
    fresh = pd.to_datetime(merged[column])
    stored = pd.to_datetime(merged[f"{column}_y"])
    # NaT != NaT evaluates to True, so rows missing on both sides are excluded explicitly.
    return (fresh != stored) & ~(fresh.isna() & stored.isna())


def process_ticker(ticker, df):
    """Fetch balance data for one ticker and keep only rows worth storing.

    The data comes from ``openbb.stocks.fa.balance`` and is queried with the
    notebook-level ``source``, ``quarterly`` and ``limit`` settings.

    Parameters
    ----------
    ticker : str
        Symbol to request.
    df : pandas.DataFrame
        Rows already stored (may be empty). It is never modified.

    Returns
    -------
    tuple
        ``(rows, record)``. ``rows`` holds the fetched rows that are either
        absent from ``df`` (matched on cik / calendar_year / period) or present
        with a different ``filling_date`` / ``accepted_date``; it has exactly
        the columns of the fetched data. ``record`` is the ingestion-log dict.
        Both are ``None`` when the ticker returns no data.
    """
    df_quarterly = openbb.stocks.fa.balance(symbol=ticker, source=source, quarterly=quarterly, limit=limit)

    if df_quarterly.empty:
        print(f"Ticker {ticker} not found")
        # Nothing was fetched: return no rows rather than the stored table, so the
        # caller does not re-append historical data as if it were new.
        return None, None

    # One row per reporting period, with snake_case column names.
    df_quarterly = df_quarterly.T.reset_index()
    df_quarterly['ticker'] = ticker
    df_quarterly['current_date'] = pd.Timestamp.now().floor('s')
    df_quarterly.columns = [col.lower().replace(" ", "_") for col in df_quarterly.columns]
    df_quarterly['calendar_year'] = pd.to_datetime(df_quarterly['calendar_year'])

    if df.empty:
        # Nothing stored yet: every fetched row is new.
        record = {
            'ingestion_timestamp': datetime.now(),
            'ticker': ticker,
            'num_new_rows': len(df_quarterly),
            'num_old_rows_diff_dates': 0,
            'num_columns': len(df_quarterly.columns)
        }
        return df_quarterly, record

    # Align the merge-key dtype on a copy so the caller's frame is left untouched.
    df_stored = df
    if 'calendar_year' in df.columns:
        df_stored = df.assign(calendar_year=pd.to_datetime(df['calendar_year']))

    merged = df_quarterly.merge(
        df_stored,
        how='left',
        indicator=True,
        on=['cik', 'calendar_year', 'period'],
        suffixes=('', '_y'),
    )

    is_new = merged['_merge'] == 'left_only'
    is_revised = (merged['_merge'] == 'both') & (
        _dates_differ(merged, 'filling_date') | _dates_differ(merged, 'accepted_date')
    )

    # Keep only the fetched columns: '_merge' and the '*_y' copies of the stored
    # values are merge helpers and must not leak into the stored table.
    fetched_columns = list(df_quarterly.columns)
    df_new_rows = merged.loc[is_new, fetched_columns]
    df_old_rows_diff_dates = merged.loc[is_revised, fetched_columns]

    record = {
        'ingestion_timestamp': datetime.now(),
        'ticker': ticker,
        'num_new_rows': len(df_new_rows),
        'num_old_rows_diff_dates': len(df_old_rows_diff_dates),
        'num_columns': len(df_quarterly.columns)
    }

    return pd.concat([df_new_rows, df_old_rows_diff_dates]), record


In [9]:

# Main script: fetch every ticker, log each ingestion, then rebuild fa_balance.
merge_keys = ['cik', 'calendar_year', 'period']  # same keys process_ticker matches on
date_columns = ['filling_date', 'accepted_date', 'current_date']
dataframes = []
records = []

for ticker in list_ticker:
    df_processed, record = process_ticker(ticker, df)
    if record is None:
        # No record means the ticker was not found: there is nothing to store or
        # log, and whatever frame came back must not be treated as fresh data.
        continue
    dataframes.append(df_processed)
    insert_data(LogIngestedTickers, record)

if not dataframes:
    # pd.concat([]) would raise; with nothing retrieved the table is left as it is.
    print("No data retrieved: fa_balance left untouched")
    df_final = df
else:
    df_changes = pd.concat(dataframes, ignore_index=True)

    # Drop merge helpers ('_merge' and the '<col>_y' copies of stored values) in
    # case the processed frames still carry them.
    merge_helpers = [
        col for col in df_changes.columns
        if col == '_merge' or (col.endswith('_y') and col[:-2] in df_changes.columns)
    ]
    df_changes = df_changes.drop(columns=merge_helpers)

    if df.empty:
        df_final = df_changes
    else:
        # process_ticker only returns new/revised rows, and the table is rewritten
        # below, so the stored rows that were not superseded must be carried over
        # or they would be lost.
        df_stored = df.assign(calendar_year=pd.to_datetime(df['calendar_year']))
        stored_keys = pd.MultiIndex.from_frame(df_stored[merge_keys])
        changed_keys = pd.MultiIndex.from_frame(df_changes[merge_keys])
        df_unchanged = df_stored[~stored_keys.isin(changed_keys)]
        df_final = pd.concat([df_unchanged, df_changes], ignore_index=True)

    for col in date_columns:
        df_final[col] = pd.to_datetime(df_final[col])

    df_final.to_sql('fa_balance', engine, if_exists='replace', index=False)
    print(f"{len(df_final)} rows written to fa_balance")

482