In [1]:
import sys
sys.path.append("..")  

from openbb_terminal.sdk import openbb
from backend.financialmodelingprep import fmp_stocks
import yaml
from datetime import datetime
import pandas as pd
from backend.db import LogAvailableTickers, LogIngestedTickers,Balance,  bulk_insert_data_from_dataframe, read_table_data, engine

# Load the API credentials. The encoding is stated explicitly so the YAML is
# decoded the same way on every platform instead of using the locale default.
with open('../backend/credentials.yaml', 'r', encoding='utf-8') as file:
    config = yaml.safe_load(file)

In [2]:
# Register the FMP key with OpenBB, then download the company listing.
fmp_api_key = config['api']['fmp']
openbb.keys.fmp(key=fmp_api_key, persist=True)
companies = fmp_stocks.available_companies(fmp_api_key)

# Keep only rows whose type is "stock", split by exchange short name.
is_stock = companies['type'] == "stock"
on_nasdaq = companies['exchangeShortName'] == "NASDAQ"
on_milan = companies['exchangeShortName'] == "MIL"
companies_nasdaq = companies[on_nasdaq & is_stock]
companies_milan = companies[on_milan & is_stock]

# Plain Python lists of the symbols on each exchange.
list_ticker_nasdaq = companies_nasdaq['symbol'].tolist()
list_ticker_mil = companies_milan['symbol'].tolist()

In [3]:
# Use the Milan symbols as the working ticker list.
list_ticker = list_ticker_mil

In [4]:

# One-row snapshot: when it was taken, how many companies were listed and how
# many tickers are in the working list.
available_snapshot = dict(
    timestamp=datetime.now(),
    num_companies=len(companies),
    num_tickers=len(list_ticker),
)
df_log = pd.DataFrame([available_snapshot])

In [5]:
# Pass the snapshot row to the insert helper together with the
# LogAvailableTickers model (presumably writes it to that model's table —
# confirm in backend.db).
bulk_insert_data_from_dataframe(LogAvailableTickers, df_log)

In [6]:

# Start from a reduced ticker list and request up to 100 quarters per ticker.
# There is surely a more efficient way to do this; it is left as is for now.
list_ticker = [
    "AAPL",
    "PLTR",
    "AMZN",
    "META",
    "NIO",
    "RGTI",
    "FFIEW",
    "NVDA",
    "NFLX",
    "BABA",
]

# Settings read by the balance-sheet download in process_ticker.
source = "FinancialModelingPrep"
quarterly = True
limit = 100

# Read the data that has already been stored.
df = read_table_data(engine, "fa_balance")

In [18]:
def _ingestion_log_row(ticker, num_new_rows, num_old_rows_diff_dates, num_columns):
    """Build the one-row DataFrame that records the outcome of one ticker run."""
    return pd.DataFrame([{
        'ingestion_timestamp': datetime.now(),
        'ticker': ticker,
        'num_new_rows': num_new_rows,
        'num_old_rows_diff_dates': num_old_rows_diff_dates,
        'num_columns': num_columns
    }])


def process_ticker(ticker, df):
    """Download the balance statements of one ticker and keep the rows worth storing.

    The statements are fetched with ``openbb.stocks.fa.balance`` using the
    notebook-level ``source``, ``quarterly`` and ``limit`` settings and are then
    compared with the rows already stored in ``df``.

    Parameters
    ----------
    ticker : str
        Symbol to download.
    df : pandas.DataFrame
        Rows already stored. The frame is only read, never modified.

    Returns
    -------
    tuple
        ``(rows_to_insert, log_row)``.

        * Provider returned nothing: ``(None, None)``.
        * ``df`` is empty: every downloaded row, with all downloaded columns.
        * Otherwise: rows whose ``(cik, calendar_year, period)`` key is absent
          from ``df`` followed by rows whose ``filling_date`` or
          ``accepted_date`` differs from the stored value, restricted to the
          columns of ``df``.

        ``log_row`` is a one-row DataFrame with the ingestion timestamp, the
        ticker, both row counts and the number of downloaded columns.
    """
    df_quarterly = openbb.stocks.fa.balance(symbol=ticker, source=source, quarterly=quarterly, limit=limit)

    if df_quarterly is None or df_quarterly.empty:
        print(f"Ticker {ticker} not found")
        # Nothing was downloaded. Handing back the stored frame here would make
        # the caller queue every already-stored row for insertion again.
        return None, None

    # Transpose and normalise the column names to snake_case.
    # NOTE(review): presumably this leaves one row per reporting period — confirm
    # against the provider's output layout.
    df_quarterly = df_quarterly.T.reset_index()
    df_quarterly['ticker'] = ticker
    # Lower-case 's' is the seconds alias; upper-case 'S' is deprecated in newer pandas.
    df_quarterly['current_date'] = pd.Timestamp.now().floor('s')
    df_quarterly.columns = [col.lower().replace(" ", "_") for col in df_quarterly.columns]

    # The merge key must have the same dtype on both sides.
    df_quarterly['calendar_year'] = pd.to_datetime(df_quarterly['calendar_year'])

    if df.empty:
        # Nothing stored yet: every downloaded row is new.
        df_log_process = _ingestion_log_row(ticker, len(df_quarterly), 0, len(df_quarterly.columns))
        return df_quarterly, df_log_process

    # Convert the stored key on a derived frame so the caller's frame stays untouched.
    stored = df
    if 'calendar_year' in df.columns:
        stored = df.assign(calendar_year=pd.to_datetime(df['calendar_year']))

    df_quarterly_merge = df_quarterly.merge(
        stored,
        how='left',
        indicator=True,
        on=['cik', 'calendar_year', 'period'],
        suffixes=('', '_y'),
    )
    df_new_rows = df_quarterly_merge[df_quarterly_merge['_merge'] == 'left_only']
    df_old_rows = df_quarterly_merge[df_quarterly_merge['_merge'] == 'both']
    # Already-stored periods count as changed when either date differs from the
    # stored value (the stored side carries the '_y' suffix).
    dates_differ = (
        (df_old_rows['filling_date'] != df_old_rows['filling_date_y'])
        | (df_old_rows['accepted_date'] != df_old_rows['accepted_date_y'])
    )
    df_old_rows_diff_dates = df_old_rows[dates_differ]

    df_log_process = _ingestion_log_row(
        ticker,
        len(df_new_rows),
        len(df_old_rows_diff_dates),
        len(df_quarterly.columns),
    )

    print(df_log_process)

    return pd.concat([df_new_rows[df.columns], df_old_rows_diff_dates[df.columns]]), df_log_process

In [19]:
# Trial run on the first ticker of the working list. The ticker is taken from
# list_ticker explicitly so this cell does not depend on a loop variable left
# over in the kernel from an earlier execution.
df_processed, df_log = process_ticker(list_ticker[0], df)

In [20]:
# Show the log row returned by the single-ticker run.
df_log

Unnamed: 0,ingestion_timestamp,ticker,num_new_rows,num_old_rows_diff_dates,num_columns
0,2023-08-10 18:50:59.284352,AAPL,100,0,55


In [21]:
# Main script
dataframes = []
records = []

for ticker in list_ticker:
    df_processed, df_log = process_ticker(ticker, df)
    if df_processed is not None:
        dataframes.append(df_processed)
    if df_log is not None:
        bulk_insert_data_from_dataframe(LogIngestedTickers, df_log)

df_final = pd.concat(dataframes, ignore_index=True)

date_columns = ['filling_date', 'accepted_date', 'current_date']
for col in date_columns:
    df_final[col] = pd.to_datetime(df_final[col])

In [22]:
# Dimensions (rows, columns) of the combined frame.
df_final.shape

(524, 55)

In [23]:
# Pass the combined rows to the insert helper together with the Balance model
# (presumably writes them to that model's table — confirm in backend.db).
bulk_insert_data_from_dataframe(Balance, df_final)

class Balance(Base):
    __tablename__ = 'balance'
    fiscal_date_ending = Column(String)
    reported_currency = Column(String)
    cik = Column(String, primary_key=True)
    filling_date = Column(Date, primary_key=True)
    accepted_date = Column(Date, primary_key=True)
    calendar_year = Column(Date, primary_key=True)
    period = Column(String, primary_key=True)
    cash_and_cash_equivalents = Column(String)
    short_term_investments = Column(String)
    cash_and_short_term_investments = Column(String)
    net_receivables = Column(String)
    inventory = Column(String)
    other_current_assets = Column(String)
    total_current_assets = Column(String)
    property_plant_equipment_net = Column(String)
    goodwill = Column(String)
    intangible_assets = Column(String)
    goodwill_and_intangible_assets = Column(String)
    long_term_investments = Column(String)
    tax_assets = Column(String)
    other_non_current_assets = Column(String)
    total_non_current_assets = Column(Str

In [26]:
# Save the generated code to a .py file.
model_header = (
    "from sqlalchemy.ext.declarative import declarative_base\n",
    "from sqlalchemy import Column, Integer, String, Float, Date\n",
    "Base = declarative_base()\n\n",
)
with open('generate_model.py', 'w') as f:
    # The required imports and the declarative base go at the top of the file...
    f.writelines(model_header)
    # ...followed by the source code of the Balance class.
    f.write(Balance_code)