In [2]:
from prometheus_client import start_http_server, Summary, Counter, Gauge


EXECUTION_TIME = Summary("function_execution_seconds", "Time spent processing.")
NEW_SYMBOLS = Counter(
    "new_symbols_fetched", "Number of new symbols fetched.", ["run_id"]
)
SYMBOLS_LENGTH = Gauge(
    "symbols_length", "Length of the list of symbols fetched.", ["run_id"]
)
TOTAL_RECORDS = Gauge(
    "total_records", "Total number of records in the profiles table.", ["run_id"]
)
FETCH_PARAMS = Gauge(
    "fetch_params",
    "Parameters passed to fetch_equity_symbols",
    ["country", "market", "run_id"],
)
MISSING_SYMBOLS = Counter(
    "missing_symbols_fetched", "Number of missing new symbols.", ["run_id"]
)

ValueError: Duplicated timeseries in CollectorRegistry: {'function_execution_seconds_count', 'function_execution_seconds', 'function_execution_seconds_created', 'function_execution_seconds_sum'}

In [3]:
from datetime import datetime

run_id = datetime.now().isoformat()


from sqlalchemy import create_engine, Column, String, Integer, Boolean, Date
from sqlalchemy.orm import declarative_base, sessionmaker
import pandas as pd
from tqdm import tqdm
import sys

pd.set_option("display.max_columns", None)

sys.path.append("..")
import os
import financedatabase as fd
from investorkit.investorkit.get_data.base import (
    get_profile,
)


from dotenv import load_dotenv

load_dotenv()


DATABASE_URL = os.getenv("DATABASE_URL")
engine = create_engine(DATABASE_URL)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

Base = declarative_base()


class Profile(Base):
    __tablename__ = "profiles"
    symbol = Column(String, primary_key=True, index=True)
    companyName = Column(String)
    cik = Column(Integer)
    exchange = Column(String)
    exchangeShortName = Column(String)
    industry = Column(String)
    sector = Column(String)
    country = Column(String)
    ipoDate = Column(Date)
    defaultImage = Column(Boolean)
    isEtf = Column(Boolean)
    isActivelyTrading = Column(Boolean)


Base.metadata.create_all(bind=engine)


FMP_API_KEY = os.getenv("FMP_SECRET_KEY")


@EXECUTION_TIME.time()
def fetch_equity_symbols(country="United States", market="NASDAQ Global Select"):
    equities = fd.Equities()
    selected_columns = [
        "name",
        "currency",
        "sector",
        "industry_group",
        "industry",
        "exchange",
        "market",
        "market_cap",
    ]
    us_equities = equities.select(country=country)
    df_equities = us_equities[us_equities["market"] == market][selected_columns]
    list_symbols = list(df_equities.index)

    SYMBOLS_LENGTH.labels(run_id=run_id).set(len(list_symbols))
    FETCH_PARAMS.labels(country=country, market=market, run_id=run_id).set(
        len(list_symbols)
    )

    return list_symbols


@EXECUTION_TIME.time()
def get_new_symbols(list_symbols, engine):
    existing_symbols_query = "SELECT symbol FROM profiles;"
    existing_symbols = pd.read_sql(existing_symbols_query, con=engine)
    new_symbols = list(set(list_symbols) - set(existing_symbols["symbol"].tolist()))

    NEW_SYMBOLS.labels(run_id=run_id).inc(len(new_symbols))

    return new_symbols


def store_profiles(df_profiles_filtered, engine):
    df_profiles_filtered.to_sql("profiles", con=engine, if_exists="append", index=False)


list_symbols = fetch_equity_symbols()

new_symbols = get_new_symbols(list_symbols, engine)
new_symbols = new_symbols[70:85]


if new_symbols:
    df_profiles = get_profile(new_symbols, FMP_API_KEY)

    missing_symbols = set(new_symbols) - set(df_profiles["symbol"])
    MISSING_SYMBOLS.labels(run_id=run_id).inc(len(missing_symbols))
    list_cols = [
        "symbol",
        "companyName",
        "cik",
        "exchange",
        "exchangeShortName",
        "industry",
        "sector",
        "country",
        "ipoDate",
        "defaultImage",
        "isEtf",
        "isActivelyTrading",
    ]
    df_profiles_filtered = df_profiles[list_cols]

    df_profiles_filtered["ipoDate"].replace("", None, inplace=True)

    store_profiles(df_profiles_filtered, engine)

    query = "SELECT * FROM profiles;"
    df = pd.read_sql(query, engine)
    TOTAL_RECORDS.labels(run_id=run_id).set(len(df))

100%|██████████| 15/15 [00:10<00:00,  1.38it/s]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_profiles_filtered["ipoDate"].replace("", None, inplace=True)


In [2]:
start_http_server(8000)

In [7]:
if __name__ == '__main__':
    # Start up the server to expose the metrics.
    

OSError: [Errno 98] Address already in use

In [4]:
query = "SELECT * FROM profiles;"
df = pd.read_sql(query, engine)

In [5]:
df

Unnamed: 0,symbol,companyName,cik,exchange,exchangeShortName,industry,sector,country,ipoDate,defaultImage,isEtf,isActivelyTrading
0,LANDO,Gladstone Land Corporation,1495240,NASDAQ Global Market,NASDAQ,REIT—Industrial,Real Estate,US,2020-10-19,False,False,True
1,ARCC,Ares Capital Corporation,1287750,NASDAQ Global Market,NASDAQ,Asset Management,Financial Services,US,2004-10-05,False,False,True
2,PBHC,"Pathfinder Bancorp, Inc.",1609065,NASDAQ Capital Market,NASDAQ,Banks—Regional,Financial Services,US,1995-11-16,False,False,True
3,ESSC,East Stone Acquisition Corporation,1760683,NASDAQ Capital Market,NASDAQ,Shell Companies,Financial Services,CN,2020-02-14,True,False,False
4,SAMAW,Schultze Special Purpose Acquisition Corp. II,1843100,NASDAQ Global Market,NASDAQ,Shell Companies,Financial Services,US,2019-01-09,False,False,True
5,GVCI,Green Visor Financial Technology Acquisition C...,1858503,NASDAQ Global Market,NASDAQ,Shell Companies,Financial Services,US,2021-12-30,False,False,False
6,AHAC,Alpha Healthcare Acquisition Corp.,1818382,NASDAQ Global Select,NASDAQ,Shell Companies,Financial Services,US,2020-11-09,False,False,False
7,IBRX,"ImmunityBio, Inc.",1326110,NASDAQ Global Select,NASDAQ,Biotechnology,Healthcare,US,2015-07-28,False,False,True
8,SNAX,"Stryve Foods, Inc.",1691936,NASDAQ Capital Market,NASDAQ,Packaged Foods,Consumer Defensive,US,2019-03-06,False,False,True
9,PRSTW,"Presto Automation, Inc.",1822145,NASDAQ Global Market,NASDAQ,Software—Application,Technology,US,,True,False,True
