<a href="https://colab.research.google.com/github/DimaFrank/Association_Rule_Learning/blob/main/financialmodelingprep_API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Data to fetch from API:

Company Information
1. Company Profile
2. Employee count / Historical employee count
3. Market Cap / Historical Market Cap
4. Analyst Estimates
5. Analyst Recomendations
6. Company Outlook
7. Stock Peers
8. Delisted Companies
9. Company Share Float / Historical Share Float

Financial Statements:
1. Income Statement
2. Balance Sheet Statement
3. Cashflow Statement
4. Full Financial Statement As Reported

Statements Analysis:
1. Key Metrics
2. Ratios
3. Cashflow Growth
4. Income Growth
5. Balance Sheet Growth
6. Financial Growth
7. Financial Score

Dividends:
1. Dividends Historical

Splits:
1. Splits Historical

Technical Indicators:
ALL

Market Performance:
1. Sector Historical


Economic Data:
1. Tresury Rates
2. Economic Indicators

Constituents:
1. Historical S&P 500

### Dependencies

In [38]:
pip install sshtunnel mysqlclient -q

In [39]:
!pip install -q pyspark

In [40]:
!pip install -q delta-spark

In [41]:
from google.colab import drive
from google.colab import userdata

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [42]:
import pandas as pd
import numpy as np
import requests
import os
import json
import time

from delta import *


pd.set_option('display.max_columns', None)

### Mysql Database Connection

In [43]:
import sshtunnel
import MySQLdb
from sqlalchemy import create_engine


class DBConnector:

    def establish_ssh_tunnel_connection(self):

        ssh_username = userdata.get('ssh_username')
        ssh_password = userdata.get('ssh_password')

        mysql_username = userdata.get('mysql_username')
        mysql_password = userdata.get('mysql_password')
        db_host_address = userdata.get('db_host_address')
        db_name = userdata.get('db_name')

        tunnel = sshtunnel.SSHTunnelForwarder(
            ('ssh.pythonanywhere.com'),
            ssh_username=ssh_username, ssh_password=ssh_password,
            remote_bind_address=(db_host_address, 3306)
        )
        tunnel.start()

        connection = MySQLdb.connect(
            user=mysql_username,
            passwd=mysql_password,
            host='127.0.0.1', port=tunnel.local_bind_port,
            db=db_name,
        )
        return tunnel, connection


    def execute_query(self, sql):

        tunnel, connection = self.establish_ssh_tunnel_connection()
        cursor = connection.cursor()
        cursor.execute(sql)
        connection.commit()

        rows = cursor.fetchall()

        cursor.close()
        connection.close()
        tunnel.stop()

        return rows


    def write_df_to_db(self, df, table_name, mode='replace'):

        tunnel, connection = self.establish_ssh_tunnel_connection()

        mysql_username = userdata.get('mysql_username')
        mysql_password = userdata.get('mysql_password')
        db_name = userdata.get('db_name')

        engine = create_engine(
            f"mysql+mysqldb://{mysql_username}:{mysql_password}@127.0.0.1:{tunnel.local_bind_port}/{db_name}"
        )

        df.to_sql(table_name, con=engine, if_exists=mode, index=False)

        engine.dispose()
        connection.close()
        tunnel.stop()


### Utility functions

In [44]:
from IPython.core.display import HTML
from IPython.utils.capture import capture_output
import subprocess


def display(sdf, n=10):
    pd_df = sdf.limit(n).toPandas()
    return HTML(pd_df.to_html(render_links=True, escape=False))


def get_jsonparsed_data(url):

    headers = {
        "Accept": "application/json",
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return None

### Create Spark Session

In [45]:
import os
os.environ["PYSPARK_SUBMIT_ARGS"] = "--driver-memory 8g pyspark-shell"

In [46]:
from pyspark.sql import SparkSession
from delta import configure_spark_with_delta_pip
import pyspark.sql.types as T
import pyspark.sql.functions as F


builder = (SparkSession.builder.appName("Rebalance_AI")
                               .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
                               .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
                               .config("spark.jars.packages","io.delta:delta-core_2.12:2.0.0")
)

spark = configure_spark_with_delta_pip(builder).getOrCreate()
spark.conf.set("spark.sql.parquet.datetimeRebaseModeInWrite", "CORRECTED")

spark

In [47]:
def _enable_sparkui(port=4040):
    from google.colab import output
    return output.serve_kernel_port_as_window(port, path='/jobs/index.html')

_enable_sparkui()

Try `serve_kernel_port_as_iframe` instead. [0m


<IPython.core.display.Javascript object>

### Delta Lake Config

In [48]:
class DeltaLakeConfig:

  def __init__(self, location, spark_session):
    self.location = location
    self.spark_session = spark_session


  def set_db(self, db_name):
    self.spark_session.sql(f"CREATE DATABASE IF NOT EXISTS {db_name} LOCATION 'file:{os.path.join(self.location, db_name+'.db')}' ")
    self.spark_session.sql(f"USE {db_name}")
    for table in os.listdir(os.path.join(self.location, db_name+'.db')):
        self.spark_session.sql(f"""
                                  CREATE TABLE IF NOT EXISTS {db_name}.{table}
                                  USING DELTA
                                  LOCATION 'file:{os.path.join(self.location, db_name+'.db', table)}'
                               """)


In [49]:
delta_config = DeltaLakeConfig(location = "/content/drive/My Drive/DeltaLake/", spark_session = spark)
delta_config.set_db("financial_modeling")

### API KEY

In [50]:
API_KEY = userdata.get('FINMODEL_API_KEY')

### Get Stock List

In [None]:
# base_url = 'https://financialmodelingprep.com/api/v3/stock/list'


# schema = T.StructType([
#     T.StructField("symbol", T.StringType(), True),
#     T.StructField("name", T.StringType(), True),
#     T.StructField("exchange", T.StringType(), True),
#     T.StructField("exchangeShortName", T.StringType(), True),
#     T.StructField("type", T.StringType(), True)
# ])


# if API_KEY:

#     url = f"{base_url}?apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     for stock in data:
#         stock.pop('price', None)

#     stock_list_df = spark.createDataFrame(data, schema=schema)

# else:

#     print("API key not found. Please set the FMP_API_KEY environment variable.")


# (stock_list_df.write
#               .format("delta")
#               .mode("overwrite")
#               .option('overwriteSchema', True)
#               .saveAsTable("stock_list")
# )


# display(spark.sql('select * from financial_modeling.stock_list'))

In [None]:
display(spark.sql('select count(*) as count from financial_modeling.stock_list'))

Unnamed: 0,count
0,84820


In [None]:
display(spark.sql('DESCRIBE HISTORY financial_modeling.stock_list'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,0,2025-02-22 09:20:04,,,CREATE OR REPLACE TABLE AS SELECT,"{'partitionBy': '[]', 'description': None, 'properties': '{}', 'clusterBy': '[]', 'isManaged': 'true'}",,,,,Serializable,False,"{'numOutputRows': '84820', 'numOutputBytes': '1983461', 'numFiles': '2'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0


In [51]:
stock_list_df = spark.sql("""
                            select *
                            from financial_modeling.stock_list
                            where 1=1
                              and exchangeShortName in ('NASDAQ', 'NYSE')
                              and type = "stock"
                              and symbol not like "%-%"
                          """)

stock_list = [row.symbol for row in stock_list_df.select('symbol').collect()]

print(f"Rows:{stock_list_df.count()}")
display(stock_list_df)

Rows:9798


Unnamed: 0,symbol,name,exchange,exchangeShortName,type
0,ONMDW,OneMedNet Corporation,NASDAQ Global Market,NASDAQ,stock
1,NOEMW,CO2 Energy Transition Corp. Warrant,Nasdaq,NASDAQ,stock
2,VRMEW,"VerifyMe, Inc.",NASDAQ Capital Market,NASDAQ,stock
3,MACIW,Melar Acquisition Corp. I Warrant,Nasdaq,NASDAQ,stock
4,VACHU,Voyager Acquisition Corp Unit,NASDAQ Global Market,NASDAQ,stock
5,EXEEL,Expand Energy Corporation,NASDAQ Global Select,NASDAQ,stock
6,MKDWW,MKD Technology Inc.,NASDAQ Global Market,NASDAQ,stock
7,MBAVU,M3-Brigade Acquisition V Corp. Units,Nasdaq,NASDAQ,stock
8,BUJA,Bukit Jalil Global Acquisition 1 Ltd,NASDAQ Capital Market,NASDAQ,stock
9,WINVR,WinVest Acquisition Corp.,NASDAQ Capital Market,NASDAQ,stock


# Get Data From API

## Company Information:

### 1.Company Profile


In [None]:
# request_counter = 0
# sleeping_time = 65


# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:

#     base_url = f'https://financialmodelingprep.com/api/v3/profile/{stock}'

#     if request_counter==300:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)

#         combined_df = combined_df.withColumn("price", F.col("price").cast("double")) \
#                                  .withColumn("beta", F.col("beta").cast("double")) \
#                                  .withColumn("volAvg", F.col("volAvg").cast("double")) \
#                                  .withColumn("mktCap", F.col("mktCap").cast("double")) \
#                                  .withColumn("lastDiv", F.col("lastDiv").cast("double")) \
#                                  .withColumn("changes", F.col("changes").cast("double")) \
#                                  .withColumn("fullTimeEmployees", F.col("fullTimeEmployees").cast("int")) \
#                                  .withColumn("dcfDiff", F.col("dcfDiff").cast("double")) \
#                                  .withColumn("dcf", F.col("dcf").cast("double")) \
#                                  .withColumn("ipoDate", F.col("ipoDate").cast("date")) \
#                                  .withColumn("defaultImage", F.col("defaultImage").cast("boolean")) \
#                                  .withColumn("isEtf", F.col("isEtf").cast("boolean")) \
#                                  .withColumn("isActivelyTrading", F.col("isActivelyTrading").cast("boolean")) \
#                                  .withColumn("isAdr", F.col("isAdr").cast("boolean")) \
#                                  .withColumn("isFund", F.col("isFund").cast("boolean"))

#         combined_df = combined_df.repartition(1)
#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('company_profile'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}?apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
#         df = spark.createDataFrame(data, schema=schema)
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")



# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.company_profile'))


Data was appended successfully.
Sleeping for 65 seconds...
Data was appended successfully.
Sleeping for 65 seconds...
Data was appended successfully.
Sleeping for 65 seconds...
Data was appended successfully.
Sleeping for 65 seconds...
Data was appended successfully.
Sleeping for 65 seconds...
Data was appended successfully.
Sleeping for 65 seconds...
Data was appended successfully.
Sleeping for 65 seconds...
Data was appended successfully.
Sleeping for 65 seconds...
Data was appended successfully.
Sleeping for 65 seconds...
Data was appended successfully.
Sleeping for 65 seconds...
Data was appended successfully.
Sleeping for 65 seconds...
Data was appended successfully.
Sleeping for 65 seconds...
Data was appended successfully.
Sleeping for 65 seconds...
Data was appended successfully.
Sleeping for 65 seconds...
Data was appended successfully.
Sleeping for 65 seconds...
Data was appended successfully.
Sleeping for 65 seconds...
Data was appended successfully.
Sleeping for 65 seconds.

Unnamed: 0,symbol,price,beta,volAvg,mktCap,lastDiv,range,changes,companyName,currency,cik,isin,cusip,exchange,exchangeShortName,industry,website,description,ceo,sector,country,fullTimeEmployees,phone,address,city,state,zip,dcfDiff,dcf,image,ipoDate,defaultImage,isEtf,isActivelyTrading,isAdr,isFund
0,ONMDW,0.0221,0.417,41050.41279,750639.0,0.0,0.0221-0.0224,-0.0001,OneMedNet Corporation,USD,1849380.0,US68270C1119,68270C111,NASDAQ Global Market,NASDAQ,Software - Application,https://www.onemednet.com,"OneMedNet Corporation provides clinical imaging solutions. It offers iRWD, a solution that utilizes AI to securely de-identify, search, and curate imaging data for its partner network consisting of medical and academic research institutions to generate progression in stages of medical research, discovery and diagnostics that span the field of life sciences. The company is based in Eden Prairie, Minnesota.",Mr. Aaron Green,Technology,US,20.0,800-918-7189,6385 Old Shady Oak Road,Eden Prairie,MN,55344.0,0.9138,-0.002796,https://images.financialmodelingprep.com/symbol/ONMDW.png,,False,False,True,False,False
1,NOEMW,0.1,0.0,39664.82353,0.0,0.0,0.1-0.1,0.02,CO2 Energy Transition Corp. Warrant,,,US12664M1119,,Nasdaq,NASDAQ,,,,Mr. Brady Douglas Rodgers,,,,,,,,,,0.0,https://images.financialmodelingprep.com/symbol/NOEMW.png,,True,False,True,True,False
2,VRMEW,0.0825,0.539,44801.75229,11613490.0,0.0,0.0825-0.0999,-0.0375,"VerifyMe, Inc.",USD,1104038.0,US92346X1155,92346X115,NASDAQ Capital Market,NASDAQ,Security & Protection Services,https://www.verifyme.com,"VerifyMe, Inc., together with its subsidiary, PeriShip Global, LLC, operates as a technology solutions provider that specializes in products to connect brands with consumers and providing brands with end-to-end logistics management for their products. The company operates through two segments, VerifyMe Solutions and PeriShip Global Solutions. The VerifyMe Solutions segment offers technology solutions to connect brands with consumers allowing brand owners to gather business intelligence while engaging directly with their consumers. Its solutions provide brand protection and supply chain functions, such as counterfeit prevention, traceability, consumer engagement solutions, and authentication for labels, packaging, and products, as well as tamper-proof labels. The PeriShip Global Solutions segment offers predictive analytics for optimizing delivery of time and temperature sensitive perishable products. This segment's products include PeriTrack customer dashboard, an integrated web portal tool gives its customers an in-depth look at their shipping activities based on real-time data. It also provides call center, pre-transit, post-delivery, and weather/traffic services. The company has a strategic partnership with INX International Ink Company. The company was formerly known as LaserLock Technologies, Inc. and changed its name to VerifyMe, Inc. in July 2015. VerifyMe, Inc. was incorporated in 1999 and is headquartered in Lake Mary, Florida.",Mr. Adam H. Stedham,Industrials,US,50.0,585 736 9400,801 International Parkway,Lake Mary,NV,32746.0,3.93411,4.95531,https://images.financialmodelingprep.com/symbol/VRMEW.png,2020-06-18,False,False,True,False,False
3,MACIW,0.131,2.957613,45192.08247,2259750.0,0.0,0.131-0.1565,-0.0255,Melar Acquisition Corp. I Warrant,,2016221.0,KYG6004G1183,,Nasdaq,NASDAQ,,,,,,KY,,,,,,,,0.0,https://images.financialmodelingprep.com/symbol/MACIW.png,,True,False,True,False,False
4,VACHU,10.214,-0.039302,1589.0,323651500.0,0.0,9.97-10.25,0.024,Voyager Acquisition Corp Unit,USD,2006815.0,KYG93A7H1207,,NASDAQ Global Market,NASDAQ,,,,,,,,,,,,,,0.0,https://images.financialmodelingprep.com/symbol/VACHU.png,,True,False,True,False,False
5,EXEEL,93.75,0.508,6683.0,24066400000.0,0.0,50.86-96.092,4.61,Expand Energy Corporation,USD,895126.0,US1651671802,,NASDAQ Global Select,NASDAQ,Energy,https://www.chk.com,"Expand Energy Corporation operates as an independent exploration and production company in the United States. It engages in acquisition, exploration, and development of properties to produce oil, natural gas, and natural gas liquids from underground reservoirs. The company holds interests in natural gas resource plays in the Marcellus Shale in the northern Appalachian Basin in Pennsylvania and the Haynesville/Bossier Shales in northwestern Louisiana. As of December 31, 2023, the company owns a portfolio of onshore U.S. unconventional natural gas assets, including interests in approximately 5,000 natural gas wells. The company was formerly known as Chesapeake Energy Corporation and changed its name to Expand Energy Corporation in October 2024. Expand Energy Corporation was founded in 1989 and is based in Oklahoma City, Oklahoma.",Mr. Domenic J. Dell'Osso Jr.,Energy,US,1000.0,405 848 8000,6100 North Western Avenue,Oklahoma City,OK,73118.0,,0.0,https://images.financialmodelingprep.com/symbol/EXEEL.png,,True,False,True,False,False
6,MKDWW,0.012,-7.12,35875.70833,0.0,0.0,0.012-0.0122,0.0,MKD Technology Inc.,USD,1991332.0,,,NASDAQ Global Market,NASDAQ,,,,,,VG,,,,,,,,0.0,https://images.financialmodelingprep.com/symbol/MKDWW.png,,True,False,True,False,False
7,MBAVU,10.21,0.16,1023.0,255250000.0,0.0,10.01-11.55,-0.08,M3-Brigade Acquisition V Corp. Units,USD,2016072.0,,,Nasdaq,NASDAQ,,,,,,KY,,,,,,,,0.0,https://images.financialmodelingprep.com/symbol/MBAVU.png,,True,False,True,False,False
8,BUJA,11.35,-0.01784,9310.0,56084000.0,0.0,10.41-12.18,0.0,Bukit Jalil Global Acquisition 1 Ltd,USD,1956055.0,KYG1676M1050,,NASDAQ Capital Market,NASDAQ,Shell Companies,,"Bukit Jalil Global Acquisition 1 Ltd operates a blank check company that intends to effect into a merger, share exchange, asset acquisition, share purchase, recapitalization, reorganization, or similar business combination with one or more businesses or entities. The company was incorporated in 2022 and is based in Kuala Lumpur, Malaysia.",Mr. Seck Chyn Foo,Financial Services,MY,,60 3 9133 9688,31-1 Taman Miharja Phase 3B,Kuala Lumpur,,55200.0,,0.0,https://images.financialmodelingprep.com/symbol/BUJA.png,2023-08-21,False,False,True,False,False
9,WINVR,0.14,0.036,53702.91912,40913100.0,0.0,0.14-0.149,0.0,WinVest Acquisition Corp.,USD,1854463.0,US97655B1254,97655B125,NASDAQ Capital Market,NASDAQ,Shell Companies,,"WinVest Acquisition Corp. intends to effect a merger, capital stock exchange, asset acquisition, stock purchase, reorganization, or similar business combination with one or more businesses or entities. It intends to focus its search on target businesses in the financial services industry with a focus on financial media, brokerage, banking, investing, and wealth management. The company was incorporated in 2021 and is based in Cambridge, Massachusetts.",Mr. Manish Jhunjhunwala,Financial Services,US,2.0,617-658-3094,125 Cambridgepark Drive,Cambridge,MA,2140.0,,0.0,https://images.financialmodelingprep.com/symbol/WINVR.png,2021-10-04,False,False,True,False,False


In [None]:
display(spark.sql('describe history financial_modeling.company_profile'))


Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,31,2025-02-23 16:57:15,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '300', 'numOutputBytes': '197492', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
1,30,2025-02-23 16:52:59,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '300', 'numOutputBytes': '201883', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
2,29,2025-02-23 16:48:54,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '300', 'numOutputBytes': '209148', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
3,28,2025-02-23 16:44:54,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '300', 'numOutputBytes': '202189', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
4,27,2025-02-23 16:40:51,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '300', 'numOutputBytes': '210764', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
5,26,2025-02-23 16:36:26,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '300', 'numOutputBytes': '201727', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
6,25,2025-02-23 16:32:24,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '300', 'numOutputBytes': '198845', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
7,24,2025-02-23 16:28:14,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '300', 'numOutputBytes': '228362', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
8,23,2025-02-23 16:24:14,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '300', 'numOutputBytes': '229984', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
9,22,2025-02-23 16:20:11,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,21,Serializable,True,"{'numOutputRows': '300', 'numOutputBytes': '239855', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0


In [None]:
display(spark.sql('DESCRIBE financial_modeling.company_profile'), n=100)


Unnamed: 0,col_name,data_type,comment
0,symbol,string,
1,price,double,
2,beta,double,
3,volAvg,double,
4,mktCap,double,
5,lastDiv,double,
6,range,string,
7,changes,double,
8,companyName,string,
9,currency,string,


In [None]:
display(spark.sql('select * from financial_modeling.company_profile limit 1'))

Unnamed: 0,symbol,price,beta,volAvg,mktCap,lastDiv,range,changes,companyName,currency,cik,isin,cusip,exchange,exchangeShortName,industry,website,description,ceo,sector,country,fullTimeEmployees,phone,address,city,state,zip,dcfDiff,dcf,image,ipoDate,defaultImage,isEtf,isActivelyTrading,isAdr,isFund
0,ONMDW,0.0221,0.417,41050.41279,750639.0,0.0,0.0221-0.0224,-0.0001,OneMedNet Corporation,USD,1849380,US68270C1119,68270C111,NASDAQ Global Market,NASDAQ,Software - Application,https://www.onemednet.com,"OneMedNet Corporation provides clinical imaging solutions. It offers iRWD, a solution that utilizes AI to securely de-identify, search, and curate imaging data for its partner network consisting of medical and academic research institutions to generate progression in stages of medical research, discovery and diagnostics that span the field of life sciences. The company is based in Eden Prairie, Minnesota.",Mr. Aaron Green,Technology,US,20,800-918-7189,6385 Old Shady Oak Road,Eden Prairie,MN,55344,0.9138,-0.002796,https://images.financialmodelingprep.com/symbol/ONMDW.png,,False,False,True,False,False


### 2.Employee count / Historical employee count

In [None]:
# request_counter = 0
# sleeping_time = 65


# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:

#     base_url = f'https://financialmodelingprep.com/api/v4/historical/employee_count?symbol={stock}'

#     if request_counter==300:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)

#         combined_df = combined_df.withColumn("acceptanceTime", F.col("acceptanceTime").cast("timestamp")) \
#                                  .withColumn("periodOfReport", F.col("periodOfReport").cast("date")) \
#                                  .withColumn("filingDate", F.col("filingDate").cast("date")) \
#                                  .withColumn("employeeCount", F.col("employeeCount").cast("int"))

#         combined_df = combined_df.repartition(1)
#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('historical_employee'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}&apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
#         df = spark.createDataFrame(data, schema=schema)
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")



# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.historical_employee'))


In [None]:
display(spark.sql('select * from financial_modeling.historical_employee limit 5'))

Unnamed: 0,symbol,cik,acceptanceTime,periodOfReport,companyName,formType,filingDate,employeeCount,source
0,RDDT,1713445,2025-02-12 18:11:55,2024-12-31,"Reddit, Inc.",10-K,2025-02-13,2233,https://www.sec.gov/Archives/edgar/data/1713445/000171344525000018/0001713445-25-000018-index.htm
1,BL,1666134,2025-02-21 16:07:22,2024-12-31,"BlackLine, Inc.",10-K,2025-02-21,1830,https://www.sec.gov/Archives/edgar/data/1666134/000166613425000003/0001666134-25-000003-index.htm
2,BL,1666134,2024-02-23 16:07:08,2023-12-31,"BlackLine, Inc.",10-K,2024-02-23,1750,https://www.sec.gov/Archives/edgar/data/1666134/000166613424000003/0001666134-24-000003-index.htm
3,BL,1666134,2023-02-23 16:05:05,2022-12-31,"BlackLine, Inc.",10-K,2023-02-23,1814,https://www.sec.gov/Archives/edgar/data/1666134/000166613423000003/0001666134-23-000003-index.htm
4,BL,1666134,2022-02-25 16:08:08,2021-12-31,"BlackLine, Inc.",10-K,2022-02-25,1557,https://www.sec.gov/Archives/edgar/data/1666134/000166613422000003/0001666134-22-000003-index.htm


In [None]:
display(spark.sql('describe history financial_modeling.historical_employee'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,31,2025-02-23 18:50:14,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '2720', 'numOutputBytes': '121615', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
1,30,2025-02-23 18:46:05,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '2737', 'numOutputBytes': '121373', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
2,29,2025-02-23 18:42:43,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '3119', 'numOutputBytes': '138415', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
3,28,2025-02-23 18:39:32,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '2824', 'numOutputBytes': '125883', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
4,27,2025-02-23 18:36:28,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '3124', 'numOutputBytes': '138448', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
5,26,2025-02-23 18:33:21,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '2791', 'numOutputBytes': '124166', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
6,25,2025-02-23 18:29:57,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '2572', 'numOutputBytes': '117400', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
7,24,2025-02-23 18:26:36,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '3557', 'numOutputBytes': '155659', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
8,23,2025-02-23 18:23:22,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '3864', 'numOutputBytes': '165908', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
9,22,2025-02-23 18:20:00,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,21,Serializable,True,"{'numOutputRows': '4101', 'numOutputBytes': '176305', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0


In [None]:
display(spark.sql('describe financial_modeling.historical_employee'), n=100)

Unnamed: 0,col_name,data_type,comment
0,symbol,string,
1,cik,string,
2,acceptanceTime,timestamp,
3,periodOfReport,date,
4,companyName,string,
5,formType,string,
6,filingDate,date,
7,employeeCount,int,
8,source,string,


In [None]:
spark.sql('select count(distinct symbol) from financial_modeling.historical_employee').show()

+----------------------+
|count(DISTINCT symbol)|
+----------------------+
|                  8364|
+----------------------+



### 3.Market Cap / Historical Market Cap

##### Stable Version

In [None]:
request_counter = 0
sleeping_time = 60


dfs = []
missing_stocks = []


intervals = [(1990, 2000), (2001, 2011), (2012, 2022), (2023, 2025)]

if API_KEY:

  # time.sleep(sleeping_time)

  for stock in stock_list:

    for interval in intervals:

      base_url = f'https://financialmodelingprep.com/stable/historical-market-capitalization?symbol={stock}&from={interval[0]}-01-01&to={interval[1]}-12-31'

      # if request_counter==3000:
      if len(dfs) == 1000:

          combined_df = dfs[0]
          for df in dfs[1:]:
              combined_df = combined_df.union(df)

          combined_df = combined_df.withColumn("date", F.col("date").cast("date")) \
                                  .withColumn("marketCap", F.col("marketCap").cast(T.LongType()))

          combined_df = combined_df.repartition(1)
          (combined_df.write
              .format("delta")
              .mode('append')
              .saveAsTable('stable_historical_market_cap'))

          print('Data was appended successfully.')

          dfs = []
          request_counter = 0

          print(f"Sleeping for {sleeping_time} seconds...")
          time.sleep(sleeping_time)


      url = f"{base_url}&apikey={API_KEY}"
      data = get_jsonparsed_data(url)

      if data and isinstance(data, list) and len(data) > 0:

          schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
          df = spark.createDataFrame(data, schema=schema)
          dfs.append(df)
          request_counter += 1
          print(f'Appending: Stock:{stock}, Interval:{interval}, request_counter:{request_counter}')

      else:
          missing_stocks.append(stock)
          request_counter += 1
          print(f'No data for: Stock:{stock}, Interval:{interval}, request_counter:{request_counter}')


else:
    print("API key not found. Please set the FMP_API_KEY environment variable.")




print(f"Missing stocks: {len(missing_stocks)}.")
display(spark.sql('select * from financial_modeling.stable_historical_market_cap'))


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Appending: Stock:MSAIW, Interval:(2023, 2025), request_counter:1635
No data for: Stock:XBIO, Interval:(1990, 2000), request_counter:1636
No data for: Stock:XBIO, Interval:(2001, 2011), request_counter:1637
Appending: Stock:XBIO, Interval:(2012, 2022), request_counter:1638
Appending: Stock:XBIO, Interval:(2023, 2025), request_counter:1639
No data for: Stock:DSY, Interval:(1990, 2000), request_counter:1640
No data for: Stock:DSY, Interval:(2001, 2011), request_counter:1641
Appending: Stock:DSY, Interval:(2012, 2022), request_counter:1642
Appending: Stock:DSY, Interval:(2023, 2025), request_counter:1643
No data for: Stock:FTEL, Interval:(1990, 2000), request_counter:1644
No data for: Stock:FTEL, Interval:(2001, 2011), request_counter:1645
No data for: Stock:FTEL, Interval:(2012, 2022), request_counter:1646
Appending: Stock:FTEL, Interval:(2023, 2025), request_counter:1647
No data for: Stock:ULH, Interval:(1990, 2000), reques

Unnamed: 0,symbol,date,marketCap
0,BHE,2000-12-29,396691515
1,BHE,2000-12-28,381266820
2,BHE,2000-12-27,368215155
3,BHE,2000-12-26,362678085
4,BHE,2000-12-22,373356720
5,BHE,2000-12-21,351603945
6,BHE,2000-12-20,348439905
7,BHE,2000-12-19,428331915
8,BHE,2000-12-18,467091405
9,BHE,2000-12-15,484493625


In [None]:
 display(spark.sql("""
                select min(date) as min_date, max(date) as max_date, count(*) as count
                from financial_modeling.stable_historical_market_cap


                """)


)


Unnamed: 0,min_date,max_date,count
0,1990-01-02,2025-05-09,26144395


In [None]:
# New Version

base_url = f'https://financialmodelingprep.com/stable/historical-market-capitalization?symbol=AAPL&from=1990-01-01&to=2000-12-31&apikey={API_KEY}'

data = get_jsonparsed_data(base_url)


schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])

df = spark.createDataFrame(data, schema=schema)
df.createOrReplaceTempView('df')



# display(spark.sql('select min(date), max(date), count(*) from df'))
display(spark.sql('select * from df order by date'))


Unnamed: 0,symbol,date,marketCap
0,AAPL,1990-01-02,4720731292
1,AAPL,1990-01-03,4752383569
2,AAPL,1990-01-04,4768280676
3,AAPL,1990-01-05,4784035846
4,AAPL,1990-01-08,4815830061
5,AAPL,1990-01-09,4768280676
6,AAPL,1990-01-10,4562327969
7,AAPL,1990-01-11,4372272369
8,AAPL,1990-01-12,4372272369
9,AAPL,1990-01-15,4340478153


#### Legacy Version

In [None]:
# request_counter = 0
# sleeping_time = 90


# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:

#     base_url = f'https://financialmodelingprep.com/api/v3/historical-market-capitalization/{stock}?limit=10000&from=2000-01-01&to=2025-03-01'

#     if request_counter==300:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)

#         combined_df = combined_df.withColumn("date", F.col("date").cast("date")) \
#                                  .withColumn("marketCap", F.col("marketCap").cast(T.LongType()))

#         combined_df = combined_df.repartition(1)
#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('historical_market_cap'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}&apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
#         df = spark.createDataFrame(data, schema=schema)
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")




# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.historical_market_cap'))


In [None]:
display(spark.sql('select * from financial_modeling.historical_market_cap limit 5'))


Unnamed: 0,symbol,date,marketCap
0,CHT,2025-02-21,30277315641
1,CHT,2025-02-20,30145439042
2,CHT,2025-02-19,29796353926
3,CHT,2025-02-18,30091136913
4,CHT,2025-02-14,29881685844


In [None]:
display(spark.sql('describe history financial_modeling.historical_market_cap'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,31,2025-02-23 21:24:32,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '289652', 'numOutputBytes': '1811608', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
1,30,2025-02-23 21:20:29,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '283203', 'numOutputBytes': '2031336', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
2,29,2025-02-23 21:16:18,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '300297', 'numOutputBytes': '2105888', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
3,28,2025-02-23 21:12:27,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '296179', 'numOutputBytes': '1881498', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
4,27,2025-02-23 21:08:39,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '308138', 'numOutputBytes': '2283144', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
5,26,2025-02-23 21:04:21,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '295623', 'numOutputBytes': '2095432', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
6,25,2025-02-23 21:00:31,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '283772', 'numOutputBytes': '1791153', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
7,24,2025-02-23 20:56:36,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '325098', 'numOutputBytes': '2196298', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
8,23,2025-02-23 20:52:47,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '329260', 'numOutputBytes': '2245014', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
9,22,2025-02-23 20:49:01,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,21,Serializable,True,"{'numOutputRows': '340668', 'numOutputBytes': '2340419', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0


In [None]:
display(spark.sql('describe financial_modeling.historical_market_cap'))

Unnamed: 0,col_name,data_type,comment
0,symbol,string,
1,date,date,
2,marketCap,bigint,


In [None]:
spark.sql('select count(distinct symbol) from financial_modeling.historical_market_cap').show()

+----------------------+
|count(DISTINCT symbol)|
+----------------------+
|                  8139|
+----------------------+



### 4.Analyst Estimates

In [None]:
# request_counter = 0
# sleeping_time = 90


# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:

#     base_url = f'https://financialmodelingprep.com/api/v3/analyst-estimates/{stock}'

#     if request_counter==300:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)

#         combined_df = combined_df.withColumn("date", F.col("date").cast("date")) \
#                                  .withColumn("estimatedRevenueLow", F.col("estimatedRevenueLow").cast(T.LongType())) \
#                                  .withColumn("estimatedRevenueHigh", F.col("estimatedRevenueHigh").cast(T.LongType())) \
#                                  .withColumn("estimatedRevenueAvg", F.col("estimatedRevenueAvg").cast(T.LongType())) \
#                                  .withColumn("estimatedEbitdaLow", F.col("estimatedEbitdaLow").cast(T.LongType())) \
#                                  .withColumn("estimatedEbitdaHigh", F.col("estimatedEbitdaHigh").cast(T.LongType())) \
#                                  .withColumn("estimatedEbitdaAvg", F.col("estimatedEbitdaAvg").cast(T.LongType()))  \
#                                  .withColumn("estimatedEbitLow", F.col("estimatedEbitLow").cast(T.LongType())) \
#                                  .withColumn("estimatedEbitHigh", F.col("estimatedEbitHigh").cast(T.LongType())) \
#                                  .withColumn("estimatedEbitAvg", F.col("estimatedEbitAvg").cast(T.LongType())) \
#                                  .withColumn("estimatedNetIncomeLow", F.col("estimatedNetIncomeLow").cast(T.LongType())) \
#                                  .withColumn("estimatedNetIncomeHigh", F.col("estimatedNetIncomeHigh").cast(T.LongType())) \
#                                  .withColumn("estimatedNetIncomeAvg", F.col("estimatedNetIncomeAvg").cast(T.LongType()))  \
#                                  .withColumn("estimatedSgaExpenseLow", F.col("estimatedSgaExpenseLow").cast(T.LongType())) \
#                                  .withColumn("estimatedSgaExpenseHigh", F.col("estimatedSgaExpenseHigh").cast(T.LongType())) \
#                                  .withColumn("estimatedSgaExpenseAvg", F.col("estimatedSgaExpenseAvg").cast(T.LongType()))  \
#                                  .withColumn("estimatedEpsAvg", F.col("estimatedEpsAvg").cast('double')) \
#                                  .withColumn("estimatedEpsHigh", F.col("estimatedEpsHigh").cast('double')) \
#                                  .withColumn("estimatedEpsLow", F.col("estimatedEpsLow").cast('double'))  \
#                                  .withColumn("numberAnalystEstimatedRevenue", F.col("numberAnalystEstimatedRevenue").cast('int')) \
#                                  .withColumn("numberAnalystsEstimatedEps", F.col("numberAnalystsEstimatedEps").cast('int'))


#         combined_df = combined_df.repartition(1)
#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('analyst_estimates'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}?apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
#         df = spark.createDataFrame(data, schema=schema)
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")



# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.analyst_estimates'))


Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds.

Unnamed: 0,symbol,date,estimatedRevenueLow,estimatedRevenueHigh,estimatedRevenueAvg,estimatedEbitdaLow,estimatedEbitdaHigh,estimatedEbitdaAvg,estimatedEbitLow,estimatedEbitHigh,estimatedEbitAvg,estimatedNetIncomeLow,estimatedNetIncomeHigh,estimatedNetIncomeAvg,estimatedSgaExpenseLow,estimatedSgaExpenseHigh,estimatedSgaExpenseAvg,estimatedEpsAvg,estimatedEpsHigh,estimatedEpsLow,numberAnalystEstimatedRevenue,numberAnalystsEstimatedEps
0,PIXY,2025-08-31,19600000,19600000,19600000,-16227443,-16227443,-16227443,-16297096,-16297096,-16297096,-25340600,-25340600,-25340600,23494130,23494130,23494130,-47.1,-47.1,-47.1,1,1
1,PIXY,2024-08-31,15900000,15900000,15900000,-13164099,-13164099,-13164099,-13220603,-13220603,-13220603,-26309031,-26309031,-26309031,19059014,19059014,19059014,-48.9,-48.9,-48.9,1,1
2,PIXY,2023-08-31,18300000,18300000,18300000,-15151133,-15151133,-15151133,-15216166,-15216166,-15216166,-379624795,-379624795,-379624795,21935846,21935846,21935846,-705.6,-705.6,-705.6,1,1
3,PIXY,2021-08-31,40000000,40000000,40000000,-33117232,-33117232,-33117232,-33259381,-33259381,-33259381,-9240699600,-9240699600,-9240699600,47947206,47947206,47947206,-412200.0,-412200.0,-412200.0,1,1
4,PIXY,2020-08-31,46100000,46100000,46100000,-38167610,-38167610,-38167610,-38331436,-38331436,-38331436,-12162213360,-12162213360,-12162213360,55259155,55259155,55259155,-542520.0,-542520.0,-542520.0,1,1
5,PIXY,2019-08-31,53193000,53193000,53193000,-44040123,-44040123,-44040123,-44229156,-44229156,-44229156,-9361756800,-9361756800,-9361756800,63761393,63761393,63761393,-417600.0,-417600.0,-417600.0,1,1
6,CSSEP,2023-12-31,505620220,505620220,505620220,51148571,51148571,51148571,-168092896,-168092896,-168092896,0,0,0,184481593,184481593,184481593,,0.0,0.0,0,0
7,CSSEP,2022-12-31,252810110,252810110,252810110,25574285,25574285,25574285,-84046448,-84046448,-84046448,0,0,0,92240796,92240796,92240796,,0.0,0.0,0,0
8,ONCT,2028-12-31,1000000,1000000,1000000,-600000,-600000,-600000,-1000000,-1000000,-1000000,-14023804,-14023804,-14023804,6688209,6688209,6688209,-4.77,-4.77,-4.77,1,1
9,ONCT,2027-12-31,1000000,1000000,1000000,-600000,-600000,-600000,-1000000,-1000000,-1000000,-19962606,-19962606,-19962606,6688209,6688209,6688209,-6.79,-6.79,-6.79,1,1


In [None]:
display(spark.sql('describe history financial_modeling.analyst_estimates'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,31,2025-02-25 10:56:37,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '2029', 'numOutputBytes': '235725', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
1,30,2025-02-25 10:51:57,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '2059', 'numOutputBytes': '234641', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
2,29,2025-02-25 10:47:06,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '2258', 'numOutputBytes': '261605', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
3,28,2025-02-25 10:42:27,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '2164', 'numOutputBytes': '247768', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
4,27,2025-02-25 10:37:23,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '2436', 'numOutputBytes': '285972', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
5,26,2025-02-25 10:32:46,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '2172', 'numOutputBytes': '248556', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
6,25,2025-02-25 10:27:52,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '2038', 'numOutputBytes': '234683', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
7,24,2025-02-25 10:23:20,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '3168', 'numOutputBytes': '364427', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
8,23,2025-02-25 10:18:22,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '3679', 'numOutputBytes': '430978', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
9,22,2025-02-25 10:13:08,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,21,Serializable,True,"{'numOutputRows': '3757', 'numOutputBytes': '439213', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0


In [None]:
display(spark.sql('describe financial_modeling.analyst_estimates'), n=100)

Unnamed: 0,col_name,data_type,comment
0,symbol,string,
1,date,date,
2,estimatedRevenueLow,bigint,
3,estimatedRevenueHigh,bigint,
4,estimatedRevenueAvg,bigint,
5,estimatedEbitdaLow,bigint,
6,estimatedEbitdaHigh,bigint,
7,estimatedEbitdaAvg,bigint,
8,estimatedEbitLow,bigint,
9,estimatedEbitHigh,bigint,


In [None]:
display(spark.sql('select * from financial_modeling.analyst_estimates limit 3'))

Unnamed: 0,symbol,date,estimatedRevenueLow,estimatedRevenueHigh,estimatedRevenueAvg,estimatedEbitdaLow,estimatedEbitdaHigh,estimatedEbitdaAvg,estimatedEbitLow,estimatedEbitHigh,estimatedEbitAvg,estimatedNetIncomeLow,estimatedNetIncomeHigh,estimatedNetIncomeAvg,estimatedSgaExpenseLow,estimatedSgaExpenseHigh,estimatedSgaExpenseAvg,estimatedEpsAvg,estimatedEpsHigh,estimatedEpsLow,numberAnalystEstimatedRevenue,numberAnalystsEstimatedEps
0,PIXY,2025-08-31,19600000,19600000,19600000,-16227443,-16227443,-16227443,-16297096,-16297096,-16297096,-25340600,-25340600,-25340600,23494130,23494130,23494130,-47.1,-47.1,-47.1,1,1
1,PIXY,2024-08-31,15900000,15900000,15900000,-13164099,-13164099,-13164099,-13220603,-13220603,-13220603,-26309031,-26309031,-26309031,19059014,19059014,19059014,-48.9,-48.9,-48.9,1,1
2,PIXY,2023-08-31,18300000,18300000,18300000,-15151133,-15151133,-15151133,-15216166,-15216166,-15216166,-379624795,-379624795,-379624795,21935846,21935846,21935846,-705.6,-705.6,-705.6,1,1


In [None]:
spark.sql('select count(distinct symbol) from financial_modeling.analyst_estimates').show()

+----------------------+
|count(DISTINCT symbol)|
+----------------------+
|                  5304|
+----------------------+



### 5.Analyst Recomendations

In [None]:
# request_counter = 0
# sleeping_time = 90


# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:

#     base_url = f'https://financialmodelingprep.com/api/v3/analyst-stock-recommendations/{stock}'

#     if request_counter==300:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)

#         combined_df = combined_df.withColumn("date", F.col("date").cast("date")) \
#                                  .withColumn("analystRatingsbuy", F.col("analystRatingsbuy").cast('int')) \
#                                  .withColumn("analystRatingsHold", F.col("analystRatingsHold").cast('int')) \
#                                  .withColumn("analystRatingsSell", F.col("analystRatingsSell").cast('int')) \
#                                  .withColumn("analystRatingsStrongSell", F.col("analystRatingsStrongSell").cast('int')) \
#                                  .withColumn("analystRatingsStrongBuy", F.col("analystRatingsStrongBuy").cast('int'))


#         combined_df = combined_df.repartition(1)
#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('analyst_recommendations'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}?apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
#         df = spark.createDataFrame(data, schema=schema)
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")



# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.analyst_recommendations'))


Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds.

Unnamed: 0,symbol,date,analystRatingsbuy,analystRatingsHold,analystRatingsSell,analystRatingsStrongSell,analystRatingsStrongBuy
0,NVCT,2025-02-01,1,0,0,0,2
1,NVCT,2025-01-01,1,0,0,0,2
2,NVCT,2024-12-01,1,0,0,0,2
3,NVCT,2024-11-01,2,0,0,0,2
4,NVCT,2024-10-01,2,0,0,0,2
5,NVCT,2024-09-01,2,0,0,0,2
6,NVCT,2024-08-01,2,0,0,0,1
7,NVCT,2024-07-01,2,0,0,0,1
8,NVCT,2024-06-01,2,0,0,0,1
9,NVCT,2024-05-01,0,0,0,0,0


In [None]:
display(spark.sql('describe history financial_modeling.analyst_recommendations'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,31,2025-02-25 13:21:50,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '8932', 'numOutputBytes': '13870', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
1,30,2025-02-25 13:17:10,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '9529', 'numOutputBytes': '14556', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
2,29,2025-02-25 13:12:42,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '9743', 'numOutputBytes': '14688', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
3,28,2025-02-25 13:08:00,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '9435', 'numOutputBytes': '14410', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
4,27,2025-02-25 13:03:27,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '10878', 'numOutputBytes': '16288', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
5,26,2025-02-25 12:58:32,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '9750', 'numOutputBytes': '14550', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
6,25,2025-02-25 12:54:10,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '9080', 'numOutputBytes': '13769', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
7,24,2025-02-25 12:49:54,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '14139', 'numOutputBytes': '20378', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
8,23,2025-02-25 12:44:49,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '14549', 'numOutputBytes': '21864', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
9,22,2025-02-25 12:39:40,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,21,Serializable,True,"{'numOutputRows': '15438', 'numOutputBytes': '22682', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0


In [None]:
display(spark.sql('describe financial_modeling.analyst_recommendations'), n=100)

Unnamed: 0,col_name,data_type,comment
0,symbol,string,
1,date,date,
2,analystRatingsbuy,int,
3,analystRatingsHold,int,
4,analystRatingsSell,int,
5,analystRatingsStrongSell,int,
6,analystRatingsStrongBuy,int,


In [None]:
display(spark.sql('select * from financial_modeling.analyst_recommendations limit 3'))

Unnamed: 0,symbol,date,analystRatingsbuy,analystRatingsHold,analystRatingsSell,analystRatingsStrongSell,analystRatingsStrongBuy
0,NVCT,2025-02-01,1,0,0,0,2
1,NVCT,2025-01-01,1,0,0,0,2
2,NVCT,2024-12-01,1,0,0,0,2


In [None]:
spark.sql('select count(distinct symbol) from financial_modeling.analyst_recommendations').show()

+----------------------+
|count(DISTINCT symbol)|
+----------------------+
|                  6082|
+----------------------+



### 6.Company Outlook

In [None]:
# # Company Splits History

# company_splits_history_data = []

# for stock in full_profile_df['symbol'].to_list():

#     base_url = f'https://financialmodelingprep.com/api/v4/company-outlook?symbol={stock}'

#     if API_KEY:
#         url = f"{base_url}&apikey={API_KEY}"
#         data = get_jsonparsed_data(url)
#         df = pd.DataFrame(data['splitsHistory'])
#         df['symbol'] = stock
#         company_splits_history_data.append(df)
#     else:
#         print("API key not found. Please set the FMP_API_KEY environment variable.")


# company_splits_history_df = pd.concat(company_splits_history_data)
# print(company_splits_history_df.shape)
# company_splits_history_df.head()

In [None]:
# # Stock Dividends

# company_stock_dividends_data = []

# for stock in full_profile_df['symbol'].to_list():

#     base_url = f'https://financialmodelingprep.com/api/v4/company-outlook?symbol={stock}'

#     if API_KEY:
#         url = f"{base_url}&apikey={API_KEY}"
#         data = get_jsonparsed_data(url)
#         df = pd.DataFrame(data['stockDividend'])
#         df['symbol'] = stock
#         company_stock_dividends_data.append(df)
#     else:
#         print("API key not found. Please set the FMP_API_KEY environment variable.")


# company_stock_dividends_df = pd.concat(company_stock_dividends_data)
# print(company_stock_dividends_df.shape)
# company_stock_dividends_df.head()

In [None]:
# data.keys()

### 7.Stock Peers

In [None]:
# request_counter = 0
# sleeping_time = 90


# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:

#     base_url = f'https://financialmodelingprep.com/api/v4/stock_peers?symbol={stock}'

#     if request_counter==300:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)


#         combined_df = combined_df.repartition(1)

#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('stock_peers'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}&apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([
#             T.StructField("symbol", T.StringType(), True),
#             T.StructField("peersList", T.ArrayType(T.StringType()), True)
#         ])

#         df = spark.createDataFrame(data, schema=schema)
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")



# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.stock_peers'))


Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds.

Unnamed: 0,symbol,peersList
0,AAPL,"[LPL, SNEJF, PCRFY, SONO, VZIO, MICS, WLDSW, KOSS, GPRO, SONY, UEIC, HEAR, VUZI, WLDS]"
1,ACCD,"[PRVA, HSTM, NRC, CPSI, NXGN, HCAT, DH, PINC, CERT, PHR, AGTI, EVH, CMAX, RCM]"
2,TPVG,"[TCPC, TSLX, GBDC, NMFC, CGBD, ARCC, MAIN, CSWC, FDUS, CCAP, FSK]"
3,HUMA,"[CRNX, VRDN, CYTK, GPCR, ICVX, IMCR, MIRM, LRMR, KALV, LYRA, THRX, KURA, NRIX, SEER, QSI, HCWB, MNOV]"
4,RAND,"[BGX, FCT, NSL, JRO, EVF, PIAC, AHFCF, BLRZF, URNAF, GCAAF, CBH, EFT, NXG, GNT, CXH, BGT, JLS, AFT, PYCFF]"
5,SPCE,"[ASTR, PL, EVEX, RDW, LMT, RTX, NOC, GD, RKLB, MNTS, BA, LHX]"
6,AVGO,"[AMD, MU, INTC, TSM, MRVL, NVDA]"
7,FEMY,"[, POCI, KRMD, INFU, UTMD, MLSS, PDEX, CLPBY, OPSSF, SAUHY, NEPH]"
8,PAYC,"[TEAM, DDOG, NOW, TTD, HUBS, WDAY, SHOP, U, SNOW, CRM, INTU]"
9,AXDX,"[CVRX, NPCE, ANIK, AXGN, AORT, SGHT, OFIX, AVNS, CNMD, KIDS, SRDX, ITGR, OSA, LIVN, ELMD, LUNG, IRMD]"


In [None]:
display(spark.sql('describe history financial_modeling.stock_peers'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,31,2025-02-25 16:20:32,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '217', 'numOutputBytes': '16218', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
1,30,2025-02-25 16:15:48,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '218', 'numOutputBytes': '15849', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
2,29,2025-02-25 16:10:54,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '225', 'numOutputBytes': '16770', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
3,28,2025-02-25 16:05:42,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '223', 'numOutputBytes': '16712', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
4,27,2025-02-25 16:00:51,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '238', 'numOutputBytes': '16889', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
5,26,2025-02-25 15:56:13,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '230', 'numOutputBytes': '17957', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
6,25,2025-02-25 15:51:42,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '218', 'numOutputBytes': '15857', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
7,24,2025-02-25 15:46:40,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '249', 'numOutputBytes': '17449', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
8,23,2025-02-25 15:41:55,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '258', 'numOutputBytes': '18582', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
9,22,2025-02-25 15:37:02,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,21,Serializable,True,"{'numOutputRows': '260', 'numOutputBytes': '18749', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0


In [None]:
display(spark.sql('describe financial_modeling.stock_peers'), n=100)

Unnamed: 0,col_name,data_type,comment
0,symbol,string,
1,peersList,array,


In [None]:
display(spark.sql('select * from financial_modeling.stock_peers limit 3'))

Unnamed: 0,symbol,peersList
0,AAPL,"[LPL, SNEJF, PCRFY, SONO, VZIO, MICS, WLDSW, KOSS, GPRO, SONY, UEIC, HEAR, VUZI, WLDS]"
1,ACCD,"[PRVA, HSTM, NRC, CPSI, NXGN, HCAT, DH, PINC, CERT, PHR, AGTI, EVH, CMAX, RCM]"
2,TPVG,"[TCPC, TSLX, GBDC, NMFC, CGBD, ARCC, MAIN, CSWC, FDUS, CCAP, FSK]"


In [None]:
spark.sql('select count(distinct symbol) from financial_modeling.stock_peers').show()

+----------------------+
|count(DISTINCT symbol)|
+----------------------+
|                  6595|
+----------------------+



### 8.Delisted Companies

In [None]:

# if API_KEY:

#     base_url = f'https://financialmodelingprep.com/api/v3/delisted-companies'


#     url = f"{base_url}?apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])

#         df = spark.createDataFrame(data, schema=schema)

#         df = df.withColumn("ipoDate", F.col("ipoDate").cast("date")) \
#                .withColumn("delistedDate", F.col("delistedDate").cast("date"))


#         df.write.format("delta").mode('overwrite').option('overwriteSchema', True).saveAsTable('delisted_companies')
#         print('Data was overwritten successfully.')

#     else:
#          print("Missing or invalid data.")

# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")




# display(spark.sql('select * from financial_modeling.delisted_companies'))


Data was overwritten successfully.


Unnamed: 0,symbol,companyName,exchange,ipoDate,delistedDate
0,PP,The Meet Kevin Pricing Power ETF,AMEX,2022-11-29,2025-02-26
1,MOBQW,"Mobiquity Technologies, Inc.",NASDAQ,2021-12-09,2025-02-25
2,MCOMW,Micromobility.com Inc - Warrants (13/08/2026),NASDAQ,2019-12-09,2025-02-25
3,BHAC,Crixus BH3 Acquisition Company,NASDAQ,2015-03-17,2025-02-24
4,BHACW,Crixus BH3 Acquisition Company,NASDAQ,2021-11-30,2025-02-21
5,QYLE,Global X Funds - Global X Nasdaq 100 ESG Covered Call ETF,NASDAQ,2023-02-22,2025-02-21
6,BHACU,Crixus BH3 Acquisition Company,NASDAQ,2021-10-05,2025-02-21
7,CHIH,Global X MSCI China Health Care ETF,AMEX,2018-12-11,2025-02-19
8,CHIU,Global X MSCI China Utilities ETF,AMEX,2018-12-11,2025-02-19
9,FYLG,Global X Financials Covered Call & Growth ETF,AMEX,2022-11-22,2025-02-18


In [None]:
display(spark.sql('describe history financial_modeling.delisted_companies'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,3,2025-02-27 19:50:02,,,CREATE OR REPLACE TABLE AS SELECT,"{'partitionBy': '[]', 'description': None, 'properties': '{}', 'clusterBy': '[]', 'isManaged': 'true'}",,,,2.0,Serializable,False,"{'numOutputRows': '100', 'numOutputBytes': '7104', 'numFiles': '2'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
1,2,2025-02-27 19:48:45,,,CREATE OR REPLACE TABLE AS SELECT,"{'partitionBy': '[]', 'description': None, 'properties': '{}', 'clusterBy': '[]', 'isManaged': 'true'}",,,,1.0,Serializable,False,"{'numOutputRows': '100', 'numOutputBytes': '7104', 'numFiles': '2'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
2,1,2025-02-27 19:47:45,,,CREATE OR REPLACE TABLE AS SELECT,"{'partitionBy': '[]', 'description': None, 'properties': '{}', 'clusterBy': '[]', 'isManaged': 'true'}",,,,0.0,Serializable,False,"{'numOutputRows': '100', 'numOutputBytes': '7677', 'numFiles': '2'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
3,0,2025-02-27 19:44:51,,,CREATE OR REPLACE TABLE AS SELECT,"{'partitionBy': '[]', 'description': None, 'properties': '{}', 'clusterBy': '[]', 'isManaged': 'true'}",,,,,Serializable,False,"{'numOutputRows': '100', 'numOutputBytes': '8044', 'numFiles': '2'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0


In [None]:
display(spark.sql('describe financial_modeling.delisted_companies'))

Unnamed: 0,col_name,data_type,comment
0,symbol,string,
1,companyName,string,
2,exchange,string,
3,ipoDate,date,
4,delistedDate,date,


In [None]:
display(spark.sql('select * from financial_modeling.delisted_companies limit 3'))

Unnamed: 0,symbol,companyName,exchange,ipoDate,delistedDate
0,PP,The Meet Kevin Pricing Power ETF,AMEX,2022-11-29,2025-02-26
1,MOBQW,"Mobiquity Technologies, Inc.",NASDAQ,2021-12-09,2025-02-25
2,MCOMW,Micromobility.com Inc - Warrants (13/08/2026),NASDAQ,2019-12-09,2025-02-25


In [None]:
spark.sql('select count(distinct symbol), count(*) from financial_modeling.delisted_companies').show()

+----------------------+--------+
|count(DISTINCT symbol)|count(1)|
+----------------------+--------+
|                   100|     100|
+----------------------+--------+



### 9.Company Share Float / Historical Share Float

In [None]:
# request_counter = 0
# sleeping_time = 90


# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:

#     base_url = f'https://financialmodelingprep.com/api/v4/historical/shares_float?symbol={stock}'

#     if request_counter==300:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)



#         combined_df = combined_df.withColumn("date", F.col("date").cast("date")) \
#                                  .withColumn("freeFloat", F.col("freeFloat").cast(T.DoubleType())) \
#                                  .withColumn("floatShares", F.col("floatShares").cast(T.LongType())) \
#                                  .withColumn("outstandingShares", F.col("outstandingShares").cast(T.LongType()))

#         combined_df = combined_df.repartition(1)

#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('historical_shares_float'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}&apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])

#         df = spark.createDataFrame(data, schema=schema)
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")



# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.historical_shares_float'))


In [None]:
display(spark.sql('describe history financial_modeling.historical_shares_float'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,31,2025-02-26 22:38:54,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '202953', 'numOutputBytes': '842251', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
1,30,2025-02-26 22:34:12,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '222910', 'numOutputBytes': '934254', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
2,29,2025-02-26 22:29:19,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '213554', 'numOutputBytes': '927446', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
3,28,2025-02-26 22:24:38,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '219564', 'numOutputBytes': '889238', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
4,27,2025-02-26 22:19:56,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '231113', 'numOutputBytes': '969693', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
5,26,2025-02-26 22:15:18,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '227257', 'numOutputBytes': '852275', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
6,25,2025-02-26 22:10:24,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '207501', 'numOutputBytes': '840330', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
7,24,2025-02-26 22:05:27,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '262603', 'numOutputBytes': '1017899', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
8,23,2025-02-26 22:00:39,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '268017', 'numOutputBytes': '1062446', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
9,22,2025-02-26 21:55:52,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,21,Serializable,True,"{'numOutputRows': '278930', 'numOutputBytes': '1114483', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0


In [None]:
display(spark.sql('describe financial_modeling.historical_shares_float'))

Unnamed: 0,col_name,data_type,comment
0,symbol,string,
1,date,date,
2,freeFloat,double,
3,floatShares,bigint,
4,outstandingShares,bigint,
5,source,string,


In [None]:
display(spark.sql('select * from financial_modeling.historical_shares_float limit 3'))

Unnamed: 0,symbol,date,freeFloat,floatShares,outstandingShares,source
0,BLCO,2025-02-26,10.3085,36302829,352164030,
1,BLCO,2025-02-25,10.3085,36302829,352164030,
2,BLCO,2025-02-24,10.3085,36302829,352164030,


## Financial Statements:

### 1.Income Statement

In [None]:
# request_counter = 0
# sleeping_time = 90

# column_types = {
#     "date": T.DateType(),
#     "fillingDate": T.DateType(),
#     "acceptedDate": T.TimestampType(),
#     "calendarYear": T.IntegerType(),
#     "revenue": T.LongType(),
#     "costOfRevenue": T.LongType(),
#     "grossProfit": T.LongType(),
#     "grossProfitRatio": T.DoubleType(),
#     "researchAndDevelopmentExpenses": T.LongType(),
#     "generalAndAdministrativeExpenses": T.LongType(),
#     "sellingAndMarketingExpenses": T.LongType(),
#     "sellingGeneralAndAdministrativeExpenses": T.LongType(),
#     "otherExpenses": T.LongType(),
#     "operatingExpenses": T.LongType(),
#     "costAndExpenses": T.LongType(),
#     "interestIncome": T.LongType(),
#     "interestExpense": T.LongType(),
#     "depreciationAndAmortization": T.LongType(),
#     "ebitda": T.LongType(),
#     "ebitdaratio": T.DoubleType(),
#     "operatingIncome": T.LongType(),
#     "operatingIncomeRatio": T.DoubleType(),
#     "totalOtherIncomeExpensesNet": T.LongType(),
#     "incomeBeforeTax": T.LongType(),
#     "incomeBeforeTaxRatio": T.DoubleType(),
#     "incomeTaxExpense": T.LongType(),
#     "netIncome": T.LongType(),
#     "netIncomeRatio": T.DoubleType(),
#     "eps": T.DoubleType(),
#     "epsdiluted": T.DoubleType(),
#     "weightedAverageShsOut": T.LongType(),
#     "weightedAverageShsOutDil": T.LongType(),
# }

# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:

#     base_url = f'https://financialmodelingprep.com/api/v3/income-statement/{stock}?period=quarter'

#     if request_counter==300:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)

#         for col_name, dtype in column_types.items():
#           combined_df = combined_df.withColumn(col_name, F.col(col_name).cast(dtype))

#         combined_df = combined_df.repartition(1)

#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('income_statements'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}&apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
#         df = spark.createDataFrame(data, schema=schema)
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")



# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.income_statements'))

In [None]:
display(spark.sql('describe history financial_modeling.income_statements'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,31,2025-02-27 23:41:25,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '14818', 'numOutputBytes': '2730656', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
1,30,2025-02-27 23:34:48,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '14545', 'numOutputBytes': '2668545', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
2,29,2025-02-27 23:27:44,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '16566', 'numOutputBytes': '3036242', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
3,28,2025-02-27 23:21:33,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '15611', 'numOutputBytes': '2868612', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
4,27,2025-02-27 23:15:26,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '16386', 'numOutputBytes': '3043509', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
5,26,2025-02-27 23:09:19,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '15240', 'numOutputBytes': '2801166', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
6,25,2025-02-27 23:03:08,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '14178', 'numOutputBytes': '2583579', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
7,24,2025-02-27 22:57:04,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '18465', 'numOutputBytes': '3391431', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
8,23,2025-02-27 22:50:47,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '19465', 'numOutputBytes': '3585737', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
9,22,2025-02-27 22:44:22,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,21,Serializable,True,"{'numOutputRows': '20597', 'numOutputBytes': '3842909', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0


In [None]:
display(spark.sql('describe financial_modeling.income_statements'), n=100)

Unnamed: 0,col_name,data_type,comment
0,date,date,
1,symbol,string,
2,reportedCurrency,string,
3,cik,string,
4,fillingDate,date,
5,acceptedDate,timestamp,
6,calendarYear,int,
7,period,string,
8,revenue,bigint,
9,costOfRevenue,bigint,


In [None]:
display(spark.sql('select * from financial_modeling.income_statements limit 3'))

Unnamed: 0,date,symbol,reportedCurrency,cik,fillingDate,acceptedDate,calendarYear,period,revenue,costOfRevenue,grossProfit,grossProfitRatio,researchAndDevelopmentExpenses,generalAndAdministrativeExpenses,sellingAndMarketingExpenses,sellingGeneralAndAdministrativeExpenses,otherExpenses,operatingExpenses,costAndExpenses,interestIncome,interestExpense,depreciationAndAmortization,ebitda,ebitdaratio,operatingIncome,operatingIncomeRatio,totalOtherIncomeExpensesNet,incomeBeforeTax,incomeBeforeTaxRatio,incomeTaxExpense,netIncome,netIncomeRatio,eps,epsdiluted,weightedAverageShsOut,weightedAverageShsOutDil,link,finalLink
0,2024-09-30,CMPOW,USD,1823144,2024-11-08,2024-11-08 09:09:37,2024,Q3,107135000,51727000,55408000,0.517179,0,0,0,26316000,0,26316000,78043000,0,5385000,2331000,-77129000,-0.719923,29092000,0.271545,-113937000,-84845000,-0.791945,629000,-42060000,-0.392589,-1.1,-1.1,38212440,38212440,https://www.sec.gov/Archives/edgar/data/1823144/000182314424000012/0001823144-24-000012-index.htm,https://www.sec.gov/Archives/edgar/data/1823144/000182314424000012/cmpo-20240930.htm
1,2024-06-30,CMPOW,USD,1823144,2024-08-09,2024-08-09 16:10:54,2024,Q2,108567000,52495000,56072000,0.516474,0,0,0,24279000,0,24279000,76774000,0,5316000,2380000,34173000,0.314764,31793000,0.292842,2062000,33855000,0.311835,258000,11099000,0.102232,0.44,0.32,25438469,96640692,https://www.sec.gov/Archives/edgar/data/1823144/000182314424000007/0001823144-24-000007-index.htm,https://www.sec.gov/Archives/edgar/data/1823144/000182314424000007/cmpo-20240630.htm
2,2024-03-31,CMPOW,USD,1823144,2024-05-06,2024-05-06 16:28:17,2024,Q1,104010000,48797000,55213000,0.530843,0,0,0,24077000,0,24077000,72874000,0,5419000,2221000,33357000,0.32071,31136000,0.299356,-14899001,16236999,0.15611,-836000,4025000,0.038698,0.2,0.17,20566970,96235469,https://www.sec.gov/Archives/edgar/data/1823144/000182314424000004/0001823144-24-000004-index.htm,https://www.sec.gov/Archives/edgar/data/1823144/000182314424000004/cmpo-20240331.htm


In [None]:
spark.sql('select count(distinct symbol), count(*) from financial_modeling.income_statements').show()

+----------------------+--------+
|count(DISTINCT symbol)|count(1)|
+----------------------+--------+
|                  9098|  494258|
+----------------------+--------+



### 2.Balance Sheet Statement

In [None]:
# request_counter = 0
# sleeping_time = 90

# column_types = {
#     "date": T.DateType(),
#     "fillingDate": T.DateType(),
#     "acceptedDate": T.TimestampType(),
#     "calendarYear": T.IntegerType(),
#     "cashAndCashEquivalents": T.LongType(),
#     "shortTermInvestments": T.LongType(),
#     "cashAndShortTermInvestments": T.LongType(),
#     "netReceivables": T.LongType(),
#     "inventory": T.LongType(),
#     "otherCurrentAssets": T.LongType(),
#     "totalCurrentAssets": T.LongType(),
#     "propertyPlantEquipmentNet": T.LongType(),
#     "goodwill": T.LongType(),
#     "intangibleAssets": T.LongType(),
#     "goodwillAndIntangibleAssets": T.LongType(),
#     "longTermInvestments": T.LongType(),
#     "taxAssets": T.LongType(),
#     "otherNonCurrentAssets": T.LongType(),
#     "totalNonCurrentAssets": T.LongType(),
#     "otherAssets": T.LongType(),
#     "totalAssets": T.LongType(),
#     "accountPayables": T.LongType(),
#     "shortTermDebt": T.LongType(),
#     "taxPayables": T.LongType(),
#     "deferredRevenue": T.LongType(),
#     "otherCurrentLiabilities": T.LongType(),
#     "totalCurrentLiabilities": T.LongType(),
#     "longTermDebt": T.LongType(),
#     "deferredRevenueNonCurrent": T.LongType(),
#     "deferredTaxLiabilitiesNonCurrent": T.LongType(),
#     "otherNonCurrentLiabilities": T.LongType(),
#     "totalNonCurrentLiabilities": T.LongType(),
#     "otherLiabilities": T.LongType(),
#     "capitalLeaseObligations": T.LongType(),
#     "totalLiabilities": T.LongType(),
#     "preferredStock": T.LongType(),
#     "commonStock": T.LongType(),
#     "retainedEarnings": T.LongType(),
#     "accumulatedOtherComprehensiveIncomeLoss": T.LongType(),
#     "othertotalStockholdersEquity": T.LongType(),
#     "totalStockholdersEquity": T.LongType(),
#     "totalEquity": T.LongType(),
#     "totalLiabilitiesAndStockholdersEquity": T.LongType(),
#     "minorityInterest": T.LongType(),
#     "totalLiabilitiesAndTotalEquity": T.LongType(),
#     "totalInvestments": T.LongType(),
#     "totalDebt": T.LongType(),
#     "netDebt": T.LongType(),
# }

# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:

#     base_url = f'https://financialmodelingprep.com/api/v3/balance-sheet-statement/{stock}?period=quarter'

#     if request_counter==300:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)

#         for col_name, dtype in column_types.items():
#           combined_df = combined_df.withColumn(col_name, F.col(col_name).cast(dtype))

#         combined_df = combined_df.repartition(1)

#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('balance_sheet_statements'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}&apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
#         df = spark.createDataFrame(data, schema=schema)
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")



# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.balance_sheet_statements'))

In [None]:
display(spark.sql('describe history financial_modeling.balance_sheet_statements'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,31,2025-02-28 03:31:11,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '14149', 'numOutputBytes': '3397710', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
1,30,2025-02-28 03:24:39,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '13967', 'numOutputBytes': '3299451', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
2,29,2025-02-28 03:18:02,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '15816', 'numOutputBytes': '3708707', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
3,28,2025-02-28 03:11:14,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '14891', 'numOutputBytes': '3457719', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
4,27,2025-02-28 03:04:11,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '15564', 'numOutputBytes': '3742389', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
5,26,2025-02-28 02:56:41,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '14520', 'numOutputBytes': '3387586', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
6,25,2025-02-28 02:49:47,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '13551', 'numOutputBytes': '3171199', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
7,24,2025-02-28 02:43:15,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '17709', 'numOutputBytes': '4306939', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
8,23,2025-02-28 02:36:32,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '18560', 'numOutputBytes': '4522994', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
9,22,2025-02-28 02:29:50,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,21,Serializable,True,"{'numOutputRows': '19579', 'numOutputBytes': '4756852', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0


In [None]:
display(spark.sql('describe financial_modeling.balance_sheet_statements'), n=100)

Unnamed: 0,col_name,data_type,comment
0,date,date,
1,symbol,string,
2,reportedCurrency,string,
3,cik,string,
4,fillingDate,date,
5,acceptedDate,timestamp,
6,calendarYear,int,
7,period,string,
8,cashAndCashEquivalents,bigint,
9,shortTermInvestments,bigint,


In [None]:
display(spark.sql('select * from financial_modeling.balance_sheet_statements limit 3'))

Unnamed: 0,date,symbol,reportedCurrency,cik,fillingDate,acceptedDate,calendarYear,period,cashAndCashEquivalents,shortTermInvestments,cashAndShortTermInvestments,netReceivables,inventory,otherCurrentAssets,totalCurrentAssets,propertyPlantEquipmentNet,goodwill,intangibleAssets,goodwillAndIntangibleAssets,longTermInvestments,taxAssets,otherNonCurrentAssets,totalNonCurrentAssets,otherAssets,totalAssets,accountPayables,shortTermDebt,taxPayables,deferredRevenue,otherCurrentLiabilities,totalCurrentLiabilities,longTermDebt,deferredRevenueNonCurrent,deferredTaxLiabilitiesNonCurrent,otherNonCurrentLiabilities,totalNonCurrentLiabilities,otherLiabilities,capitalLeaseObligations,totalLiabilities,preferredStock,commonStock,retainedEarnings,accumulatedOtherComprehensiveIncomeLoss,othertotalStockholdersEquity,totalStockholdersEquity,totalEquity,totalLiabilitiesAndStockholdersEquity,minorityInterest,totalLiabilitiesAndTotalEquity,totalInvestments,totalDebt,netDebt,link,finalLink
0,2024-09-30,ATNF,USD,1690080,2024-11-14,2024-11-14 16:41:29,2024,Q3,194920,0,194920,0,0,986115,1225381,0,0,9244475,9244475,0,0,0,9244475,0,10469856,2090827,473143,0,0,3819860,6383830,10574,0,341678,0,352252,0,0,6736082,0,103,-129238340,-2983072,135955083,3733774,3733774,10469856,0,10469856,0,473143,278223,https://www.sec.gov/Archives/edgar/data/1690080/000121390024098508/0001213900-24-098508-index.htm,https://www.sec.gov/Archives/edgar/data/1690080/000121390024098508/ea0220680-10q_180life.htm
1,2024-06-30,ATNF,USD,1690080,2024-08-12,2024-08-12 17:00:21,2024,Q2,1146288,0,1146288,9959,0,1191243,2347490,0,0,1563955,1563955,0,0,0,1563955,0,3911445,2919883,428996,0,0,667248,4016127,13289,0,300016,0,313305,0,0,4329432,0,95,-128401620,-2915740,130899278,-417987,-417987,3911445,0,3911445,0,442285,-704003,https://www.sec.gov/Archives/edgar/data/1690080/000121390024067741/0001213900-24-067741-index.htm,https://www.sec.gov/Archives/edgar/data/1690080/000121390024067741/ea0210822-10q_180life.htm
2,2024-03-31,ATNF,USD,1690080,2024-05-15,2024-05-15 17:01:05,2024,Q1,675977,0,675977,10224,0,1168747,1854948,0,0,1586792,1586792,0,0,-1,1586791,1,3441740,1837226,772334,0,0,1470747,4080307,16544,-16545,299355,0,315898,1,0,4396206,0,86,-128413401,-2894879,130353728,-954466,-954466,3441740,0,3441740,0,788878,112901,https://www.sec.gov/Archives/edgar/data/1690080/000121390024043746/0001213900-24-043746-index.htm,https://www.sec.gov/Archives/edgar/data/1690080/000121390024043746/ea0204921-10q_180life.htm


In [None]:
display(spark.sql('select count(distinct symbol), count(*) from financial_modeling.balance_sheet_statements'))

Unnamed: 0,count(DISTINCT symbol),count(1)
0,9130,469120


### 3.Cashflow Statement

In [None]:
# request_counter = 0
# sleeping_time = 90

# column_types = {
#     "date": T.DateType(),
#     "fillingDate": T.DateType(),
#     "acceptedDate": T.TimestampType(),
#     "calendarYear": T.IntegerType(),
#     "netIncome": T.LongType(),
#     "depreciationAndAmortization": T.LongType(),
#     "deferredIncomeTax": T.LongType(),
#     "stockBasedCompensation": T.LongType(),
#     "changeInWorkingCapital": T.LongType(),
#     "accountsReceivables": T.LongType(),
#     "inventory": T.LongType(),
#     "accountsPayables": T.LongType(),
#     "otherWorkingCapital": T.LongType(),
#     "otherNonCashItems": T.LongType(),
#     "netCashProvidedByOperatingActivities": T.LongType(),
#     "investmentsInPropertyPlantAndEquipment": T.LongType(),
#     "acquisitionsNet": T.LongType(),
#     "purchasesOfInvestments": T.LongType(),
#     "salesMaturitiesOfInvestments": T.LongType(),
#     "otherInvestingActivites": T.LongType(),
#     "netCashUsedForInvestingActivites": T.LongType(),
#     "debtRepayment": T.LongType(),
#     "commonStockIssued": T.LongType(),
#     "commonStockRepurchased": T.LongType(),
#     "dividendsPaid": T.LongType(),
#     "otherFinancingActivites": T.LongType(),
#     "netCashUsedProvidedByFinancingActivities": T.LongType(),
#     "effectOfForexChangesOnCash": T.LongType(),
#     "netChangeInCash": T.LongType(),
#     "cashAtEndOfPeriod": T.LongType(),
#     "cashAtBeginningOfPeriod": T.LongType(),
#     "operatingCashFlow": T.LongType(),
#     "capitalExpenditure": T.LongType(),
#     "freeCashFlow": T.LongType(),
# }

# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:
#   # for stock in ['LJAQW','LL','LLAP','LOTZ','LOCC','LOTZW','LSAQ','LSDI']:

#     base_url = f'https://financialmodelingprep.com/api/v3/cash-flow-statement/{stock}?period=quarter'

#     if request_counter==300:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)

#         for col_name, dtype in column_types.items():
#           combined_df = combined_df.withColumn(col_name, F.col(col_name).cast(dtype))

#         combined_df = combined_df.repartition(1)

#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('cashflow_statements'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}&apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
#         df = spark.createDataFrame(data, schema=schema)
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")



# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.cashflow_statements'))

In [None]:
display(spark.sql('describe history financial_modeling.cashflow_statements'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,31,2025-02-28 10:34:19,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '14078', 'numOutputBytes': '2393437', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
1,30,2025-02-28 10:30:00,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '13905', 'numOutputBytes': '2334520', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
2,29,2025-02-28 10:25:48,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '15766', 'numOutputBytes': '2610856', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
3,28,2025-02-28 10:21:23,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '14930', 'numOutputBytes': '2505195', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
4,27,2025-02-28 10:17:00,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '15427', 'numOutputBytes': '2676613', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
5,26,2025-02-28 10:12:44,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '14329', 'numOutputBytes': '2407606', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
6,25,2025-02-28 10:08:18,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '13483', 'numOutputBytes': '2270894', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
7,24,2025-02-28 10:04:05,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '17614', 'numOutputBytes': '3079614', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
8,23,2025-02-28 09:59:39,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '18464', 'numOutputBytes': '3231836', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0
9,22,2025-02-28 09:55:15,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,21,Serializable,True,"{'numOutputRows': '19525', 'numOutputBytes': '3351523', 'numFiles': '1'}",,Apache-Spark/3.5.4 Delta-Lake/3.3.0


In [None]:
display(spark.sql('describe financial_modeling.cashflow_statements'), n=100)

Unnamed: 0,col_name,data_type,comment
0,date,date,
1,symbol,string,
2,reportedCurrency,string,
3,cik,string,
4,fillingDate,date,
5,acceptedDate,timestamp,
6,calendarYear,int,
7,period,string,
8,netIncome,bigint,
9,depreciationAndAmortization,bigint,


In [None]:
display(spark.sql('select * from financial_modeling.cashflow_statements limit 3'))

Unnamed: 0,date,symbol,reportedCurrency,cik,fillingDate,acceptedDate,calendarYear,period,netIncome,depreciationAndAmortization,deferredIncomeTax,stockBasedCompensation,changeInWorkingCapital,accountsReceivables,inventory,accountsPayables,otherWorkingCapital,otherNonCashItems,netCashProvidedByOperatingActivities,investmentsInPropertyPlantAndEquipment,acquisitionsNet,purchasesOfInvestments,salesMaturitiesOfInvestments,otherInvestingActivites,netCashUsedForInvestingActivites,debtRepayment,commonStockIssued,commonStockRepurchased,dividendsPaid,otherFinancingActivites,netCashUsedProvidedByFinancingActivities,effectOfForexChangesOnCash,netChangeInCash,cashAtEndOfPeriod,cashAtBeginningOfPeriod,operatingCashFlow,capitalExpenditure,freeCashFlow,link,finalLink
0,2024-10-31,CRWD,USD,1535527,2024-11-27,2024-11-26 21:24:56,2025,Q3,-16819000,55248000,-193000,208888000,-5746000,0,0,0,-5746000,84758000,326136000,-78704000,-500000,-10606000,0,-15771000,-105581000,0,0,0,0,844000,844000,399000,221798000,4262214000,4040416000,326136000,-78704000,247432000,https://www.sec.gov/Archives/edgar/data/1535527/000153552724000026/0001535527-24-000026-index.htm,https://www.sec.gov/Archives/edgar/data/1535527/000153552724000026/crwd-20241031.htm
1,2024-07-31,CRWD,USD,1535527,2024-08-29,2024-08-28 21:54:31,2025,Q2,47013000,52584000,-1674000,200877000,-50919000,41811000,0,-348000,-92382000,78760000,326641000,-39254000,-5000,-1644000,529000,-14516000,-54890000,0,57740000,0,0,62496000,62496000,877000,335124000,4038536000,3702437000,326641000,-53770000,272871000,https://www.sec.gov/Archives/edgar/data/1535527/000153552724000020/0001535527-24-000020-index.htm,https://www.sec.gov/Archives/edgar/data/1535527/000153552724000020/crwd-20240731.htm
2,2024-04-30,CRWD,USD,1535527,2024-06-05,2024-06-04 21:32:23,2025,Q1,42820000,48684000,-255000,183125000,31632000,150249000,0,276000,-118893000,207190000,383228000,-60162000,-95876000,-1658000,107707000,-609000,-50598000,0,823000,0,0,-2518000,-3018000,-1917000,327368000,3702437000,3375069000,383228000,-60162000,323066000,https://www.sec.gov/Archives/edgar/data/1535527/000153552724000013/0001535527-24-000013-index.htm,https://www.sec.gov/Archives/edgar/data/1535527/000153552724000013/crwd-20240430.htm


In [None]:
display(spark.sql('select count(distinct symbol), count(*) from financial_modeling.cashflow_statements'))

Unnamed: 0,count(DISTINCT symbol),count(1)
0,9073,462906


In [None]:
display(
    spark.sql(""" select * from financial_modeling.cashflow_statements where symbol in ('LJAQW','LL','LLAP','LOTZ','LOCC','LOTZW','LSAQ','LSDI') """)
)



Unnamed: 0,date,symbol,reportedCurrency,cik,fillingDate,acceptedDate,calendarYear,period,netIncome,depreciationAndAmortization,deferredIncomeTax,stockBasedCompensation,changeInWorkingCapital,accountsReceivables,inventory,accountsPayables,otherWorkingCapital,otherNonCashItems,netCashProvidedByOperatingActivities,investmentsInPropertyPlantAndEquipment,acquisitionsNet,purchasesOfInvestments,salesMaturitiesOfInvestments,otherInvestingActivites,netCashUsedForInvestingActivites,debtRepayment,commonStockIssued,commonStockRepurchased,dividendsPaid,otherFinancingActivites,netCashUsedProvidedByFinancingActivities,effectOfForexChangesOnCash,netChangeInCash,cashAtEndOfPeriod,cashAtBeginningOfPeriod,operatingCashFlow,capitalExpenditure,freeCashFlow,link,finalLink


### 4.Full Financial Statements As Reported

In [None]:

# column_types = {
#     "date": T.DateType(),
#     "documentannualreport": T.BooleanType(),
#     "documentperiodenddate": T.DateType(),
#     "documenttransitionreport": T.BooleanType(),
#     "entityaddresspostalzipcode": T.IntegralType(),
#     "cityareacode": T.IntegralType(),
#     "notradingsymbolflag": T.BooleanType(),
#     "entitysmallbusiness": T.BooleanType(),
# 		"entityemerginggrowthcompany": T.BooleanType(),
# 		"icfrauditorattestationflag": T.BooleanType(),
# 		"entityshellcompany": T.BooleanType(),
# 		"amendmentflag": T.BooleanType(),
#     "documentfiscalyearfocus": T.IntegerType(),
#     "entitycentralindexkey": T.IntegerType(),
#     "auditorfirmid": T.IntegerType(),

#     "revenuefromcontractwithcustomerexcludingassessedtax": T.LongType(),
#     "costofgoodsandservicessold": T.LongType(),
#     "grossprofit": T.LongType(),
#     "researchanddevelopmentexpense": T.LongType(),
#     "sellinggeneralandadministrativeexpense": T.LongType(),
#     "operatingexpenses": T.LongType(),
#     "operatingincomeloss": T.LongType(),
#     "nonoperatingincomeexpense": T.LongType(),
#     "incomelossfromcontinuingoperationsbeforeincometaxesextraordinaryitemsnoncontrollinginterest": T.LongType(),
#     "incometaxexpensebenefit": T.LongType(),
#     "netincomeloss": T.LongType(),
#     "earningspersharebasic": T.DoubleType(),
#     "earningspersharediluted": T.DoubleType(),
#     "weightedaveragenumberofsharesoutstandingbasic": T.LongType(),
#     "weightedaveragenumberofdilutedsharesoutstanding": T.LongType(),
#     "othercomprehensiveincomelossforeigncurrencytransactionandtranslationadjustmentnetoftax": T.LongType(),
#     "othercomprehensiveincomelossderivativeinstrumentgainlossbeforereclassificationaftertax": T.LongType(),
#     "othercomprehensiveincomelossderivativeinstrumentgainlossreclassificationaftertax": T.LongType(),
#     "othercomprehensiveincomelossderivativeinstrumentgainlossafterreclassificationandtax": T.LongType(),
#     "othercomprehensiveincomeunrealizedholdinggainlossonsecuritiesarisingduringperiodnetoftax": T.LongType(),
#     "othercomprehensiveincomelossreclassificationadjustmentfromaociforsaleofsecuritiesnetoftax": T.LongType(),
#     "othercomprehensiveincomelossavailableforsalesecuritiesadjustmentnetoftax": T.LongType(),
#     "othercomprehensiveincomelossnetoftaxportionattributabletoparent": T.LongType(),
#     "comprehensiveincomenetoftax": T.LongType(),
#     "cashandcashequivalentsatcarryingvalue": T.LongType(),
#     "marketablesecuritiescurrent": T.LongType(),
#     "accountsreceivablenetcurrent": T.LongType(),
#     "inventorynet": T.LongType(),
#     "nontradereceivablescurrent": T.LongType(),
#     "otherassetscurrent": T.LongType(),
#     "assetscurrent": T.LongType(),
#     "marketablesecuritiesnoncurrent": T.LongType(),
#     "propertyplantandequipmentnet": T.LongType(),
#     "otherassetsnoncurrent": T.LongType(),
#     "assetsnoncurrent": T.LongType(),
#     "assets": T.LongType(),

#     "accountspayablecurrent": T.LongType(),
#     "otherliabilitiescurrent": T.LongType(),
#     "contractwithcustomerliabilitycurrent": T.LongType(),
#     "commercialpaper": T.LongType(),
#     "longtermdebtcurrent": T.LongType(),
#     "liabilitiescurrent": T.LongType(),
#     "longtermdebtnoncurrent": T.LongType(),
#     "otherliabilitiesnoncurrent": T.LongType(),
#     "liabilitiesnoncurrent": T.LongType(),
#     "liabilities": T.LongType(),
#     "commonstocksincludingadditionalpaidincapital": T.LongType(),
#     "retainedearningsaccumulateddeficit": T.LongType(),
#     "accumulatedothercomprehensiveincomelossnetoftax": T.LongType(),
#     "stockholdersequity": T.LongType(),
#     "liabilitiesandstockholdersequity": T.LongType(),
#     "commonstockparorstatedvaluepershare": T.LongType(),
#     "commonstocksharesauthorized": T.LongType(),
#     "commonstocksharesissued": T.LongType(),
#     "commonstocksharesoutstanding": T.LongType(),
#     "stockissuedduringperiodvaluenewissues": T.LongType(),
#     "adjustmentsrelatedtotaxwithholdingforsharebasedcompensation": T.LongType(),
#     "adjustmentstoadditionalpaidincapitalsharebasedcompensationrequisiteserviceperiodrecognitionvalue": T.LongType(),
#     "dividends": T.LongType(),
#     "stockrepurchasedandretiredduringperiodvalue": T.LongType(),
#     "commonstockdividendspersharedeclared": T.LongType(),
#     "cashcashequivalentsrestrictedcashandrestrictedcashequivalents": T.LongType(),
#     "depreciationdepletionandamortization": T.LongType(),
#     "sharebasedcompensation": T.LongType(),
#     "deferredincometaxexpensebenefit": T.LongType(),
#     "othernoncashincomeexpense": T.LongType(),
#     "increasedecreaseinaccountsreceivable": T.LongType(),
#     "increasedecreaseininventories": T.LongType(),
#     "increasedecreaseinotherreceivables": T.LongType(),
#     "increasedecreaseinotheroperatingassets": T.LongType(),
#     "increasedecreaseinaccountspayable": T.LongType(),
#     "increasedecreaseincontractwithcustomerliability": T.LongType(),
#     "increasedecreaseinotheroperatingliabilities": T.LongType(),
#     "netcashprovidedbyusedinoperatingactivities": T.LongType(),
#     "paymentstoacquireavailableforsalesecuritiesdebt": T.LongType(),
#     "proceedsfrommaturitiesprepaymentsandcallsofavailableforsalesecurities": T.LongType(),
#     "proceedsfromsaleofavailableforsalesecuritiesdebt": T.LongType(),
#     "paymentstoacquirepropertyplantandequipment": T.LongType(),
#     "paymentstoacquirebusinessesnetofcashacquired": T.LongType(),
#     "paymentsforproceedsfromotherinvestingactivities": T.LongType(),
#     "netcashprovidedbyusedininvestingactivities": T.LongType(),
#     "paymentsrelatedtotaxwithholdingforsharebasedcompensation": T.LongType(),
#     "paymentsofdividends": T.LongType(),
#     "paymentsforrepurchaseofcommonstock": T.LongType(),
#     "proceedsfromissuanceoflongtermdebt": T.LongType(),
#     "repaymentsoflongtermdebt": T.LongType(),
#     "proceedsfromrepaymentsofcommercialpaper": T.LongType(),
#     "proceedsfrompaymentsforotherfinancingactivities": T.LongType(),
#     "netcashprovidedbyusedinfinancingactivities": T.LongType(),
#     "cashcashequivalentsrestrictedcashandrestrictedcashequivalentsperiodincreasedecreaseincludingexchangerateeffect": T.LongType(),
#     "incometaxespaidnet": T.LongType(),
#     "interestpaidnet": T.LongType(),
#     "performanceobligationsinarrangements": T.LongType(),
#     "depreciation": T.LongType(),

#     "weightedaveragenumberdilutedsharesoutstandingadjustment": T.LongType(),
#     "contractwithcustomerliabilityrevenuerecognized": T.LongType(),
#     "contractwithcustomerliability": T.LongType(),
#     "revenueremainingperformanceobligationpercentage": T.DoubleType(),
#     "cash": T.LongType(),
#     "equitysecuritiesfvnicost": T.LongType(),
#     "equitysecuritiesfvniaccumulatedgrossunrealizedlossbeforetax": T.LongType(),
#     "equitysecuritiesfvnicurrentandnoncurrent": T.LongType(),
#     "availableforsaledebtsecuritiesamortizedcostbasis": T.LongType(),
#     "availableforsaledebtsecuritiesaccumulatedgrossunrealizedgainbeforetax": T.LongType(),
#     "availableforsaledebtsecuritiesaccumulatedgrossunrealizedlossbeforetax": T.LongType(),
#     "availableforsalesecuritiesdebtsecurities": T.LongType(),
#     "cashcashequivalentsandmarketablesecuritiescost": T.LongType(),
#     "cashequivalentsandmarketablesecuritiesaccumulatedgrossunrealizedgainbeforetax": T.LongType(),
#     "cashequivalentsandmarketablesecuritiesaccumulatedgrossunrealizedlossbeforetax": T.LongType(),
#     "cashcashequivalentsandmarketablesecurities": T.LongType(),
#     "restrictedinvestments": T.LongType(),
#     "availableforsalesecuritiesdebtmaturitiesrollingyeartwothroughfivefairvalue": T.LongType(),
#     "availableforsalesecuritiesdebtmaturitiesrollingyearsixthroughtenfairvalue": T.LongType(),
#     "availableforsalesecuritiesdebtmaturitiesrollingafteryeartenfairvalue": T.LongType(),
#     "availableforsalesecuritiesdebtmaturitiessinglematuritydate": T.LongType(),
#     "fairvalueconcentrationofriskderivativefinancialinstrumentsassets": T.LongType(),
#     "derivativeassetsreductionformasternettingarrangementsincludingtheeffectsofcollateral": T.LongType(),
#     "derivativeliabilitiesreductionformasternettingarrangementsincludingtheeffectsofcollateral": T.LongType(),
#     "derivativefairvalueofderivativenet": T.LongType(),
#     "numberofcustomerswithsignificantaccountsreceivablebalance": T.LongType(),
#     "concentrationriskpercentage1": T.DoubleType(),
#     "numberofsignificantvendors": T.LongType(),
#     "derivativenotionalamount": T.LongType(),
#     "derivativeassetfairvaluegrossassetincludingnotsubjecttomasternettingarrangement": T.LongType(),
#     "derivativeliabilityfairvaluegrossliabilityincludingnotsubjecttomasternettingarrangement": T.LongType(),
#     "hedgedassetfairvaluehedge": T.LongType(),
#     "hedgedliabilityfairvaluehedge": T.LongType(),
#     "propertyplantandequipmentgross": T.LongType(),
#     "accumulateddepreciationdepletionandamortizationpropertyplantandequipment": T.LongType(),
#     "accruedincometaxesnoncurrent": T.LongType(),
#     "otheraccruedliabilitiesnoncurrent": T.LongType(),
#     "investmentincomeinterestanddividend": T.LongType(),
#     "interestexpense": T.LongType(),
#     "othernonoperatingincomeexpense": T.LongType(),
#     "currentfederaltaxexpensebenefit": T.LongType(),
#     "deferredfederalincometaxexpensebenefit": T.LongType(),
#     "federalincometaxexpensebenefitcontinuingoperations": T.LongType(),
#     "currentstateandlocaltaxexpensebenefit": T.LongType(),
#     "deferredstateandlocalincometaxexpensebenefit": T.LongType(),
#     "stateandlocalincometaxexpensebenefitcontinuingoperations": T.LongType(),
#     "currentforeigntaxexpensebenefit": T.LongType(),
#     "deferredforeignincometaxexpensebenefit": T.LongType(),
#     "foreignincometaxexpensebenefitcontinuingoperations": T.LongType(),
#     "incomelossfromcontinuingoperationsbeforeincometaxesforeign": T.LongType(),
#     "effectiveincometaxratereconciliationatfederalstatutoryincometaxrate": T.DoubleType(),
#     "deferredtaxassetstaxcreditcarryforwardsforeign": T.LongType(),
#     "deferredtaxassetstaxcreditcarryforwardsresearch": T.LongType(),
#     "unrecognizedtaxbenefits": T.LongType(),
#     "unrecognizedtaxbenefitsthatwouldimpacteffectivetaxrate": T.LongType(),
#     "decreaseinunrecognizedtaxbenefitsisreasonablypossible": T.LongType(),
#     "losscontingencyestimateofpossibleloss": T.LongType(),
#     "incometaxreconciliationincometaxexpensebenefitatfederalstatutoryincometaxrate": T.LongType(),
#     "incometaxreconciliationstateandlocalincometaxes": T.LongType(),
#     "effectiveincometaxratereconciliationtaxcutsandjobsactof2017amount": T.LongType(),
#     "incometaxreconciliationforeignincometaxratedifferential": T.LongType(),
#     "effectiveincometaxratereconciliationfdiiamount": T.LongType(),
#     "incometaxreconciliationtaxcreditsresearch": T.LongType(),
#     "effectiveincometaxratereconciliationsharebasedcompensationexcesstaxbenefitamount": T.LongType(),
#     "incometaxreconciliationotheradjustments": T.LongType(),
#     "effectiveincometaxratecontinuingoperations": T.DoubleType(),
#     "deferredtaxassetsgoodwillandintangibleassets": T.LongType(),
#     "deferredtaxassetstaxdeferredexpensereservesandaccruals": T.LongType(),
#     "deferredtaxassetsleaseliabilities": T.LongType(),
#     "deferredtaxassetsdeferredincome": T.LongType(),
#     "deferredtaxassetsothercomprehensiveloss": T.LongType(),
#     "deferredtaxassetstaxcreditcarryforwards": T.LongType(),
#     "deferredtaxassetsother": T.LongType(),
#     "deferredtaxassetsgross": T.LongType(),
#     "deferredtaxassetsvaluationallowance": T.LongType(),
#     "deferredtaxassetsnet": T.LongType(),
#     "deferredtaxliabilitiesminimumtaxonforeignearnings": T.LongType(),
#     "deferredtaxliabilitiesleasingarrangements": T.LongType(),
#     "deferredtaxliabilitiesothercomprehensiveincome": T.LongType(),
#     "deferredtaxliabilitiesother": T.LongType(),
#     "deferredincometaxliabilities": T.LongType(),
#     "deferredtaxassetsliabilitiesnet": T.LongType(),
#     "unrecognizedtaxbenefitsincreasesresultingfrompriorperiodtaxpositions": T.LongType(),
#     "unrecognizedtaxbenefitsdecreasesresultingfrompriorperiodtaxpositions": T.LongType(),
#     "unrecognizedtaxbenefitsincreasesresultingfromcurrentperiodtaxpositions": T.LongType(),
#     "unrecognizedtaxbenefitsdecreasesresultingfromsettlementswithtaxingauthorities": T.LongType(),
#     "unrecognizedtaxbenefitsreductionsresultingfromlapseofapplicablestatuteoflimitations": T.LongType(),
#     "operatingleasecost": T.LongType(),
#     "variableleasecost": T.LongType(),
#     "operatingleasepayments": T.LongType(),
#     "rightofuseassetsobtainedinexchangeforoperatingandfinanceleaseliabilities": T.LongType(),
#     "operatingandfinanceleaseweightedaveragediscountratepercent": T.DoubleType(),
#     "lesseeoperatingandfinanceleaseleasenotyetcommencedpaymentsdue": T.LongType(),
#     "operatingleaserightofuseasset": T.LongType(),
#     "financeleaserightofuseasset": T.LongType(),
#     "operatingandfinanceleaserightofuseasset": T.LongType(),
#     "operatingleaseliabilitycurrent": T.LongType(),
#     "operatingleaseliabilitynoncurrent": T.LongType(),
#     "financeleaseliabilitycurrent": T.LongType(),
#     "financeleaseliabilitynoncurrent": T.LongType(),
#     "operatingandfinanceleaseliability": T.LongType(),
#     "lesseeoperatingleaseliabilitypaymentsduenexttwelvemonths": T.LongType(),
#     "lesseeoperatingleaseliabilitypaymentsdueyeartwo": T.LongType(),
#     "lesseeoperatingleaseliabilitypaymentsdueyearthree": T.LongType(),
#     "lesseeoperatingleaseliabilitypaymentsdueyearfour": T.LongType(),
#     "lesseeoperatingleaseliabilitypaymentsdueyearfive": T.LongType(),
#     "lesseeoperatingleaseliabilitypaymentsdueafteryearfive": T.LongType(),
#     "lesseeoperatingleaseliabilitypaymentsdue": T.LongType(),
#     "lesseeoperatingleaseliabilityundiscountedexcessamount": T.LongType(),
#     "operatingleaseliability": T.LongType(),
#     "financeleaseliabilitypaymentsduenexttwelvemonths": T.LongType(),
#     "financeleaseliabilitypaymentsdueyeartwo": T.LongType(),
#     "financeleaseliabilitypaymentsdueyearthree": T.LongType(),
#     "financeleaseliabilitypaymentsdueyearfour": T.LongType(),
#     "financeleaseliabilitypaymentsdueyearfive": T.LongType(),
#     "financeleaseliabilitypaymentsdueafteryearfive": T.LongType(),
#     "financeleaseliabilitypaymentsdue": T.LongType(),
#     "financeleaseliabilityundiscountedexcessamount": T.LongType(),
#     "financeleaseliability": T.LongType(),
#     "lesseeoperatingandfinanceleaseliabilitytobepaidyearone": T.LongType(),
#     "lesseeoperatingandfinanceleaseliabilitytobepaidyeartwo": T.LongType(),
#     "lesseeoperatingandfinanceleaseliabilitytobepaidyearthree": T.LongType(),
#     "lesseeoperatingandfinanceleaseliabilitytobepaidyearfour": T.LongType(),
#     "lesseeoperatingandfinanceleaseliabilitytobepaidyearfive": T.LongType(),
#     "lesseeoperatingandfinanceleaseliabilitytobepaidafteryearfive": T.LongType(),
#     "lesseeoperatingandfinanceleaseliabilitypaymentsdue": T.LongType(),
#     "lesseeoperatingandfinanceleaseliabilityundiscountedexcessamount": T.LongType(),
#     "shorttermdebtweightedaverageinterestrate": T.DoubleType(),
#     "interestcostsincurred": T.LongType(),
#     "longtermdebtfairvalue": T.LongType(),
#     "proceedsfromrepaymentsofshorttermdebtmaturinginthreemonthsorless": T.LongType(),
#     "proceedsfromshorttermdebtmaturinginmorethanthreemonths": T.LongType(),
#     "repaymentsofshorttermdebtmaturinginmorethanthreemonths": T.LongType(),
#     "proceedsfromrepaymentsofshorttermdebtmaturinginmorethanthreemonths": T.LongType(),
#     "debtinstrumentcarryingamount": T.LongType(),
#     "debtinstrumentunamortizeddiscountpremiumanddebtissuancecostsnet": T.LongType(),
#     "hedgeaccountingadjustmentsrelatedtolongtermdebt": T.LongType(),
#     "debtinstrumentmaturityyearrangestart": T.LongType(),
#     "debtinstrumentmaturityyearrangeend": T.LongType(),
#     "debtinstrumentinterestratestatedpercentage": T.DoubleType(),
#     "debtinstrumentinterestrateeffectivepercentage": T.DoubleType(),
#     "longtermdebtmaturitiesrepaymentsofprincipalinnexttwelvemonths": T.LongType(),
#     "longtermdebtmaturitiesrepaymentsofprincipalinyeartwo": T.LongType(),
#     "longtermdebtmaturitiesrepaymentsofprincipalinyearthree": T.LongType(),
#     "longtermdebtmaturitiesrepaymentsofprincipalinyearfour": T.LongType(),
#     "longtermdebtmaturitiesrepaymentsofprincipalinyearfive": T.LongType(),
#     "longtermdebtmaturitiesrepaymentsofprincipalafteryearfive": T.LongType(),
#     "stockrepurchasedandretiredduringperiodshares": T.LongType(),
#     "stockissuedduringperiodsharessharebasedpaymentarrangementnetofshareswithheldfortaxes": T.LongType(),
#     "sharebasedcompensationarrangementbysharebasedpaymentawardequityinstrumentsotherthanoptionsnumberofsharesofcommonstockissuedperunituponvesting": T.LongType(),
#     "factorbywhicheachrsugrantedreducesandeachrsucanceledorsharewithheldfortaxesincreasessharesavailableforgrant": T.LongType(),
#     "sharebasedcompensationarrangementbysharebasedpaymentawardpurchasepriceofcommonstockpercent": T.DoubleType(),

#     "sharebasedcompensationarrangementbysharebasedpaymentawardmaximumemployeesubscriptionrate": T.DoubleType(),
#     "employeestockpurchaseplanmaximumannualpurchasesperemployeeamount": T.LongType(),
#     "definedcontributionplanemployermatchingcontributionpercentofmatch": T.DoubleType(),
#     "definedcontributionplanemployermatchingcontributionpercent": T.DoubleType(),
#     "sharebasedcompensationarrangementbysharebasedpaymentawardequityinstrumentsotherthanoptionsvestedinperiodtotalfairvalue": T.LongType(),
#     "sharespaidfortaxwithholdingforsharebasedcompensation": T.LongType(),
#     "employeeservicesharebasedcompensationnonvestedawardstotalcompensationcostnotyetrecognized": T.LongType(),
#     "sharebasedcompensationarrangementbysharebasedpaymentawardequityinstrumentsotherthanoptionsnonvestednumber": T.LongType(),
#     "sharebasedcompensationarrangementbysharebasedpaymentawardequityinstrumentsotherthanoptionsgrantsinperiod": T.LongType(),
#     "sharebasedcompensationarrangementbysharebasedpaymentawardequityinstrumentsotherthanoptionsvestedinperiod": T.LongType(),
#     "sharebasedcompensationarrangementbysharebasedpaymentawardequityinstrumentsotherthanoptionsforfeitedinperiod": T.LongType(),
#     "sharebasedcompensationarrangementbysharebasedpaymentawardequityinstrumentsotherthanoptionsnonvestedweightedaveragegrantdatefairvalue": T.DoubleType(),
#     "sharebasedcompensationarrangementbysharebasedpaymentawardequityinstrumentsotherthanoptionsgrantsinperiodweightedaveragegrantdatefairvalue": T.DoubleType(),
#     "sharebasedcompensationarrangementbysharebasedpaymentawardequityinstrumentsotherthanoptionsvestedinperiodweightedaveragegrantdatefairvalue": T.DoubleType(),
#     "sharebasedcompensationarrangementbysharebasedpaymentawardequityinstrumentsotherthanoptionsforfeituresweightedaveragegrantdatefairvalue": T.DoubleType(),
#     "sharebasedcompensationarrangementbysharebasedpaymentawardequityinstrumentsotherthanoptionsaggregateintrinsicvaluenonvested": T.LongType(),
#     "allocatedsharebasedcompensationexpense": T.LongType(),
#     "employeeservicesharebasedcompensationtaxbenefitfromcompensationexpense": T.LongType(),
#     "unrecordedunconditionalpurchaseobligationbalanceonfirstanniversary": T.LongType(),
#     "unrecordedunconditionalpurchaseobligationbalanceonsecondanniversary": T.LongType(),
#     "unrecordedunconditionalpurchaseobligationbalanceonthirdanniversary": T.LongType(),
#     "unrecordedunconditionalpurchaseobligationbalanceonfourthanniversary": T.LongType(),
#     "unrecordedunconditionalpurchaseobligationbalanceonfifthanniversary": T.LongType(),
#     "unrecordedunconditionalpurchaseobligationdueafterfiveyears": T.LongType(),
#     "unrecordedunconditionalpurchaseobligationbalancesheetamount": T.LongType(),
#     "othergeneralandadministrativeexpense": T.LongType(),
#     "noncurrentassets": T.LongType(),

# }


# request_counter = 0
# sleeping_time = 90

# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:

#     base_url = f'https://financialmodelingprep.com/api/v3/financial-statement-full-as-reported/{stock}?period=qurter'

#     if request_counter==300 or stock==stock_list[-1]:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)

#         for col_name, dtype in column_types.items():
#           combined_df = combined_df.withColumn(col_name, F.col(col_name).cast(dtype))

#         combined_df = combined_df.repartition(1)

#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('full_financial_statements'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}&apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
#         df = spark.createDataFrame(data, schema=schema)
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")



# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.full_financial_statements'))

## Statements Analysis:

### 1.Key Metrics

In [None]:
# request_counter = 0
# sleeping_time = 70

# column_types = {

#     "date": T.DateType(),
#     "calendarYear": T.IntegerType(),
#     "revenuePerShare": T.DoubleType(),
#     "netIncomePerShare": T.DoubleType(),
#     "operatingCashFlowPerShare": T.DoubleType(),
#     "freeCashFlowPerShare": T.DoubleType(),
#     "cashPerShare": T.DoubleType(),
#     "bookValuePerShare": T.DoubleType(),
#     "tangibleBookValuePerShare": T.DoubleType(),
#     "shareholdersEquityPerShare": T.DoubleType(),
#     "interestDebtPerShare": T.DoubleType(),
#     "marketCap": T.LongType(),
#     "enterpriseValue": T.LongType(),
#     "peRatio": T.DoubleType(),
#     "priceToSalesRatio": T.DoubleType(),
#     "pocfratio": T.DoubleType(),
#     "pfcfRatio": T.DoubleType(),
#     "pbRatio": T.DoubleType(),
#     "ptbRatio": T.DoubleType(),
#     "evToSales": T.DoubleType(),
#     "enterpriseValueOverEBITDA": T.DoubleType(),
#     "evToOperatingCashFlow": T.DoubleType(),
#     "evToFreeCashFlow": T.DoubleType(),
#     "earningsYield": T.DoubleType(),
#     "freeCashFlowYield": T.DoubleType(),
#     "debtToEquity": T.DoubleType(),
#     "debtToAssets": T.DoubleType(),
#     "netDebtToEBITDA": T.DoubleType(),
#     "currentRatio":T.DoubleType(),
#     "interestCoverage":T.DoubleType(),
#     "incomeQuality":T.DoubleType(),
#     "dividendYield":T.DoubleType(),
#     "payoutRatio":T.DoubleType(),
#     "salesGeneralAndAdministrativeToRevenue":T.IntegerType(),
#     "researchAndDdevelopementToRevenue":T.DoubleType(),
#     "intangiblesToTotalAssets":T.IntegerType(),
#     "capexToOperatingCashFlow":T.DoubleType(),
#     "capexToRevenue":T.DoubleType(),
#     "capexToDepreciation":T.DoubleType(),
#     "stockBasedCompensationToRevenue":T.DoubleType(),
#     "grahamNumber":T.DoubleType(),
#     "roic":T.DoubleType(),
#     "returnOnTangibleAssets":T.DoubleType(),
#     "grahamNetNet":T.DoubleType(),
#     "workingCapital":T.LongType(),
#     "tangibleAssetValue":T.LongType(),
#     "netCurrentAssetValue":T.LongType(),
#     "investedCapital":T.DoubleType(),
#     "averageReceivables": T.LongType(),
#     "averagePayables": T.LongType(),
#     "averageInventory": T.LongType(),
#     "daysSalesOutstanding": T.DoubleType(),
#     "daysPayablesOutstanding": T.DoubleType(),
#     "daysOfInventoryOnHand": T.DoubleType(),
#     "receivablesTurnover": T.DoubleType(),
#     "payablesTurnover": T.DoubleType(),
#     "inventoryTurnover": T.LongType(),
#     "roe": T.LongType(),
#     "capexPerShare": T.LongType(),

# }

# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:

#     base_url = f'https://financialmodelingprep.com/api/v3/key-metrics/{stock}?period=quarter'

#     if request_counter==300 or stock==stock_list[-1]:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)

#         for col_name, dtype in column_types.items():
#           combined_df = combined_df.withColumn(col_name, F.col(col_name).cast(dtype))

#         combined_df = combined_df.repartition(1)

#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('key_metrics'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}&apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
#         df = spark.createDataFrame(data, schema=schema)
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")



# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.key_metrics'))

Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds.

Unnamed: 0,symbol,date,calendarYear,period,revenuePerShare,netIncomePerShare,operatingCashFlowPerShare,freeCashFlowPerShare,cashPerShare,bookValuePerShare,tangibleBookValuePerShare,shareholdersEquityPerShare,interestDebtPerShare,marketCap,enterpriseValue,peRatio,priceToSalesRatio,pocfratio,pfcfRatio,pbRatio,ptbRatio,evToSales,enterpriseValueOverEBITDA,evToOperatingCashFlow,evToFreeCashFlow,earningsYield,freeCashFlowYield,debtToEquity,debtToAssets,netDebtToEBITDA,currentRatio,interestCoverage,incomeQuality,dividendYield,payoutRatio,salesGeneralAndAdministrativeToRevenue,researchAndDdevelopementToRevenue,intangiblesToTotalAssets,capexToOperatingCashFlow,capexToRevenue,capexToDepreciation,stockBasedCompensationToRevenue,grahamNumber,roic,returnOnTangibleAssets,grahamNetNet,workingCapital,tangibleAssetValue,netCurrentAssetValue,investedCapital,averageReceivables,averagePayables,averageInventory,daysSalesOutstanding,daysPayablesOutstanding,daysOfInventoryOnHand,receivablesTurnover,payablesTurnover,inventoryTurnover,roe,capexPerShare
0,R,2024-12-31,2024,Q4,74.444625,3.133777,12.952947,-4.712273,3.574828,72.35544,34.866176,72.35544,208.570301,,15488370000.0,12.513652,2.10707,12.109986,-33.287547,2.167909,2.167909,4.829552,20.651163,27.756939,-76.297399,0.019978,-0.030041,2.850497,0.532929,11.641333,0.752981,0.0,4.133333,0.005032,0.251852,0,0.0,0,1.363799,0.237293,1.622601,0.0,71.426757,0.0,0.008966,-278.679867,-808000000,1502000000,-11092000000,12094000000.0,1865000000,859500000,0,52.22638,29.246468,0.0,1.723267,3.077295,0,0,17
1,R,2024-09-30,2024,Q3,74.489247,3.323114,14.719993,0.725469,3.791159,71.517166,33.93321,71.517166,207.226604,,14825180000.0,10.968626,1.95733,9.904896,200.973542,2.038671,2.038671,4.657612,20.590528,23.569443,478.231606,0.022792,0.004976,2.86551,0.530824,11.9375,0.741514,2.928571,4.429577,0.005618,0.246479,0,0.0,0,0.950715,0.187873,1.3,0.0,73.125534,0.014988,0.009536,-277.953944,-830000000,1450000000,-11060000000,3070000000.0,1865000000,844000000,0,52.846371,31.720728,0.0,1.70305,2.837262,0,0,13
2,R,2024-06-30,2024,Q2,72.966589,2.912243,12.657938,-1.97207,3.760692,70.650554,33.846224,70.650554,202.481139,5350388000.0,13920390000.0,10.53226,1.681454,9.692732,-62.213816,1.736575,1.736575,4.374729,20.14528,25.218095,-161.864979,0.023737,-0.016074,2.834794,0.532301,12.402315,0.744661,2.71875,4.416,0.005794,0.244094,0,0.0,0,1.155797,0.200503,1.453303,0.0,68.039771,0.012643,0.008579,-269.835355,-813000000,1476000000,-10956000000,12112000000.0,1837500000,825500000,0,52.636706,27.975819,0.0,1.709833,3.217064,0,0,14
3,R,2024-03-31,2024,Q1,70.637056,1.938073,11.993251,-3.648137,5.335401,69.884628,33.722468,69.884628,202.175202,5268662000.0,13809660000.0,15.496063,1.700665,10.016467,-32.929135,1.718976,1.718976,4.457605,21.543934,26.254109,-86.310385,0.016133,-0.030368,2.862969,0.532431,13.324493,0.824537,2.032609,6.188235,0.006643,0.411765,0,0.0,0,1.304183,0.221433,1.577011,0.006456,55.203567,0.009545,0.005707,-269.540335,-512000000,1479000000,-11010000000,12473000000.0,1764000000,843500000,-857000000,52.698515,30.212264,0.0,1.707828,2.978923,0,0,15
4,R,2023-12-31,2023,Q4,68.857809,2.824469,11.639544,-6.058941,4.646706,69.905596,39.474225,69.905596,163.955841,,,10.184217,1.67098,9.885267,-18.990119,1.645934,1.645934,3.956788,-16.498444,23.407772,-44.967562,0.024548,-0.052659,2.318019,0.450881,-9.531034,1.096321,2.642857,4.120968,0.006335,0.258065,0,0.0,0,1.520548,0.257029,1.734375,0.014555,66.65237,0.011079,0.008586,-275.078177,199000000,1733000000,-10444000000,11644000000.0,1680500000,872000000,-819500000,51.028779,30.81381,-63.403206,1.763711,2.920768,-1,0,17
5,R,2023-09-30,2023,Q3,64.852396,3.570874,13.773371,-4.080999,3.526515,68.689423,43.538049,68.689423,148.512875,4822055000.0,11284050000.0,7.487662,1.649129,7.764983,-26.206819,1.557008,1.557008,3.859116,70.525342,18.17078,-61.326384,0.033388,-0.038158,2.137875,0.431898,40.3875,1.006193,2.133333,3.88125,0.006844,0.204969,0,0.0,0,1.296296,0.275308,1.829545,0.0,74.28882,0.007838,0.011341,-239.56462,13000000,1963000000,-10121000000,11186000000.0,1609500000,922500000,74500000,50.694254,35.08344,2.888318,1.775349,2.565313,31,0,17
6,R,2023-06-30,2023,Q2,62.729744,-0.391517,16.160957,-5.829255,4.741707,67.340946,42.501359,67.340946,143.491028,,10205220000.0,-54.141948,1.351671,5.246595,-14.545598,1.259115,1.259115,3.538565,16.9804,13.735155,-38.07918,-0.004617,-0.068749,2.107558,0.435843,10.494176,1.017299,-0.25,-41.277778,0.007183,-1.555556,0,0.0,0,1.3607,0.350555,2.36215,0.0,,0.000511,-0.001302,-227.101686,36000000,1954000000,-9758000000,11105000000.0,1627000000,957000000,74000000,49.056865,36.484375,2.890625,1.834606,2.466809,31,0,21
7,R,2023-03-31,2023,Q1,65.204735,2.997175,10.306833,-3.514673,5.45529,64.795049,40.019837,64.795049,138.107251,4138683000.0,10225680000.0,7.443675,1.368612,8.658334,-25.390696,1.377266,1.377266,3.381509,13.12668,21.392643,-62.734254,0.033586,-0.039385,2.109817,0.435949,7.813864,1.086016,2.153846,3.414286,0.008457,0.251799,0,0.0,0,1.341004,0.211971,1.35518,0.006614,66.102555,0.006808,0.010378,-215.333031,179000000,1856000000,-9278000000,10674000000.0,1646000000,873500000,76000000,50.059524,36.296296,2.740741,1.79786,2.479592,32,0,13
8,R,2022-12-31,2022,Q4,67.114557,4.34532,11.043445,-4.022444,5.631548,61.947031,37.564747,61.947031,167.786392,,,4.808047,1.245184,7.567385,-20.775926,1.349056,1.349056,3.641476,14.429859,22.130418,-60.758098,0.051996,-0.048133,2.687096,0.548246,9.495641,1.118454,-2.24819,2.612496,0.007349,0.141347,0,0.0,0,1.364238,0.22448,1.495482,0.014456,77.823788,-0.006945,0.015561,-209.748809,233000000,1781000000,-9258000000,10727000000.0,1615418500,821541500,78257500,45.537398,27.722892,2.819277,1.976398,3.246415,31,0,15
9,R,2022-09-30,2022,Q3,60.944465,4.939164,13.726017,-0.764044,9.161547,60.450548,37.034675,60.450548,128.335943,,9637655000.0,3.820991,1.238669,5.499775,-98.803147,1.248789,1.248789,3.175086,11.473399,14.0976,-253.262599,0.065428,-0.010121,2.103793,0.436865,6.997381,1.086454,4.619377,2.77428,0.008263,0.126297,0,0.0,0,1.055664,0.237758,1.608378,0.0,81.963048,0.014504,0.018451,-196.301947,186100000,1844549000,-9149500000,10743550000.0,1656874000,921319000,80306000,48.058025,32.62879,2.924209,1.872736,2.7583,30,0,14


In [None]:
display(spark.sql('describe history financial_modeling.key_metrics'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,32,2025-03-20 09:51:41,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,31,Serializable,True,"{'numOutputRows': '8330', 'numOutputBytes': '2660694', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
1,31,2025-03-20 09:48:17,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '15156', 'numOutputBytes': '5150632', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
2,30,2025-03-20 09:43:05,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '14768', 'numOutputBytes': '4987041', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
3,29,2025-03-20 09:38:07,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '16761', 'numOutputBytes': '5652689', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
4,28,2025-03-20 09:33:11,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '15619', 'numOutputBytes': '5330456', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
5,27,2025-03-20 09:27:54,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '16532', 'numOutputBytes': '5704193', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
6,26,2025-03-20 09:22:46,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '15497', 'numOutputBytes': '5233345', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
7,25,2025-03-20 09:17:38,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '14336', 'numOutputBytes': '4857935', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
8,24,2025-03-20 09:12:34,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '18691', 'numOutputBytes': '6476553', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
9,23,2025-03-20 09:07:10,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '19626', 'numOutputBytes': '6886408', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0


In [None]:
display(spark.sql('describe financial_modeling.key_metrics'), n=100)

Unnamed: 0,col_name,data_type,comment
0,symbol,string,
1,date,date,
2,calendarYear,int,
3,period,string,
4,revenuePerShare,double,
5,netIncomePerShare,double,
6,operatingCashFlowPerShare,double,
7,freeCashFlowPerShare,double,
8,cashPerShare,double,
9,bookValuePerShare,double,


In [None]:
display(spark.sql('select * from financial_modeling.key_metrics limit 3'))

Unnamed: 0,symbol,date,calendarYear,period,revenuePerShare,netIncomePerShare,operatingCashFlowPerShare,freeCashFlowPerShare,cashPerShare,bookValuePerShare,tangibleBookValuePerShare,shareholdersEquityPerShare,interestDebtPerShare,marketCap,enterpriseValue,peRatio,priceToSalesRatio,pocfratio,pfcfRatio,pbRatio,ptbRatio,evToSales,enterpriseValueOverEBITDA,evToOperatingCashFlow,evToFreeCashFlow,earningsYield,freeCashFlowYield,debtToEquity,debtToAssets,netDebtToEBITDA,currentRatio,interestCoverage,incomeQuality,dividendYield,payoutRatio,salesGeneralAndAdministrativeToRevenue,researchAndDdevelopementToRevenue,intangiblesToTotalAssets,capexToOperatingCashFlow,capexToRevenue,capexToDepreciation,stockBasedCompensationToRevenue,grahamNumber,roic,returnOnTangibleAssets,grahamNetNet,workingCapital,tangibleAssetValue,netCurrentAssetValue,investedCapital,averageReceivables,averagePayables,averageInventory,daysSalesOutstanding,daysPayablesOutstanding,daysOfInventoryOnHand,receivablesTurnover,payablesTurnover,inventoryTurnover,roe,capexPerShare
0,R,2024-12-31,2024,Q4,74.444625,3.133777,12.952947,-4.712273,3.574828,72.35544,34.866176,72.35544,208.570301,,15488371940,12.513652,2.10707,12.109986,-33.287547,2.167909,2.167909,4.829552,20.651163,27.756939,-76.297399,0.019978,-0.030041,2.850497,0.532929,11.641333,0.752981,0.0,4.133333,0.005032,0.251852,0,0.0,0,1.363799,0.237293,1.622601,0.0,71.426757,0.0,0.008966,-278.679867,-808000000,1502000000,-11092000000,12094000000.0,1865000000,859500000,0,52.22638,29.246468,0.0,1.723267,3.077295,0,0,17
1,R,2024-09-30,2024,Q3,74.489247,3.323114,14.719993,0.725469,3.791159,71.517166,33.93321,71.517166,207.226604,,14825179800,10.968626,1.95733,9.904896,200.973542,2.038671,2.038671,4.657612,20.590528,23.569443,478.231606,0.022792,0.004976,2.86551,0.530824,11.9375,0.741514,2.928571,4.429577,0.005618,0.246479,0,0.0,0,0.950715,0.187873,1.3,0.0,73.125534,0.014988,0.009536,-277.953944,-830000000,1450000000,-11060000000,3070000000.0,1865000000,844000000,0,52.846371,31.720728,0.0,1.70305,2.837262,0,0,13
2,R,2024-06-30,2024,Q2,72.966589,2.912243,12.657938,-1.97207,3.760692,70.650554,33.846224,70.650554,202.481139,5350388000.0,13920388210,10.53226,1.681454,9.692732,-62.213816,1.736575,1.736575,4.374729,20.14528,25.218095,-161.864979,0.023737,-0.016074,2.834794,0.532301,12.402315,0.744661,2.71875,4.416,0.005794,0.244094,0,0.0,0,1.155797,0.200503,1.453303,0.0,68.039771,0.012643,0.008579,-269.835355,-813000000,1476000000,-10956000000,12112000000.0,1837500000,825500000,0,52.636706,27.975819,0.0,1.709833,3.217064,0,0,14


### 2.Ratios

In [None]:
# request_counter = 0
# sleeping_time = 70

# column_types = {

#     "date": T.DateType(),
#     "calendarYear": T.IntegerType(),
#     "currentRatio": T.DoubleType(),
#     "quickRatio": T.DoubleType(),
#     "cashRatio": T.DoubleType(),
#     "daysOfSalesOutstanding": T.DoubleType(),
#     "daysOfInventoryOutstanding": T.DoubleType(),
#     "operatingCycle": T.DoubleType(),
#     "daysOfPayablesOutstanding": T.DoubleType(),
#     "cashConversionCycle": T.DoubleType(),
#     "grossProfitMargin": T.DoubleType(),
#     "operatingProfitMargin": T.DoubleType(),
#     "pretaxProfitMargin": T.DoubleType(),
#     "netProfitMargin": T.DoubleType(),
#     "effectiveTaxRate": T.DoubleType(),
#     "returnOnAssets": T.DoubleType(),
#     "returnOnEquity": T.DoubleType(),
#     "returnOnCapitalEmployed": T.DoubleType(),
#     "netIncomePerEBT": T.DoubleType(),
#     "ebtPerEbit": T.DoubleType(),
#     "ebitPerRevenue": T.DoubleType(),
#     "debtRatio": T.DoubleType(),
#     "debtEquityRatio": T.DoubleType(),
#     "longTermDebtToCapitalization": T.DoubleType(),
#     "totalDebtToCapitalization": T.DoubleType(),
#     "interestCoverage": T.DoubleType(),
#     "cashFlowToDebtRatio": T.DoubleType(),
#     "companyEquityMultiplier": T.DoubleType(),
#     "receivablesTurnover":T.DoubleType(),
#     "payablesTurnover":T.DoubleType(),
#     "inventoryTurnover":T.DoubleType(),
#     "fixedAssetTurnover":T.DoubleType(),
#     "assetTurnover":T.DoubleType(),
#     "operatingCashFlowPerShare":T.DoubleType(),
#     "freeCashFlowPerShare":T.DoubleType(),
#     "cashPerShare":T.DoubleType(),
#     "payoutRatio":T.DoubleType(),
#     "operatingCashFlowSalesRatio":T.DoubleType(),
#     "freeCashFlowOperatingCashFlowRatio":T.DoubleType(),
#     "cashFlowCoverageRatios":T.DoubleType(),
#     "shortTermCoverageRatios":T.DoubleType(),
#     "capitalExpenditureCoverageRatio":T.DoubleType(),
#     "dividendPaidAndCapexCoverageRatio":T.DoubleType(),
#     "dividendPayoutRatio":T.DoubleType(),
#     "priceBookValueRatio":T.DoubleType(),
#     "priceToBookRatio":T.DoubleType(),
#     "priceToSalesRatio":T.DoubleType(),
#     "priceEarningsRatio":T.DoubleType(),
#     "priceToFreeCashFlowsRatio": T.DoubleType(),
#     "priceToOperatingCashFlowsRatio": T.DoubleType(),
#     "priceCashFlowRatio": T.DoubleType(),
#     "priceEarningsToGrowthRatio": T.DoubleType(),
#     "priceSalesRatio": T.DoubleType(),
#     "dividendYield": T.DoubleType(),
#     "enterpriseValueMultiple": T.DoubleType(),
#     "priceFairValue": T.DoubleType(),

# }

# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:

#     base_url = f'https://financialmodelingprep.com/api/v3/ratios/{stock}?period=quarter'

#     if request_counter==300 or stock==stock_list[-1]:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)

#         for col_name, dtype in column_types.items():
#           combined_df = combined_df.withColumn(col_name, F.col(col_name).cast(dtype))

#         combined_df = combined_df.repartition(1)

#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('ratios'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}&apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
#         df = spark.createDataFrame(data, schema=schema)
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")



# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.ratios'))

Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds.

Unnamed: 0,symbol,date,calendarYear,period,currentRatio,quickRatio,cashRatio,daysOfSalesOutstanding,daysOfInventoryOutstanding,operatingCycle,daysOfPayablesOutstanding,cashConversionCycle,grossProfitMargin,operatingProfitMargin,pretaxProfitMargin,netProfitMargin,effectiveTaxRate,returnOnAssets,returnOnEquity,returnOnCapitalEmployed,netIncomePerEBT,ebtPerEbit,ebitPerRevenue,debtRatio,debtEquityRatio,longTermDebtToCapitalization,totalDebtToCapitalization,interestCoverage,cashFlowToDebtRatio,companyEquityMultiplier,receivablesTurnover,payablesTurnover,inventoryTurnover,fixedAssetTurnover,assetTurnover,operatingCashFlowPerShare,freeCashFlowPerShare,cashPerShare,payoutRatio,operatingCashFlowSalesRatio,freeCashFlowOperatingCashFlowRatio,cashFlowCoverageRatios,shortTermCoverageRatios,capitalExpenditureCoverageRatio,dividendPaidAndCapexCoverageRatio,dividendPayoutRatio,priceBookValueRatio,priceToBookRatio,priceToSalesRatio,priceEarningsRatio,priceToFreeCashFlowsRatio,priceToOperatingCashFlowsRatio,priceCashFlowRatio,priceEarningsToGrowthRatio,priceSalesRatio,dividendYield,enterpriseValueMultiple,priceFairValue
0,OCN,2024-12-31,2024,Q4,53.380745,53.380745,0.732171,4468.769716,0.0,4468.769716,0.0,4468.769716,0.404574,0.0,-0.132492,-0.110804,0.166667,-0.00171,-0.05701,0.0,0.83631,0.0,0.0,0.896723,29.900588,0.967504,0.967638,0.0,-0.014792,33.344289,0.02014,0.0,0.0,23.054545,0.01543,-27.891173,-43.001024,23.643526,0.0,-0.859621,1.541743,-0.014792,-4.029575,-1.845893,-1.845893,0.0,0.48698,0.48698,0.946499,-2.135518,-0.714169,-1.101065,-1.101065,0.009104,0.946499,0.0,327.283899,0.48698
1,OCN,2024-09-30,2024,Q3,61.52979,5.868575,1.636098,53.404549,5922.845304,5976.249852,0.0,5976.249852,0.501034,0.419711,0.095107,0.073742,0.228261,0.001618,0.045717,0.009329,0.775362,0.226601,0.419711,0.936938,26.474258,0.963602,0.963602,0.0,0.001549,28.256142,1.68525,0.0,0.0151954,24.803419,0.02194,2.440374,-4.270654,35.601496,0.0,0.066161,-1.75,0.001549,0.0,0.363636,0.363636,0.0,0.536835,0.536835,0.865929,2.935662,-7.478948,13.088159,13.088159,0.028089,0.865929,0.0,101.508971,0.536835
2,OCN,2024-06-30,2024,Q2,49.767229,49.767229,1.201928,56.768092,0.0,56.768092,0.0,56.768092,0.933799,0.450658,0.05551,0.043174,0.222222,0.000802,0.023532,0.008511,0.777778,0.123175,0.450658,0.914801,26.826311,0.963444,0.964063,0.0,-0.006508,29.324742,1.585398,0.0,0.0,19.772358,0.018587,-9.867334,-14.402002,31.59067,0.0,-0.320312,1.459564,-0.006508,-1.461538,-2.175978,-2.175978,0.0,0.412961,0.412961,0.757661,4.387218,-1.620608,-2.36538,-2.36538,-0.066576,0.757661,0.0,82.328929,0.412961
3,OCN,2024-03-31,2024,Q1,50.136609,50.136609,1.234398,57.613636,-5e-06,57.613631,0.0,57.613631,0.927189,0.518098,0.133838,0.126684,0.053459,0.002299,0.06966,0.009553,0.946541,0.258327,0.518098,0.911101,27.601018,0.964434,0.965036,0.0,-0.031057,30.294145,1.56213,0.0,-17300000.0,20.135593,0.018151,-46.023113,-50.794408,31.212219,0.0,-1.558923,1.103672,-0.031057,-1.852,-9.645833,-9.645833,0.0,0.459122,0.459122,0.83496,1.647728,-0.48529,-0.5356,-0.5356,-0.010108,0.83496,0.0,86.668514,0.459122
4,OCN,2023-12-31,2023,Q4,44.536608,44.588904,1.160073,47.004049,-90.0,-42.995951,0.0,-42.995951,0.961201,0.154521,-0.153846,-0.160256,-0.039474,-0.003796,-0.118218,0.003725,1.041667,-0.995633,0.154521,0.913383,28.446491,0.962834,0.96604,0.0,0.031304,31.144102,1.914729,0.0,-1.0,22.625954,0.023686,46.602923,37.094781,34.724257,0.0,1.207152,0.795975,0.031304,0.354117,4.90137,4.90137,0.0,0.471014,0.471014,0.638507,-0.996071,0.664514,0.528937,0.528937,0.001515,0.638507,0.0,64.312131,0.471014
5,OCN,2023-09-30,2023,Q3,45.534483,45.536347,1.238583,65.311131,-1.978022,63.333109,0.0,63.333109,0.920245,0.565294,0.04163,0.037248,0.105263,0.000653,0.019097,0.01008,0.894737,0.073643,0.565294,0.917705,26.827455,0.960427,0.964064,0.0,0.028767,29.233206,1.378019,0.0,-45.5,14.173913,0.017538,42.432353,37.762942,32.88353,0.0,1.505259,0.889956,0.028767,6.420561,9.087302,9.087302,0.0,0.448321,0.448321,0.874442,5.869048,0.652756,0.580925,0.580925,-0.129502,0.874442,0.0,80.071798,0.448321
6,OCN,2023-06-30,2023,Q2,46.822773,46.829201,1.52663,68.238591,-6.086957,62.151634,0.0,62.151634,0.917134,0.436749,0.065252,0.06205,0.055215,0.001173,0.035731,0.008393,0.95092,0.149404,0.436749,0.922352,28.100046,0.961436,0.965636,0.0,-0.040394,30.465652,1.318902,0.0,-14.78571,14.781065,0.018901,-61.947104,-66.010653,42.006779,0.0,-1.971177,1.065597,-0.040394,-9.67387,-15.244583,-15.244583,0.0,0.451673,0.451673,0.784371,3.160256,-0.373425,-0.39792,-0.39792,-0.022898,0.784371,0.0,63.139004,0.451673
7,OCN,2023-03-31,2023,Q1,38.805778,38.92817,1.026886,67.550411,-144.473684,-76.923273,0.0,-76.923273,0.929052,0.294249,-0.143017,-0.150112,-0.049608,-0.003184,-0.096565,0.006366,1.049608,-0.486041,0.294249,0.91656,27.800625,0.960725,0.965279,0.0,-0.023347,30.331492,1.332338,0.0,-0.6229508,14.169312,0.021209,-35.892243,-40.14299,38.044184,0.0,-1.008962,1.118431,-0.023347,-5.117424,-8.44375,-8.44375,0.0,0.445754,0.445754,0.692933,-1.154026,-0.614055,-0.686778,-0.686778,0.023506,0.692933,0.0,76.179254,0.445754
8,OCN,2022-12-31,2022,Q4,1.341394,1.341833,0.200923,65.712976,-3.281678,62.431299,0.0,62.431299,0.933841,-0.257058,-0.316977,-0.320343,-0.010617,-0.006426,-0.174458,-0.005794,1.010617,1.233096,-0.257058,0.907365,24.634552,0.956881,0.96099,0.0,-0.004031,27.149551,1.369593,0.0,-27.425,12.312772,0.020059,-5.970741,-9.419329,36.183118,0.0,-0.182319,1.577581,-0.004031,-0.942744,-1.731358,-1.731358,0.0,0.409918,0.409918,0.752697,-0.587416,-2.616959,-4.128466,-4.128466,0.001671,0.752697,0.0,-185.701135,0.409918
9,OCN,2022-09-30,2022,Q3,1.393123,1.393123,0.200108,3111.214927,0.0,3111.214927,0.0,3111.214927,0.90555,0.847168,0.137327,0.153798,-0.119941,0.002989,0.067641,0.018495,1.119941,0.162101,0.847168,0.898756,20.341631,0.946925,0.953143,0.0,-0.001762,22.633095,0.028928,0.0,0.0,12.787722,0.019432,-2.211682,-10.651919,30.720013,0.0,-0.081485,4.816208,-0.001762,-0.014502,-0.26204,-0.26204,0.0,0.39942,0.39942,0.908181,1.476258,-2.314137,-11.145364,-11.145364,0.005421,0.908181,0.0,98.793333,0.39942


In [None]:
display(spark.sql('describe history financial_modeling.ratios'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,32,2025-03-20 14:47:26,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,31,Serializable,True,"{'numOutputRows': '8330', 'numOutputBytes': '2908088', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
1,31,2025-03-20 14:43:52,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '15158', 'numOutputBytes': '5659066', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
2,30,2025-03-20 14:38:52,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '14768', 'numOutputBytes': '5453440', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
3,29,2025-03-20 14:34:04,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '16761', 'numOutputBytes': '6224800', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
4,28,2025-03-20 14:28:59,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '15619', 'numOutputBytes': '5860950', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
5,27,2025-03-20 14:23:56,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '16533', 'numOutputBytes': '6223462', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
6,26,2025-03-20 14:18:27,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '15499', 'numOutputBytes': '5767730', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
7,25,2025-03-20 14:12:35,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '14336', 'numOutputBytes': '5299197', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
8,24,2025-03-20 14:07:31,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '18293', 'numOutputBytes': '6883245', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
9,23,2025-03-20 14:00:49,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '19626', 'numOutputBytes': '7484999', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0


In [None]:
display(spark.sql('describe financial_modeling.ratios'), n=100)

Unnamed: 0,col_name,data_type,comment
0,symbol,string,
1,date,date,
2,calendarYear,int,
3,period,string,
4,currentRatio,double,
5,quickRatio,double,
6,cashRatio,double,
7,daysOfSalesOutstanding,double,
8,daysOfInventoryOutstanding,double,
9,operatingCycle,double,


In [None]:
display(spark.sql('select * from financial_modeling.ratios limit 3'))

Unnamed: 0,symbol,date,calendarYear,period,currentRatio,quickRatio,cashRatio,daysOfSalesOutstanding,daysOfInventoryOutstanding,operatingCycle,daysOfPayablesOutstanding,cashConversionCycle,grossProfitMargin,operatingProfitMargin,pretaxProfitMargin,netProfitMargin,effectiveTaxRate,returnOnAssets,returnOnEquity,returnOnCapitalEmployed,netIncomePerEBT,ebtPerEbit,ebitPerRevenue,debtRatio,debtEquityRatio,longTermDebtToCapitalization,totalDebtToCapitalization,interestCoverage,cashFlowToDebtRatio,companyEquityMultiplier,receivablesTurnover,payablesTurnover,inventoryTurnover,fixedAssetTurnover,assetTurnover,operatingCashFlowPerShare,freeCashFlowPerShare,cashPerShare,payoutRatio,operatingCashFlowSalesRatio,freeCashFlowOperatingCashFlowRatio,cashFlowCoverageRatios,shortTermCoverageRatios,capitalExpenditureCoverageRatio,dividendPaidAndCapexCoverageRatio,dividendPayoutRatio,priceBookValueRatio,priceToBookRatio,priceToSalesRatio,priceEarningsRatio,priceToFreeCashFlowsRatio,priceToOperatingCashFlowsRatio,priceCashFlowRatio,priceEarningsToGrowthRatio,priceSalesRatio,dividendYield,enterpriseValueMultiple,priceFairValue
0,OCN,2024-12-31,2024,Q4,53.380745,53.380745,0.732171,4468.769716,0.0,4468.769716,0.0,4468.769716,0.404574,0.0,-0.132492,-0.110804,0.166667,-0.00171,-0.05701,0.0,0.83631,0.0,0.0,0.896723,29.900588,0.967504,0.967638,0.0,-0.014792,33.344289,0.02014,0.0,0.0,23.054545,0.01543,-27.891173,-43.001024,23.643526,0.0,-0.859621,1.541743,-0.014792,-4.029575,-1.845893,-1.845893,0.0,0.48698,0.48698,0.946499,-2.135518,-0.714169,-1.101065,-1.101065,0.009104,0.946499,0.0,327.283899,0.48698
1,OCN,2024-09-30,2024,Q3,61.52979,5.868575,1.636098,53.404549,5922.845304,5976.249852,0.0,5976.249852,0.501034,0.419711,0.095107,0.073742,0.228261,0.001618,0.045717,0.009329,0.775362,0.226601,0.419711,0.936938,26.474258,0.963602,0.963602,0.0,0.001549,28.256142,1.68525,0.0,0.015195,24.803419,0.02194,2.440374,-4.270654,35.601496,0.0,0.066161,-1.75,0.001549,0.0,0.363636,0.363636,0.0,0.536835,0.536835,0.865929,2.935662,-7.478948,13.088159,13.088159,0.028089,0.865929,0.0,101.508971,0.536835
2,OCN,2024-06-30,2024,Q2,49.767229,49.767229,1.201928,56.768092,0.0,56.768092,0.0,56.768092,0.933799,0.450658,0.05551,0.043174,0.222222,0.000802,0.023532,0.008511,0.777778,0.123175,0.450658,0.914801,26.826311,0.963444,0.964063,0.0,-0.006508,29.324742,1.585398,0.0,0.0,19.772358,0.018587,-9.867334,-14.402002,31.59067,0.0,-0.320312,1.459564,-0.006508,-1.461538,-2.175978,-2.175978,0.0,0.412961,0.412961,0.757661,4.387218,-1.620608,-2.36538,-2.36538,-0.066576,0.757661,0.0,82.328929,0.412961


### 3.Cashflow Growth

In [None]:
# request_counter = 0
# sleeping_time = 70

# column_types = {

#     "date": T.DateType(),
#     "calendarYear": T.IntegerType(),
#     "growthNetIncome": T.DoubleType(),
#     "growthDepreciationAndAmortization": T.DoubleType(),
#     "growthDeferredIncomeTax": T.DoubleType(),
#     "growthStockBasedCompensation": T.DoubleType(),
#     "growthChangeInWorkingCapital": T.DoubleType(),
#     "growthAccountsReceivables": T.DoubleType(),
#     "growthInventory": T.DoubleType(),
#     "growthAccountsPayables": T.DoubleType(),
#     "growthOtherWorkingCapital": T.DoubleType(),
#     "growthOtherNonCashItems": T.DoubleType(),
#     "growthNetCashProvidedByOperatingActivites": T.DoubleType(),
#     "growthInvestmentsInPropertyPlantAndEquipment": T.DoubleType(),
#     "growthAcquisitionsNet": T.DoubleType(),
#     "growthPurchasesOfInvestments": T.DoubleType(),
#     "growthSalesMaturitiesOfInvestments": T.DoubleType(),
#     "growthOtherInvestingActivites": T.DoubleType(),
#     "growthNetCashUsedForInvestingActivites": T.DoubleType(),
#     "growthDebtRepayment": T.DoubleType(),
#     "growthCommonStockIssued": T.DoubleType(),
#     "growthCommonStockRepurchased": T.DoubleType(),
#     "growthDividendsPaid": T.DoubleType(),
#     "growthOtherFinancingActivites": T.DoubleType(),
#     "growthNetCashUsedProvidedByFinancingActivities": T.DoubleType(),
#     "growthEffectOfForexChangesOnCash": T.DoubleType(),
#     "growthNetChangeInCash": T.DoubleType(),
#     "growthCashAtEndOfPeriod": T.DoubleType(),
#     "growthCashAtBeginningOfPeriod":T.DoubleType(),
#     "growthOperatingCashFlow":T.DoubleType(),
#     "growthCapitalExpenditure":T.DoubleType(),
#     "growthFreeCashFlow":T.DoubleType(),

# }

# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:

#     base_url = f'https://financialmodelingprep.com/api/v3/cash-flow-statement-growth/{stock}?period=quarter'

#     if request_counter==300 or stock==stock_list[-1]:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)

#         for col_name, dtype in column_types.items():
#           combined_df = combined_df.withColumn(col_name, F.col(col_name).cast(dtype))

#         combined_df = combined_df.repartition(1)

#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('cashflow_growth'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}&apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
#         df = spark.createDataFrame(data, schema=schema)
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")



# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.cashflow_growth'))

Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Error fetching data: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Read timed out. (read timeout=10)
Error fetching data: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Read timed out. (read timeout=10)
Error fetching data: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Read timed out. (read timeout=10)
Error fetching data: HTTPSConnectionPool(host='financialmodelingprep.com', port=443): Read timed out. (read timeout=10)
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 

Unnamed: 0,date,symbol,calendarYear,period,growthNetIncome,growthDepreciationAndAmortization,growthDeferredIncomeTax,growthStockBasedCompensation,growthChangeInWorkingCapital,growthAccountsReceivables,growthInventory,growthAccountsPayables,growthOtherWorkingCapital,growthOtherNonCashItems,growthNetCashProvidedByOperatingActivites,growthInvestmentsInPropertyPlantAndEquipment,growthAcquisitionsNet,growthPurchasesOfInvestments,growthSalesMaturitiesOfInvestments,growthOtherInvestingActivites,growthNetCashUsedForInvestingActivites,growthDebtRepayment,growthCommonStockIssued,growthCommonStockRepurchased,growthDividendsPaid,growthOtherFinancingActivites,growthNetCashUsedProvidedByFinancingActivities,growthEffectOfForexChangesOnCash,growthNetChangeInCash,growthCashAtEndOfPeriod,growthCashAtBeginningOfPeriod,growthOperatingCashFlow,growthCapitalExpenditure,growthFreeCashFlow
0,2024-12-31,GPCR,2024,Q4,-0.073638,0.030189,0.0,-0.031878,-1.052398,0.0,0.0,-1.067169,-1.028507,-1.528355,-0.851493,0.976651,0.0,0.481738,1.486775,-1.0,1.175792,0.0,1.0,0.0,0.0,1.812417,1.812417,0.0,0.987494,-0.015221,-0.548957,-0.851493,0.976642,-0.79005
1,2024-09-30,GPCR,2024,Q3,-0.305101,0.143543,0.0,0.435415,6.011577,0.0,0.0,2.436619,0.656878,0.201557,0.253742,-0.604834,0.0,0.098977,-0.161838,2.000007,0.075659,0.0,-2.001465,0.0,0.0,-0.286929,-1.001473,0.0,-1.73817,-0.548969,2.900879,0.253742,-0.604215,0.240082
2,2024-06-30,GPCR,2024,Q2,-0.001551,-0.648313,0.0,0.529155,0.750721,0.0,0.0,-3.413809,1.235622,-1.672163,0.224297,-1.640105,0.0,-7.887102,1.374078,-175.091139,-145.462614,0.0,681.663083,0.0,0.0,-1.837923,732.400881,0.0,9.879761,2.900983,-0.246248,0.224297,-1.640105,0.215477
3,2024-03-31,GPCR,2024,Q1,-0.053593,1.34562,1.0,0.307912,-3.984596,0.0,0.0,0.30745,-8.419422,0.045293,-0.552975,0.86039,1.0,0.865539,-0.416056,1581.0,1.007826,1.0,-0.997487,0.0,0.0,1.038212,-0.997514,1.0,-1.403772,-0.246248,1.563243,-0.552975,0.86039,-0.481995
4,2023-12-31,GPCR,2023,Q4,-0.033923,2.555924,0.234895,0.129171,-0.007165,0.0,0.0,1.583612,-0.738111,-4.910156,-0.065784,-0.412782,-0.213965,-14.87234,0.248344,-1.000032,-7.003817,-0.999995,385.68564,0.0,0.0,-39.839323,595.13141,0.0,6.880155,1.563236,0.247465,-0.065784,-0.412782,-0.079094
5,2023-09-30,GPCR,2023,Q3,-0.025,0.067568,-0.010341,0.091657,-0.553519,0.0,0.0,-3.277512,0.002343,2.136364,-0.216823,-3.293194,-1.451678,-1.615499,1.494514,1.451678,1.438489,-0.976393,2.452704,0.0,0.0,2.592593,2.592593,0.0,1.116936,0.247469,-0.679105,-0.216823,-3.293194,-0.251215
6,2023-06-30,GPCR,2023,Q2,-0.295132,0.057143,0.0,-0.328069,2.319213,0.0,0.0,2.132178,2.365236,1.335878,0.168111,0.0,0.0,1.12762,-1.657641,-0.397911,-0.401808,-0.024189,-1.0031,0.0,0.0,0.884436,-1.00175,0.0,-1.855581,-0.679105,3.848147,0.168111,0.0,0.158706
7,2023-03-31,GPCR,2023,Q1,-0.513684,-0.027778,-1.0,3.065811,-9.527027,0.0,0.0,-6.443548,-10.721875,0.67734,-0.688675,1.0,1.0,-19.689595,6.197959,-5.707645,-5.739967,0.0,0.0,0.0,0.0,-7.132911,538.107595,0.0,51.175912,3.848147,-0.07123,-0.688675,1.0,-0.678763
8,2022-12-31,GPCR,2022,Q4,0.04365,-0.052632,2.464286,-0.007962,-1.312896,0.0,0.0,-1.054746,0.621749,-9.731183,-0.14741,0.013889,-1.627796,0.800196,-0.333333,1.627796,1.62082,0.0,0.0,0.0,0.0,-0.220077,-0.220077,0.0,0.926963,-0.07123,-0.493737,-0.14741,0.013889,-0.146309
9,2022-09-30,GPCR,2022,Q3,0.109765,0.288136,-1.153846,-0.023328,-0.554474,0.0,0.0,0.438095,-1.525466,-0.03125,-0.038237,-2.358491,0.0284,-0.727006,1.8,-0.0284,-0.03627,1.0,-1.0,1.0,0.0,-1.008345,-1.008345,0.0,-6.627979,-0.493737,0.096166,-0.038237,-2.358491,-0.050886


In [None]:
display(spark.sql('describe history financial_modeling.cashflow_growth'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,32,2025-03-21 17:08:50,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,31,Serializable,True,"{'numOutputRows': '7843', 'numOutputBytes': '1185769', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
1,31,2025-03-21 17:05:34,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '14240', 'numOutputBytes': '2397043', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
2,30,2025-03-21 17:00:32,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '14039', 'numOutputBytes': '2332633', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
3,29,2025-03-21 16:56:07,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '15855', 'numOutputBytes': '2617363', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
4,28,2025-03-21 16:51:34,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '15078', 'numOutputBytes': '2537105', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
5,27,2025-03-21 16:47:05,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '15548', 'numOutputBytes': '2671269', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
6,26,2025-03-21 16:42:28,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '14470', 'numOutputBytes': '2395854', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
7,25,2025-03-21 16:38:06,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '13594', 'numOutputBytes': '2279359', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
8,24,2025-03-21 16:33:33,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '17874', 'numOutputBytes': '3156671', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
9,23,2025-03-21 16:28:37,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '18610', 'numOutputBytes': '3311436', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0


In [None]:
display(spark.sql('describe financial_modeling.cashflow_growth'), n=100)

Unnamed: 0,col_name,data_type,comment
0,date,date,
1,symbol,string,
2,calendarYear,int,
3,period,string,
4,growthNetIncome,double,
5,growthDepreciationAndAmortization,double,
6,growthDeferredIncomeTax,double,
7,growthStockBasedCompensation,double,
8,growthChangeInWorkingCapital,double,
9,growthAccountsReceivables,double,


In [None]:
display(spark.sql('select * from financial_modeling.cashflow_growth limit 3'))

Unnamed: 0,date,symbol,calendarYear,period,growthNetIncome,growthDepreciationAndAmortization,growthDeferredIncomeTax,growthStockBasedCompensation,growthChangeInWorkingCapital,growthAccountsReceivables,growthInventory,growthAccountsPayables,growthOtherWorkingCapital,growthOtherNonCashItems,growthNetCashProvidedByOperatingActivites,growthInvestmentsInPropertyPlantAndEquipment,growthAcquisitionsNet,growthPurchasesOfInvestments,growthSalesMaturitiesOfInvestments,growthOtherInvestingActivites,growthNetCashUsedForInvestingActivites,growthDebtRepayment,growthCommonStockIssued,growthCommonStockRepurchased,growthDividendsPaid,growthOtherFinancingActivites,growthNetCashUsedProvidedByFinancingActivities,growthEffectOfForexChangesOnCash,growthNetChangeInCash,growthCashAtEndOfPeriod,growthCashAtBeginningOfPeriod,growthOperatingCashFlow,growthCapitalExpenditure,growthFreeCashFlow
0,2024-12-31,GPCR,2024,Q4,-0.073638,0.030189,0.0,-0.031878,-1.052398,0.0,0.0,-1.067169,-1.028507,-1.528355,-0.851493,0.976651,0.0,0.481738,1.486775,-1.0,1.175792,0.0,1.0,0.0,0.0,1.812417,1.812417,0.0,0.987494,-0.015221,-0.548957,-0.851493,0.976642,-0.79005
1,2024-09-30,GPCR,2024,Q3,-0.305101,0.143543,0.0,0.435415,6.011577,0.0,0.0,2.436619,0.656878,0.201557,0.253742,-0.604834,0.0,0.098977,-0.161838,2.000007,0.075659,0.0,-2.001465,0.0,0.0,-0.286929,-1.001473,0.0,-1.73817,-0.548969,2.900879,0.253742,-0.604215,0.240082
2,2024-06-30,GPCR,2024,Q2,-0.001551,-0.648313,0.0,0.529155,0.750721,0.0,0.0,-3.413809,1.235622,-1.672163,0.224297,-1.640105,0.0,-7.887102,1.374078,-175.091139,-145.462614,0.0,681.663083,0.0,0.0,-1.837923,732.400881,0.0,9.879761,2.900983,-0.246248,0.224297,-1.640105,0.215477


### 4.Income Growth

In [None]:
# request_counter = 0
# sleeping_time = 70

# column_types = {

#     "date": T.DateType(),
#     "calendarYear": T.IntegerType(),
#     "growthRevenue": T.DoubleType(),
#     "growthCostOfRevenue": T.DoubleType(),
#     "growthGrossProfit": T.DoubleType(),
#     "growthGrossProfitRatio": T.DoubleType(),
#     "growthResearchAndDevelopmentExpenses": T.DoubleType(),
#     "growthGeneralAndAdministrativeExpenses": T.DoubleType(),
#     "growthSellingAndMarketingExpenses": T.DoubleType(),
#     "growthOtherExpenses": T.DoubleType(),
#     "growthOperatingExpenses": T.DoubleType(),
#     "growthCostAndExpenses": T.DoubleType(),
#     "growthInterestExpense": T.DoubleType(),
#     "growthDepreciationAndAmortization": T.DoubleType(),
#     "growthEBITDA": T.DoubleType(),
#     "growthEBITDARatio": T.DoubleType(),
#     "growthOperatingIncome": T.DoubleType(),
#     "growthOperatingIncomeRatio": T.DoubleType(),
#     "growthTotalOtherIncomeExpensesNet": T.DoubleType(),
#     "growthIncomeBeforeTax": T.DoubleType(),
#     "growthIncomeBeforeTaxRatio": T.DoubleType(),
#     "growthIncomeTaxExpense": T.DoubleType(),
#     "growthNetIncome": T.DoubleType(),
#     "growthNetIncomeRatio": T.DoubleType(),
#     "growthEPS": T.DoubleType(),
#     "growthEPSDiluted": T.DoubleType(),
#     "growthWeightedAverageShsOut": T.DoubleType(),
#     "growthWeightedAverageShsOutDil": T.DoubleType(),

# }

# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:

#     base_url = f'https://financialmodelingprep.com/api/v3/income-statement-growth/{stock}?period=quarter'

#     if request_counter==300 or stock==stock_list[-1]:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)

#         for col_name, dtype in column_types.items():
#           combined_df = combined_df.withColumn(col_name, F.col(col_name).cast(dtype))

#         combined_df = combined_df.repartition(1)

#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('income_growth'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}&apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
#         df = spark.createDataFrame(data, schema=schema)
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")



# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.income_growth'))

Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds.

Unnamed: 0,date,symbol,calendarYear,period,growthRevenue,growthCostOfRevenue,growthGrossProfit,growthGrossProfitRatio,growthResearchAndDevelopmentExpenses,growthGeneralAndAdministrativeExpenses,growthSellingAndMarketingExpenses,growthOtherExpenses,growthOperatingExpenses,growthCostAndExpenses,growthInterestExpense,growthDepreciationAndAmortization,growthEBITDA,growthEBITDARatio,growthOperatingIncome,growthOperatingIncomeRatio,growthTotalOtherIncomeExpensesNet,growthIncomeBeforeTax,growthIncomeBeforeTaxRatio,growthIncomeTaxExpense,growthNetIncome,growthNetIncomeRatio,growthEPS,growthEPSDiluted,growthWeightedAverageShsOut,growthWeightedAverageShsOutDil
0,2022-06-30,LLNW,2022,Q2,0.282148,0.294629,0.253989,-0.021962,0.269813,0.189633,0.435165,0.46124,0.290484,0.292892,0.001523,0.21075,-1.366545,-0.845766,-0.965785,-0.533197,-0.001523,-0.89903,-0.481132,-96.092233,0.143125,0.331688,0.214286,0.214286,0.119887,0.119887
1,2022-03-31,LLNW,2022,Q1,-0.078333,-0.003004,-0.212564,-0.145638,0.859973,0.198704,-0.063137,0.057377,0.247609,0.088623,-0.024517,-0.011307,-5.533019,-5.918285,-3.840417,-4.251809,0.024517,-2.798518,-3.121358,-0.527523,-2.531405,-2.831543,-2.45679,-2.45679,0.011229,0.011229
2,2021-12-31,LLNW,2021,Q4,0.13918,0.023062,0.427985,0.25352,-0.010759,0.082114,0.35978,0.336986,0.14972,0.065997,0.029052,-0.034768,2.171271,2.02817,0.565523,0.618605,0.102667,0.495351,0.557006,1.066351,0.462741,0.528381,0.491844,0.491844,0.057039,0.057039
3,2021-09-30,LLNW,2021,Q3,0.141764,0.012004,0.676374,0.468232,0.00347,0.100333,0.035097,0.32969,0.06073,0.028012,0.002299,-0.00988,0.616728,0.664316,0.285435,0.374157,0.119201,0.264387,0.355722,-0.149194,0.262301,0.353895,0.275455,0.275455,0.005879,0.005879
4,2021-06-30,LLNW,2021,Q2,-0.055611,0.005297,-0.244258,-0.199755,-0.15148,-0.419601,-0.411896,0.016667,-0.353343,-0.149642,0.014774,0.041647,0.680984,0.662199,0.506719,0.477672,-0.170447,0.467727,0.436384,-0.046154,0.463434,0.431838,0.47619,0.47619,0.01416,0.01416
5,2021-03-31,LLNW,2021,Q1,-0.075786,0.00337,-0.257267,-0.196363,0.206672,0.734727,0.017484,-0.00369,0.294573,0.111372,-0.410902,0.034948,-118.642384,-128.289083,-3.026036,-3.356171,0.315616,-2.14252,-2.400207,-0.029851,-2.072452,-2.324393,-2.115727,-2.115727,0.008643,0.008643
6,2020-12-31,LLNW,2020,Q4,-0.064987,0.028341,-0.226031,-0.172237,-0.066175,-0.037027,-0.075555,0.411458,-0.053215,-0.003495,0.304062,0.003842,-0.959636,-0.956831,-1.594298,-1.774611,-0.297132,-1.051799,-1.194406,3.060606,-1.085069,-1.229988,-1.067485,-1.067485,0.007045,0.007045
7,2020-09-30,LLNW,2020,Q3,0.011905,0.079369,-0.086608,-0.097354,-0.026382,-0.053255,-0.043279,0.188854,-0.039782,0.029501,22.577465,0.053132,-0.101585,-0.112155,-0.878089,-0.855993,-3.34748,-1.463231,-1.434251,-0.514706,-1.30747,-1.280322,-1.263889,-1.263889,0.017741,0.017741
8,2020-06-30,LLNW,2020,Q2,0.026907,-0.041751,0.146899,0.116848,-0.008188,0.038696,-0.081133,-0.052786,-0.028133,-0.036098,6.1,0.035148,8.914286,8.654515,0.756518,0.762898,-2.968421,0.686873,0.695077,-0.227273,0.671486,0.680093,0.674208,0.674208,0.010642,0.010642
9,2020-03-31,LLNW,2020,Q1,-0.051839,0.070526,-0.209712,-0.166505,0.029126,0.043421,0.143764,0.042813,0.084081,0.076111,-0.782609,-0.022084,-0.94955,-0.946791,-2.981717,-3.090063,-1.641892,-2.907282,-3.011559,-0.145631,-3.138731,-3.255661,-3.114833,-3.221106,0.011573,-0.039071


In [None]:
display(spark.sql('describe history financial_modeling.income_growth'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,32,2025-03-21 20:20:11,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,31,Serializable,True,"{'numOutputRows': '8211', 'numOutputBytes': '1370180', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
1,31,2025-03-21 20:16:59,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '14965', 'numOutputBytes': '2625788', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
2,30,2025-03-21 20:12:23,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '14659', 'numOutputBytes': '2571801', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
3,29,2025-03-21 20:08:07,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '16641', 'numOutputBytes': '2878795', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
4,28,2025-03-21 20:03:51,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '15732', 'numOutputBytes': '2721128', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
5,27,2025-03-21 19:59:28,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '16481', 'numOutputBytes': '2860394', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
6,26,2025-03-21 19:55:04,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '15362', 'numOutputBytes': '2674116', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
7,25,2025-03-21 19:50:43,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '14269', 'numOutputBytes': '2470245', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
8,24,2025-03-21 19:46:18,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '18705', 'numOutputBytes': '3251546', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
9,23,2025-03-21 19:41:35,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '19589', 'numOutputBytes': '3410535', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0


In [None]:
display(spark.sql('describe financial_modeling.income_growth'), n=100)

Unnamed: 0,col_name,data_type,comment
0,date,date,
1,symbol,string,
2,calendarYear,int,
3,period,string,
4,growthRevenue,double,
5,growthCostOfRevenue,double,
6,growthGrossProfit,double,
7,growthGrossProfitRatio,double,
8,growthResearchAndDevelopmentExpenses,double,
9,growthGeneralAndAdministrativeExpenses,double,


In [None]:
display(spark.sql('select * from financial_modeling.income_growth limit 3'))

Unnamed: 0,date,symbol,calendarYear,period,growthRevenue,growthCostOfRevenue,growthGrossProfit,growthGrossProfitRatio,growthResearchAndDevelopmentExpenses,growthGeneralAndAdministrativeExpenses,growthSellingAndMarketingExpenses,growthOtherExpenses,growthOperatingExpenses,growthCostAndExpenses,growthInterestExpense,growthDepreciationAndAmortization,growthEBITDA,growthEBITDARatio,growthOperatingIncome,growthOperatingIncomeRatio,growthTotalOtherIncomeExpensesNet,growthIncomeBeforeTax,growthIncomeBeforeTaxRatio,growthIncomeTaxExpense,growthNetIncome,growthNetIncomeRatio,growthEPS,growthEPSDiluted,growthWeightedAverageShsOut,growthWeightedAverageShsOutDil
0,2022-06-30,LLNW,2022,Q2,0.282148,0.294629,0.253989,-0.021962,0.269813,0.189633,0.435165,0.46124,0.290484,0.292892,0.001523,0.21075,-1.366545,-0.845766,-0.965785,-0.533197,-0.001523,-0.89903,-0.481132,-96.092233,0.143125,0.331688,0.214286,0.214286,0.119887,0.119887
1,2022-03-31,LLNW,2022,Q1,-0.078333,-0.003004,-0.212564,-0.145638,0.859973,0.198704,-0.063137,0.057377,0.247609,0.088623,-0.024517,-0.011307,-5.533019,-5.918285,-3.840417,-4.251809,0.024517,-2.798518,-3.121358,-0.527523,-2.531405,-2.831543,-2.45679,-2.45679,0.011229,0.011229
2,2021-12-31,LLNW,2021,Q4,0.13918,0.023062,0.427985,0.25352,-0.010759,0.082114,0.35978,0.336986,0.14972,0.065997,0.029052,-0.034768,2.171271,2.02817,0.565523,0.618605,0.102667,0.495351,0.557006,1.066351,0.462741,0.528381,0.491844,0.491844,0.057039,0.057039


### 5.Balance Sheet Growth

In [None]:
# request_counter = 0
# sleeping_time = 70

# column_types = {

#     "date": T.DateType(),
#     "calendarYear": T.IntegerType(),
#     "growthCashAndCashEquivalents": T.DoubleType(),
#     "growthShortTermInvestments": T.DoubleType(),
#     "growthCashAndShortTermInvestments": T.DoubleType(),
#     "growthNetReceivables": T.DoubleType(),
#     "growthInventory": T.DoubleType(),
#     "growthOtherCurrentAssets": T.DoubleType(),
#     "growthTotalCurrentAssets": T.DoubleType(),
#     "growthPropertyPlantEquipmentNet": T.DoubleType(),
#     "growthGoodwill": T.DoubleType(),
#     "growthIntangibleAssets": T.DoubleType(),
#     "growthGoodwillAndIntangibleAssets": T.DoubleType(),
#     "growthLongTermInvestments": T.DoubleType(),
#     "growthTaxAssets": T.DoubleType(),
#     "growthOtherNonCurrentAssets": T.DoubleType(),
#     "growthTotalNonCurrentAssets": T.DoubleType(),
#     "growthOtherAssets": T.DoubleType(),
#     "growthTotalAssets": T.DoubleType(),
#     "growthAccountPayables": T.DoubleType(),
#     "growthShortTermDebt": T.DoubleType(),
#     "growthTaxPayables": T.DoubleType(),
#     "growthDeferredRevenue": T.DoubleType(),
#     "growthOtherCurrentLiabilities": T.DoubleType(),
#     "growthTotalCurrentLiabilities": T.DoubleType(),
#     "growthLongTermDebt": T.DoubleType(),
#     "growthDeferredRevenueNonCurrent": T.DoubleType(),
#     "growthDeferrredTaxLiabilitiesNonCurrent": T.DoubleType(),
#     "growthOtherNonCurrentLiabilities": T.DoubleType(),
#     "growthTotalNonCurrentLiabilities": T.DoubleType(),
#     "growthOtherLiabilities": T.DoubleType(),
#     "growthTotalLiabilities": T.DoubleType(),
#     "growthCommonStock": T.DoubleType(),
#     "growthRetainedEarnings": T.DoubleType(),
#     "growthAccumulatedOtherComprehensiveIncomeLoss": T.DoubleType(),
#     "growthOthertotalStockholdersEquity": T.DoubleType(),
#     "growthTotalStockholdersEquity": T.DoubleType(),
#     "growthTotalLiabilitiesAndStockholdersEquity": T.DoubleType(),
#     "growthTotalInvestments": T.DoubleType(),
#     "growthTotalDebt": T.DoubleType(),
#     "growthNetDebt": T.DoubleType(),

# }

# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:

#     base_url = f'https://financialmodelingprep.com/api/v3/balance-sheet-statement-growth/{stock}?period=quarter'

#     if request_counter==300 or stock==stock_list[-1]:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)

#         for col_name, dtype in column_types.items():
#           combined_df = combined_df.withColumn(col_name, F.col(col_name).cast(dtype))

#         combined_df = combined_df.repartition(1)

#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('balance_sheet_growth'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}&apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
#         df = spark.createDataFrame(data, schema=schema)
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")



# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.balance_sheet_growth'))

Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds.

Unnamed: 0,date,symbol,calendarYear,period,growthCashAndCashEquivalents,growthShortTermInvestments,growthCashAndShortTermInvestments,growthNetReceivables,growthInventory,growthOtherCurrentAssets,growthTotalCurrentAssets,growthPropertyPlantEquipmentNet,growthGoodwill,growthIntangibleAssets,growthGoodwillAndIntangibleAssets,growthLongTermInvestments,growthTaxAssets,growthOtherNonCurrentAssets,growthTotalNonCurrentAssets,growthOtherAssets,growthTotalAssets,growthAccountPayables,growthShortTermDebt,growthTaxPayables,growthDeferredRevenue,growthOtherCurrentLiabilities,growthTotalCurrentLiabilities,growthLongTermDebt,growthDeferredRevenueNonCurrent,growthDeferrredTaxLiabilitiesNonCurrent,growthOtherNonCurrentLiabilities,growthTotalNonCurrentLiabilities,growthOtherLiabilities,growthTotalLiabilities,growthCommonStock,growthRetainedEarnings,growthAccumulatedOtherComprehensiveIncomeLoss,growthOthertotalStockholdersEquity,growthTotalStockholdersEquity,growthTotalLiabilitiesAndStockholdersEquity,growthTotalInvestments,growthTotalDebt,growthNetDebt
0,2024-12-31,RDDT,2024,Q4,0.089547,0.040392,0.054925,0.189223,0.0,6.325535,0.078886,-0.046378,0.0,-0.088876,-0.035389,0.0,0.0,2.075825,0.020897,0.0,0.075925,-0.267867,0.136903,0.0,0.149546,1.195275,0.021708,0.0,0.0,0.0,36.176707,0.366978,0.0,0.060515,0.0,0.055851,-0.994217,0.0,0.077437,0.075925,0.040392,-0.009753,-0.095024
1,2024-09-30,RDDT,2024,Q3,0.102453,-0.001646,0.027024,0.135292,0.0,0.133483,0.040433,-0.041451,0.603635,0.004753,0.296037,-1.0,0.0,0.417904,0.159571,0.0,0.04592,-0.035552,0.005589,0.0,0.336134,-0.356601,0.080049,0.0,0.0,0.0,-0.023529,-0.051973,0.0,0.063404,0.0,0.022933,5.133466,0.0,0.044235,0.04592,-0.001687,-0.041244,-0.111642
2,2024-06-30,RDDT,2024,Q2,-0.516836,0.754115,0.017183,0.126524,0.0,0.26277,0.031624,0.057514,0.0,-0.011673,-0.00603,0.0,1.0,-0.480971,-0.002629,-1.0,0.029995,0.417625,0.0,0.0,0.138285,0.166225,0.019363,0.0,0.0,0.0,-0.076084,0.090048,-1.0,0.027765,0.0625,-0.007819,-1.127119,0.0,0.03021,0.029995,0.754061,0.115275,0.533734
3,2024-03-31,RDDT,2024,Q1,1.41419,-0.135614,0.376902,-0.091802,-1.0,-0.811672,0.297589,-0.046593,0.0,-0.073219,-0.039209,0.0,-1.0,1.044814,0.003118,-1.0,0.262723,-0.024423,0.0,0.0,0.168,0.128727,0.171558,0.0,0.0,0.0,-0.038331,-0.988746,1.0,0.139188,1.666667,-0.802535,-1.579853,0.0,0.276092,0.262723,-0.135552,-0.020546,-1.512584
4,2023-12-31,RDDT,2023,Q4,-0.0763,0.031459,-0.006856,0.272657,0.0,0.195502,0.029587,-0.001589,0.0,-0.1184,-0.067094,0.0,-0.041482,0.041461,-0.041461,0.135643,0.026307,0.316894,0.0,0.0,-0.007393,-0.244148,-0.001689,0.0,0.0,0.0,-0.036916,-0.000622,-5.395222e-10,-0.008856,0.0,0.025213,1.528571,0.0,0.030262,0.026307,0.031459,-0.019386,0.079962
5,2023-09-30,RDDT,2023,Q3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,2023-03-31,RDDT,2023,Q1,-3.906184,2.049217,0.0,-1.0,-1.0,-1.0,-0.146492,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-12.166422,-14.166422,-1.0,-1.0,-1.0,0.0,0.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,-1.0,-1.0,1.0,-1.0,-1.0,1.0,-98.964135,0.0,0.0,-0.078316,2.049217,-1.0,4.042251
7,2022-12-31,RDDT,2022,Q4,-0.099925,0.019701,-0.024893,0.223773,0.0,0.01484,-0.001962,-0.096652,-0.00781,-0.040546,-0.027695,0.0,0.0,0.050704,-0.050704,0.103295,-0.003874,-0.084202,0.0,0.0,-0.095829,1.18178,0.001138,0.0,0.0,0.0,-0.217715,87.479656,0.0,-0.017365,0.0,-0.036829,0.351462,0.0,-0.00271,-0.003874,0.019701,0.619071,0.118258
8,2022-09-30,RDDT,2022,Q3,-0.176877,0.064889,-0.040201,0.197456,0.0,1.16456,-0.026184,0.696623,1.108336,0.491238,0.684824,-1.0,-1.0,-0.688743,0.688743,0.02476,0.001108,0.42473,0.0,0.0,0.264974,-0.567612,0.181721,0.0,0.0,0.0,0.647165,1.524878,0.0,0.296162,0.0,-0.081162,-0.22733,0.0,-0.018166,0.001108,0.064889,0.159491,0.182921
9,2022-06-30,RDDT,2022,Q2,-0.146237,0.053532,-0.043729,-0.056413,0.0,-0.368263,-0.043119,-0.049597,0.942822,67.248139,4.830084,0.0,1.157198,-1.15512,1.15512,-0.013887,-0.022476,0.035347,0.0,0.0,0.070534,0.124208,-0.013409,0.0,0.0,0.0,10.634454,0.613903,0.0,0.020384,0.0,-0.110913,-0.704472,0.0,-0.025151,-0.022476,0.053532,-0.152201,0.14613


In [None]:
display(spark.sql('describe history financial_modeling.balance_sheet_growth'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,32,2025-03-21 23:39:37,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,31,Serializable,True,"{'numOutputRows': '7899', 'numOutputBytes': '1493294', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
1,31,2025-03-21 23:36:14,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '14301', 'numOutputBytes': '3151953', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
2,30,2025-03-21 23:31:25,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '14088', 'numOutputBytes': '2986644', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
3,29,2025-03-21 23:26:48,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '15903', 'numOutputBytes': '3430898', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
4,28,2025-03-21 23:22:06,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '15016', 'numOutputBytes': '3185810', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
5,27,2025-03-21 23:17:18,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '15674', 'numOutputBytes': '3422740', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
6,26,2025-03-21 23:12:34,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '14657', 'numOutputBytes': '3120363', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
7,25,2025-03-21 23:07:57,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '13662', 'numOutputBytes': '2922562', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
8,24,2025-03-21 23:03:23,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '17954', 'numOutputBytes': '4017695', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
9,23,2025-03-21 22:58:30,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '18688', 'numOutputBytes': '4240420', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0


In [None]:
display(spark.sql('describe financial_modeling.balance_sheet_growth'), n=100)

Unnamed: 0,col_name,data_type,comment
0,date,date,
1,symbol,string,
2,calendarYear,int,
3,period,string,
4,growthCashAndCashEquivalents,double,
5,growthShortTermInvestments,double,
6,growthCashAndShortTermInvestments,double,
7,growthNetReceivables,double,
8,growthInventory,double,
9,growthOtherCurrentAssets,double,


In [None]:
display(spark.sql('select * from financial_modeling.balance_sheet_growth limit 3'))

Unnamed: 0,date,symbol,calendarYear,period,growthCashAndCashEquivalents,growthShortTermInvestments,growthCashAndShortTermInvestments,growthNetReceivables,growthInventory,growthOtherCurrentAssets,growthTotalCurrentAssets,growthPropertyPlantEquipmentNet,growthGoodwill,growthIntangibleAssets,growthGoodwillAndIntangibleAssets,growthLongTermInvestments,growthTaxAssets,growthOtherNonCurrentAssets,growthTotalNonCurrentAssets,growthOtherAssets,growthTotalAssets,growthAccountPayables,growthShortTermDebt,growthTaxPayables,growthDeferredRevenue,growthOtherCurrentLiabilities,growthTotalCurrentLiabilities,growthLongTermDebt,growthDeferredRevenueNonCurrent,growthDeferrredTaxLiabilitiesNonCurrent,growthOtherNonCurrentLiabilities,growthTotalNonCurrentLiabilities,growthOtherLiabilities,growthTotalLiabilities,growthCommonStock,growthRetainedEarnings,growthAccumulatedOtherComprehensiveIncomeLoss,growthOthertotalStockholdersEquity,growthTotalStockholdersEquity,growthTotalLiabilitiesAndStockholdersEquity,growthTotalInvestments,growthTotalDebt,growthNetDebt
0,2024-12-31,RDDT,2024,Q4,0.089547,0.040392,0.054925,0.189223,0.0,6.325535,0.078886,-0.046378,0.0,-0.088876,-0.035389,0.0,0.0,2.075825,0.020897,0.0,0.075925,-0.267867,0.136903,0.0,0.149546,1.195275,0.021708,0.0,0.0,0.0,36.176707,0.366978,0.0,0.060515,0.0,0.055851,-0.994217,0.0,0.077437,0.075925,0.040392,-0.009753,-0.095024
1,2024-09-30,RDDT,2024,Q3,0.102453,-0.001646,0.027024,0.135292,0.0,0.133483,0.040433,-0.041451,0.603635,0.004753,0.296037,-1.0,0.0,0.417904,0.159571,0.0,0.04592,-0.035552,0.005589,0.0,0.336134,-0.356601,0.080049,0.0,0.0,0.0,-0.023529,-0.051973,0.0,0.063404,0.0,0.022933,5.133466,0.0,0.044235,0.04592,-0.001687,-0.041244,-0.111642
2,2024-06-30,RDDT,2024,Q2,-0.516836,0.754115,0.017183,0.126524,0.0,0.26277,0.031624,0.057514,0.0,-0.011673,-0.00603,0.0,1.0,-0.480971,-0.002629,-1.0,0.029995,0.417625,0.0,0.0,0.138285,0.166225,0.019363,0.0,0.0,0.0,-0.076084,0.090048,-1.0,0.027765,0.0625,-0.007819,-1.127119,0.0,0.03021,0.029995,0.754061,0.115275,0.533734


### 6.Financial Growth

In [None]:
# request_counter = 0
# sleeping_time = 70

# column_types = {

#     "date": T.DateType(),
#     "calendarYear": T.IntegerType(),
#     "revenueGrowth": T.DoubleType(),
#     "grossProfitGrowth": T.DoubleType(),
#     "ebitgrowth": T.DoubleType(),
#     "operatingIncomeGrowth": T.DoubleType(),
#     "netIncomeGrowth": T.DoubleType(),
#     "epsgrowth": T.DoubleType(),
#     "epsdilutedGrowth": T.DoubleType(),
#     "weightedAverageSharesGrowth": T.DoubleType(),
#     "weightedAverageSharesDilutedGrowth": T.DoubleType(),
#     "dividendsperShareGrowth": T.DoubleType(),
#     "operatingCashFlowGrowth": T.DoubleType(),
#     "freeCashFlowGrowth": T.DoubleType(),
#     "tenYRevenueGrowthPerShare": T.DoubleType(),
#     "fiveYRevenueGrowthPerShare": T.DoubleType(),
#     "threeYRevenueGrowthPerShare": T.DoubleType(),
#     "tenYOperatingCFGrowthPerShare": T.DoubleType(),
#     "fiveYOperatingCFGrowthPerShare": T.DoubleType(),
#     "threeYOperatingCFGrowthPerShare": T.DoubleType(),
#     "fiveYNetIncomeGrowthPerShare": T.DoubleType(),
#     "threeYNetIncomeGrowthPerShare": T.DoubleType(),
#     "tenYShareholdersEquityGrowthPerShare": T.DoubleType(),
#     "fiveYShareholdersEquityGrowthPerShare": T.DoubleType(),
#     "threeYShareholdersEquityGrowthPerShare": T.DoubleType(),
#     "tenYDividendperShareGrowthPerShare": T.DoubleType(),
#     "fiveYDividendperShareGrowthPerShare": T.DoubleType(),
#     "threeYDividendperShareGrowthPerShare": T.DoubleType(),
#     "receivablesGrowth": T.DoubleType(),
#     "inventoryGrowth": T.DoubleType(),
#     "assetGrowth": T.DoubleType(),
#     "bookValueperShareGrowth": T.DoubleType(),
#     "debtGrowth": T.DoubleType(),
#     "rdexpenseGrowth": T.DoubleType(),
#     "sgaexpensesGrowth": T.DoubleType(),

# }

# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:

#     base_url = f'https://financialmodelingprep.com/api/v3/financial-growth/{stock}?period=quarter'

#     if request_counter==300 or stock==stock_list[-1]:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)

#         for col_name, dtype in column_types.items():
#           combined_df = combined_df.withColumn(col_name, F.col(col_name).cast(dtype))

#         combined_df = combined_df.repartition(1)

#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('financial_growth'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}&apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
#         df = spark.createDataFrame(data, schema=schema)
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")



# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.financial_growth'))

Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Error fetching data: 502 Server Error: Bad Gateway for url: https://financialmodelingprep.com/api/v3/financial-growth/AGIL?period=quarter&apikey=YaMBp5i8VfOlobC4G4bYDb6vaaDJ8LWh
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds

Unnamed: 0,symbol,date,calendarYear,period,revenueGrowth,grossProfitGrowth,ebitgrowth,operatingIncomeGrowth,netIncomeGrowth,epsgrowth,epsdilutedGrowth,weightedAverageSharesGrowth,weightedAverageSharesDilutedGrowth,dividendsperShareGrowth,operatingCashFlowGrowth,freeCashFlowGrowth,tenYRevenueGrowthPerShare,fiveYRevenueGrowthPerShare,threeYRevenueGrowthPerShare,tenYOperatingCFGrowthPerShare,fiveYOperatingCFGrowthPerShare,threeYOperatingCFGrowthPerShare,tenYNetIncomeGrowthPerShare,fiveYNetIncomeGrowthPerShare,threeYNetIncomeGrowthPerShare,tenYShareholdersEquityGrowthPerShare,fiveYShareholdersEquityGrowthPerShare,threeYShareholdersEquityGrowthPerShare,tenYDividendperShareGrowthPerShare,fiveYDividendperShareGrowthPerShare,threeYDividendperShareGrowthPerShare,receivablesGrowth,inventoryGrowth,assetGrowth,bookValueperShareGrowth,debtGrowth,rdexpenseGrowth,sgaexpensesGrowth
0,BTOC,2024-12-31,2025,Q2,0.203894,1.128837,0.700896,0.700896,0.643011,0.638182,0.638182,0.000203,-0.001715,0.0,-0.562365,-0.280533,0.752969,0.752969,0.752969,-15.238976,-15.238976,-15.238976,-1.484268360510297,-1.484268,-1.484268,0.0,0.0,0.0,0.0,0.0,0.0,0.198359,0.0,0.039519,-0.040935,0.068145,0.0,-0.275202
1,BTOC,2024-09-30,2025,Q1,-0.061944,-3.021679,-446.207265,-446.207265,-18.917983,0.0,0.0,0.0,0.0,0.0,-2.786137,-0.609464,0.496525,0.496525,0.496525,-1.799593,-1.799593,-1.799593,-2.4767577747883647,-2.476758,-2.476758,2.2701,2.2701,2.2701,0.0,0.0,0.0,0.045738,0.0,-0.057138,0.0,-0.020482,0.0,0.104076
2,BTOC,2024-06-30,2024,Q4,0.178128,-0.426813,-1.285171,-1.285171,-0.619775,-1.0,-1.0,-1.0,-1.0,0.0,-5.208656,-33.736643,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.148417,-1.0,-0.021414,-1.0,-0.058501,0.0,0.0456
3,BTOC,2024-03-31,2024,Q3,-0.084852,-0.574837,-0.98801,-0.98801,-0.817823,-0.817778,-0.817778,0.0,0.0,0.0,-0.679107,1.085609,1.897956,1.897956,1.897956,0.0,0.0,0.0,2.1546575868697566,2.154658,2.154658,0.0,0.0,0.0,0.0,0.0,0.0,0.069405,0.0,0.31566,0.027283,0.457331,0.0,0.114789
4,BTOC,2023-12-31,2024,Q2,0.018383,0.473964,0.433966,0.433966,0.359394,0.359517,0.359517,0.0,0.0,0.0,-0.769827,-1.57341,0.441174,0.441174,0.441174,0.784183,0.784183,0.784183,0.0940855267194306,0.094086,0.094086,0.0,0.0,0.0,0.0,0.0,0.0,-0.0649,0.0,0.0282,0.149656,0.000921,0.0,0.545935
5,BTOC,2023-09-30,2024,Q1,-0.142192,-0.233077,-0.438178,-0.438178,-0.444696,-0.448333,-0.448333,0.039999,0.04,0.0,1.311636,0.475284,0.454169,0.454169,0.454169,-0.320061,-0.320061,-0.320061,-0.1240157492834152,-0.124016,-0.124016,1.664647,1.664647,1.664647,0.0,0.0,0.0,0.334741,-1.0,0.424865,0.069788,0.701963,0.0,3.389024
6,BTOC,2023-06-30,2023,Q4,0.595664,0.08152,0.830294,0.830294,1.27655,1.290076,1.290076,-0.038461,-0.038462,0.0,-0.762491,-0.693476,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.464081,0.219901,0.18443,0.301343,0.141929,0.0,-1.257562
7,BTOC,2023-03-31,2023,Q3,0.033889,-0.133953,-0.319775,-0.319775,-0.363354,-0.363305,-0.363305,0.0,0.0,0.0,13.118398,4894.385681,1.271737,1.271737,1.271737,0.0,0.0,0.0,4.688139354095445,4.688139,4.688139,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.226949
8,BTOC,2022-12-31,2023,Q2,0.027566,0.328783,0.171452,0.171452,0.088404,0.088624,0.088624,0.0,0.0,0.0,-0.912283,-1.000197,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.797704
9,BTOC,2022-09-30,2023,Q1,0.112422,0.593668,0.787625,0.787625,0.553674,0.0,0.0,0.0,0.0,0.0,1.65789,4.920338,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.268206,0.0,-1.0,0.0,-1.0,0.0,1.28589


In [None]:
display(spark.sql('describe history financial_modeling.financial_growth'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,32,2025-03-22 02:13:18,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,31,Serializable,True,"{'numOutputRows': '8279', 'numOutputBytes': '1759415', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
1,31,2025-03-22 02:10:05,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '15090', 'numOutputBytes': '3445951', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
2,30,2025-03-22 02:05:24,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '14772', 'numOutputBytes': '3327478', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
3,29,2025-03-22 02:00:52,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '16799', 'numOutputBytes': '3792471', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
4,28,2025-03-22 01:56:21,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '15791', 'numOutputBytes': '3547984', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
5,27,2025-03-22 01:51:47,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '16566', 'numOutputBytes': '3756281', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
6,26,2025-03-22 01:47:12,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '15508', 'numOutputBytes': '3503014', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
7,25,2025-03-22 01:42:40,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '14368', 'numOutputBytes': '3241747', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
8,24,2025-03-22 01:38:15,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '18835', 'numOutputBytes': '4278294', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
9,23,2025-03-22 01:33:37,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '19647', 'numOutputBytes': '4501137', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0


In [None]:
display(spark.sql('describe financial_modeling.financial_growth'), n=100)

Unnamed: 0,col_name,data_type,comment
0,symbol,string,
1,date,date,
2,calendarYear,int,
3,period,string,
4,revenueGrowth,double,
5,grossProfitGrowth,double,
6,ebitgrowth,double,
7,operatingIncomeGrowth,double,
8,netIncomeGrowth,double,
9,epsgrowth,double,


In [None]:
display(spark.sql('select * from financial_modeling.financial_growth limit 3'))

Unnamed: 0,symbol,date,calendarYear,period,revenueGrowth,grossProfitGrowth,ebitgrowth,operatingIncomeGrowth,netIncomeGrowth,epsgrowth,epsdilutedGrowth,weightedAverageSharesGrowth,weightedAverageSharesDilutedGrowth,dividendsperShareGrowth,operatingCashFlowGrowth,freeCashFlowGrowth,tenYRevenueGrowthPerShare,fiveYRevenueGrowthPerShare,threeYRevenueGrowthPerShare,tenYOperatingCFGrowthPerShare,fiveYOperatingCFGrowthPerShare,threeYOperatingCFGrowthPerShare,tenYNetIncomeGrowthPerShare,fiveYNetIncomeGrowthPerShare,threeYNetIncomeGrowthPerShare,tenYShareholdersEquityGrowthPerShare,fiveYShareholdersEquityGrowthPerShare,threeYShareholdersEquityGrowthPerShare,tenYDividendperShareGrowthPerShare,fiveYDividendperShareGrowthPerShare,threeYDividendperShareGrowthPerShare,receivablesGrowth,inventoryGrowth,assetGrowth,bookValueperShareGrowth,debtGrowth,rdexpenseGrowth,sgaexpensesGrowth
0,BTOC,2024-12-31,2025,Q2,0.203894,1.128837,0.700896,0.700896,0.643011,0.638182,0.638182,0.000203,-0.001715,0.0,-0.562365,-0.280533,0.752969,0.752969,0.752969,-15.238976,-15.238976,-15.238976,-1.484268360510297,-1.484268,-1.484268,0.0,0.0,0.0,0.0,0.0,0.0,0.198359,0.0,0.039519,-0.040935,0.068145,0.0,-0.275202
1,BTOC,2024-09-30,2025,Q1,-0.061944,-3.021679,-446.207265,-446.207265,-18.917983,0.0,0.0,0.0,0.0,0.0,-2.786137,-0.609464,0.496525,0.496525,0.496525,-1.799593,-1.799593,-1.799593,-2.4767577747883647,-2.476758,-2.476758,2.2701,2.2701,2.2701,0.0,0.0,0.0,0.045738,0.0,-0.057138,0.0,-0.020482,0.0,0.104076
2,BTOC,2024-06-30,2024,Q4,0.178128,-0.426813,-1.285171,-1.285171,-0.619775,-1.0,-1.0,-1.0,-1.0,0.0,-5.208656,-33.736643,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.148417,-1.0,-0.021414,-1.0,-0.058501,0.0,0.0456


### 7.Financial Score

In [None]:
# request_counter = 0
# sleeping_time = 70

# column_types = {

#     "altmanZScore": T.DoubleType(),
#     "piotroskiScore": T.LongType(),
#     "workingCapital": T.LongType(),
#     "totalAssets": T.LongType(),
#     "retainedEarnings": T.LongType(),
#     "ebit": T.LongType(),
#     "marketCap": T.LongType(),
#     "totalLiabilities": T.LongType(),
#     "revenue": T.LongType(),

# }

# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:

#     base_url = f'https://financialmodelingprep.com/api/v4/score?symbol={stock}'

#     if request_counter==300 or stock==stock_list[-1]:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)

#         for col_name, dtype in column_types.items():
#           combined_df = combined_df.withColumn(col_name, F.col(col_name).cast(dtype))

#         combined_df = combined_df.repartition(1)

#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('financial_score'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}&apikey={API_KEY}"
#     data = get_jsonparsed_data(url)

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
#         df = spark.createDataFrame(data, schema=schema)
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")



# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.financial_score'))

Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds.

Unnamed: 0,symbol,altmanZScore,piotroskiScore,workingCapital,totalAssets,retainedEarnings,ebit,marketCap,totalLiabilities,revenue
0,XPON,-8.502935,3,2000071,10128975,-34323594,-14406485,3207357,7584126,4498182
1,IMMX,-2.331766,2,16191004,25141552,-70212617,-22855673,48138365,7590841,4784
2,TGTX,9.537713,4,475680000,577690000,-1529194000,49688000,6752482290,355326000,329004000
3,DCGO,0.47683,7,182679686000000,455621132000000,-1402167000000,29678209,272294610,140442002000000,616555132
4,ISPR,0.433258,3,6084432000,132042434000,-22418700000,-21556660,178123836,107808346000,81233531483
5,ACGL,1.300524,6,0,70906000000,22686000000,4261000000,34645451100,50086000000,16965000000
6,OTLY,-1.923709,0,-235934000,803980000,-1249303000,-98555389,292995042,697857000,608223442
7,BMRC,0.614259,6,1382374000,3701335000,249964000,-28626000,356015672,3265928000,117160000
8,LRMR,3.146152,1,196971000,219021000,-240334000,-73833000,162068764,20621000,0
9,GEOS,4.994915,3,78197000,159939000,63658000,-10579000,94803856,17229000,122789000


In [None]:
display(spark.sql('describe history financial_modeling.financial_score'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,32,2025-03-22 14:21:45,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,31,Serializable,True,"{'numOutputRows': '186', 'numOutputBytes': '13604', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
1,31,2025-03-22 14:19:00,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '260', 'numOutputBytes': '18320', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
2,30,2025-03-22 14:15:12,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '256', 'numOutputBytes': '18280', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
3,29,2025-03-22 14:11:38,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '267', 'numOutputBytes': '18800', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
4,28,2025-03-22 14:07:48,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '269', 'numOutputBytes': '18744', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
5,27,2025-03-22 14:04:05,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '266', 'numOutputBytes': '19060', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
6,26,2025-03-22 14:00:27,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '267', 'numOutputBytes': '18826', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
7,25,2025-03-22 13:56:34,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '256', 'numOutputBytes': '18193', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
8,24,2025-03-22 13:52:46,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '280', 'numOutputBytes': '19611', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0
9,23,2025-03-22 13:49:09,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '286', 'numOutputBytes': '19969', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.0


In [None]:
display(spark.sql('describe financial_modeling.financial_score'), n=100)

Unnamed: 0,col_name,data_type,comment
0,symbol,string,
1,altmanZScore,double,
2,piotroskiScore,bigint,
3,workingCapital,bigint,
4,totalAssets,bigint,
5,retainedEarnings,bigint,
6,ebit,bigint,
7,marketCap,bigint,
8,totalLiabilities,bigint,
9,revenue,bigint,


In [None]:
display(spark.sql('select * from financial_modeling.financial_score limit 3'))

Unnamed: 0,symbol,altmanZScore,piotroskiScore,workingCapital,totalAssets,retainedEarnings,ebit,marketCap,totalLiabilities,revenue
0,XPON,-8.502935,3,2000071,10128975,-34323594,-14406485,3207357,7584126,4498182
1,IMMX,-2.331766,2,16191004,25141552,-70212617,-22855673,48138365,7590841,4784
2,TGTX,9.537713,4,475680000,577690000,-1529194000,49688000,6752482290,355326000,329004000


## Dividends

### 1.Dividends Historical

In [None]:
# request_counter = 0
# sleeping_time = 70

# column_types = {

#     "date": T.DateType(),
#     "adjDividend": T.DoubleType(),
#     "dividend": T.DoubleType(),
#     "recordDate": T.DateType(),
#     "paymentDate": T.DateType(),
#     "declarationDate": T.DateType(),

# }

# dfs = []
# missing_stocks = []

# if API_KEY:

#   time.sleep(sleeping_time)

#   for stock in stock_list:

#     base_url = f'https://financialmodelingprep.com/api/v3/historical-price-full/stock_dividend/{stock}'

#     if request_counter==300 or stock==stock_list[-1]:

#         combined_df = dfs[0]
#         for df in dfs[1:]:
#             combined_df = combined_df.union(df)

#         for col_name, dtype in column_types.items():
#           combined_df = combined_df.withColumn(col_name, F.col(col_name).cast(dtype))

#         combined_df = combined_df.repartition(1)

#         (combined_df.write
#             .format("delta")
#             .mode('append')
#             .saveAsTable('dividends_historical'))

#         print('Data was appended successfully.')

#         dfs = []
#         request_counter = 0

#         print(f"Sleeping for {sleeping_time} seconds...")
#         time.sleep(sleeping_time)


#     url = f"{base_url}?apikey={API_KEY}"
#     result = get_jsonparsed_data(url)
#     data = result['historical']

#     if data and isinstance(data, list) and len(data) > 0:

#         schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
#         df = spark.createDataFrame(data, schema=schema)
#         df = df.withColumn('symbol', F.lit(stock))
#         dfs.append(df)
#         request_counter += 1

#     else:
#         missing_stocks.append(stock)
#         request_counter += 1


# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")



# print(f"Missing stocks: {len(missing_stocks)}.")
# display(spark.sql('select * from financial_modeling.dividends_historical'))

Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds...
Data was appended successfully.
Sleeping for 70 seconds.

Unnamed: 0,date,label,adjDividend,dividend,recordDate,paymentDate,declarationDate,symbol
0,2025-03-14,"March 14, 25",0.91,0.91,2025-03-14,2025-04-04,2025-02-27,CB
1,2024-12-13,"December 13, 24",0.91,0.91,2024-12-13,2025-01-03,2024-11-21,CB
2,2024-09-13,"September 13, 24",0.91,0.91,2024-09-13,2024-10-04,2024-08-08,CB
3,2024-06-14,"June 14, 24",0.91,0.91,2024-06-14,2024-07-05,2024-05-16,CB
4,2024-03-14,"March 14, 24",0.86,0.86,2024-03-15,2024-04-05,2024-02-22,CB
5,2023-12-14,"December 14, 23",0.86,0.86,2023-12-15,2024-01-05,2023-11-16,CB
6,2023-09-14,"September 14, 23",0.86,0.86,2023-09-15,2023-10-06,2023-08-10,CB
7,2023-06-15,"June 15, 23",0.86,0.86,2023-06-16,2023-07-07,2023-05-17,CB
8,2023-03-16,"March 16, 23",0.83,0.83,2023-03-17,2023-04-10,2023-02-23,CB
9,2022-12-15,"December 15, 22",0.83,0.83,2022-12-16,2023-01-06,2022-11-17,CB


In [None]:
display(spark.sql('describe history financial_modeling.dividends_historical'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,32,2025-05-04 07:37:25,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,31,Serializable,True,"{'numOutputRows': '1302', 'numOutputBytes': '30176', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1
1,31,2025-05-04 07:35:46,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '7127', 'numOutputBytes': '147443', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1
2,30,2025-05-04 07:33:21,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '4462', 'numOutputBytes': '100157', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1
3,29,2025-05-04 07:31:30,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '4385', 'numOutputBytes': '103133', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1
4,28,2025-05-04 07:29:33,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '3479', 'numOutputBytes': '82137', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1
5,27,2025-05-04 07:27:42,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '4677', 'numOutputBytes': '111143', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1
6,26,2025-05-04 07:25:41,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '4535', 'numOutputBytes': '105790', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1
7,25,2025-05-04 07:23:47,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '3699', 'numOutputBytes': '86780', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1
8,24,2025-05-04 07:21:53,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '7056', 'numOutputBytes': '158438', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1
9,23,2025-05-04 07:19:54,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '6648', 'numOutputBytes': '153024', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1


In [None]:
display(spark.sql('describe financial_modeling.dividends_historical'), n=100)

Unnamed: 0,col_name,data_type,comment
0,date,date,
1,label,string,
2,adjDividend,double,
3,dividend,double,
4,recordDate,date,
5,paymentDate,date,
6,declarationDate,date,
7,symbol,string,


In [None]:
display(spark.sql('select * from financial_modeling.dividends_historical limit 3'))

Unnamed: 0,date,label,adjDividend,dividend,recordDate,paymentDate,declarationDate,symbol
0,2025-03-14,"March 14, 25",0.91,0.91,2025-03-14,2025-04-04,2025-02-27,CB
1,2024-12-13,"December 13, 24",0.91,0.91,2024-12-13,2025-01-03,2024-11-21,CB
2,2024-09-13,"September 13, 24",0.91,0.91,2024-09-13,2024-10-04,2024-08-08,CB


## Splits

### 1.Splits Historical

In [None]:
request_counter = 0
sleeping_time = 90

column_types = {

    "date": T.DateType(),
    "numerator": T.IntegerType(),
    "denominator": T.IntegerType(),

}

dfs = []
missing_stocks = []

if API_KEY:

  time.sleep(sleeping_time)

  for stock in stock_list:

    base_url = f'https://financialmodelingprep.com/api/v3/historical-price-full/stock_split/{stock}'

    if request_counter==300 or stock==stock_list[-1]:

        combined_df = dfs[0]
        for df in dfs[1:]:
            combined_df = combined_df.union(df)

        for col_name, dtype in column_types.items():
          combined_df = combined_df.withColumn(col_name, F.col(col_name).cast(dtype))

        combined_df = combined_df.repartition(1)

        (combined_df.write
            .format("delta")
            .mode('append')
            .saveAsTable('splits_historical'))

        print('Data was appended successfully.')

        dfs = []
        request_counter = 0

        print(f"Sleeping for {sleeping_time} seconds...")
        time.sleep(sleeping_time)


    url = f"{base_url}?apikey={API_KEY}"
    result = get_jsonparsed_data(url)
    if 'historical' in result.keys():
       data = result['historical']
    else:
       data = None

    if data and isinstance(data, list) and len(data) > 0:

        schema = T.StructType([T.StructField(col, T.StringType(), True) for col in data[0].keys()])
        df = spark.createDataFrame(data, schema=schema)
        df = df.withColumn('symbol', F.lit(stock))
        dfs.append(df)
        request_counter += 1

    else:
        missing_stocks.append(stock)
        request_counter += 1


else:
    print("API key not found. Please set the FMP_API_KEY environment variable.")



print(f"Missing stocks: {len(missing_stocks)}.")
display(spark.sql('select * from financial_modeling.splits_historical'))

Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds...
Data was appended successfully.
Sleeping for 90 seconds.

Unnamed: 0,date,label,numerator,denominator,symbol
0,2024-09-19,"September 19, 24",1,50,BNZI
1,2006-04-19,"April 19, 06",1,2,CB
2,1998-03-03,"March 03, 98",3,1,CB
3,1996-05-06,"May 06, 96",1,2,CB
4,1990-05-31,"May 31, 90",2,1,CB
5,1985-10-25,"October 25, 85",3,2,CB
6,2018-02-02,"February 02, 18",1004,1000,CAAP
7,2022-12-06,"December 06, 22",1,10,CKPT
8,2006-12-12,"December 12, 06",3,1,EZPW
9,2000-01-27,"January 27, 00",2,1,MS


In [None]:
display(spark.sql('describe history financial_modeling.splits_historical'))

Unnamed: 0,version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,32,2025-05-04 11:04:10,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,31,Serializable,True,"{'numOutputRows': '215', 'numOutputBytes': '5087', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1
1,31,2025-05-04 11:02:05,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,30,Serializable,True,"{'numOutputRows': '338', 'numOutputBytes': '6849', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1
2,30,2025-05-04 10:59:36,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,29,Serializable,True,"{'numOutputRows': '347', 'numOutputBytes': '6975', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1
3,29,2025-05-04 10:57:18,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,28,Serializable,True,"{'numOutputRows': '351', 'numOutputBytes': '7184', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1
4,28,2025-05-04 10:54:51,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,27,Serializable,True,"{'numOutputRows': '290', 'numOutputBytes': '6266', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1
5,27,2025-05-04 10:52:33,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,26,Serializable,True,"{'numOutputRows': '332', 'numOutputBytes': '6730', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1
6,26,2025-05-04 10:50:12,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,25,Serializable,True,"{'numOutputRows': '393', 'numOutputBytes': '7640', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1
7,25,2025-05-04 10:47:43,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,24,Serializable,True,"{'numOutputRows': '286', 'numOutputBytes': '6166', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1
8,24,2025-05-04 10:45:24,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,23,Serializable,True,"{'numOutputRows': '449', 'numOutputBytes': '8546', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1
9,23,2025-05-04 10:42:45,,,WRITE,"{'mode': 'Append', 'partitionBy': '[]'}",,,,22,Serializable,True,"{'numOutputRows': '456', 'numOutputBytes': '8482', 'numFiles': '1'}",,Apache-Spark/3.5.5 Delta-Lake/3.3.1


In [None]:
display(spark.sql('describe financial_modeling.splits_historical'), n=100)

Unnamed: 0,col_name,data_type,comment
0,date,date,
1,label,string,
2,numerator,int,
3,denominator,int,
4,symbol,string,


In [None]:
display(spark.sql('select * from financial_modeling.splits_historical limit 3'))

Unnamed: 0,date,label,numerator,denominator,symbol
0,2024-09-19,"September 19, 24",1,50,BNZI
1,2006-04-19,"April 19, 06",1,2,CB
2,1998-03-03,"March 03, 98",3,1,CB


## Market Performance

### 1.Sector Historical

In [None]:

base_url = f'https://financialmodelingprep.com/api/v3/historical-sectors-performance?from=2020-01-01&to=2024-09-01'

if API_KEY:
    url = f"{base_url}&apikey={API_KEY}"
    data = get_jsonparsed_data(url)
    sector_historical_df = pd.DataFrame(data)
else:
    print("API key not found. Please set the FMP_API_KEY environment variable.")


print(sector_historical_df.shape)
sector_historical_df.head()

## Economic Data

### 1.Tresury Rates

In [None]:

base_url = f'https://financialmodelingprep.com/api/v4/treasury?from=2020-01-10&to=2024-09-01'

if API_KEY:
    url = f"{base_url}&apikey={API_KEY}"
    data = get_jsonparsed_data(url)
    tresury_rates_df = pd.DataFrame(data)
else:
    print("API key not found. Please set the FMP_API_KEY environment variable.")


print(tresury_rates_df.shape)
tresury_rates_df.head()

### 2.Economic Indicators

In [None]:
indicators = [
              'GDP', 'realGDP', 'nominalPotentialGDP', 'realGDPPerCapita', 'federalFunds', 'CPI', 'inflationRate', 'inflation', 'retailSales',
              'consumerSentiment', 'durableGoods', 'unemploymentRate', 'totalNonfarmPayroll', 'initialClaims', 'industrialProductionTotalIndex',
              'newPrivatelyOwnedHousingUnitsStartedTotalUnits', 'totalVehicleSales', 'retailMoneyFunds', 'smoothedUSRecessionProbabilities',
              '3MonthOr90DayRatesAndYieldsCertificatesOfDeposit', 'commercialBankInterestRateOnCreditCardPlansAllAccounts',
              '30YearFixedRateMortgageAverage', '15YearFixedRateMortgageAverage' ]

economic_indicators_data = []

for indicator in indicators[:2]:

  base_url = f'https://financialmodelingprep.com/api/v4/economic?name={indicator}&from=2020-01-10&to=2024-09-01'

  if API_KEY:
      url = f"{base_url}&apikey={API_KEY}"
      data = get_jsonparsed_data(url)
      df = pd.DataFrame(data)
      df['indicator'] = indicator
      economic_indicators_data.append(df)
  else:
      print("API key not found. Please set the FMP_API_KEY environment variable.")


economic_indicators_df = pd.concat(economic_indicators_data)
print(economic_indicators_df.shape)
economic_indicators_df.head()

## Symbol Change

In [63]:
base_url = f'https://financialmodelingprep.com/stable/symbol-change?limit=10000'

if API_KEY:

  url = f"{base_url}&apikey={API_KEY}"
  result = get_jsonparsed_data(url)

  symbol_change_df = spark.createDataFrame(result)
  (symbol_change_df.write.format("delta")
                   .mode('overwrite')
                   .option('overwriteSchema', True)
                   .saveAsTable('financial_modeling.symbol_change'))

else:
  print("API key not found. Please set the FMP_API_KEY environment variable.")


In [66]:
display(spark.sql('select * from financial_modeling.symbol_change'))

Unnamed: 0,companyName,date,newSymbol,oldSymbol
0,Link Motion,2018-03-14,LKM,NQ
1,Steel Connect,2018-03-13,STCN,MLNK
2,Target Hospitality Corp.,2018-03-13,TH,EAGLU
3,Convergeone Holdings,2018-03-13,CVON,FMCI
4,Qurate Retail,2018-03-13,QRTEA,QVCA
5,Ranpak Holdings Corp.,2018-03-07,PACK,OMAD.U
6,Dunxin Financial Holdings Ltd.,2018-03-05,DXF,XNY
7,Alpine Immune Sciences,2018-03-03,ALPN,NVLS
8,Booking Holdings,2018-02-28,BKNG,PCLN
9,Welltower,2018-02-28,WELL,HCN


## Constituents

### S&P 500 Historical Constituent

#### Stable Version

In [25]:

base_url = 'https://financialmodelingprep.com/stable/historical-sp500-constituent'

if API_KEY:
    url = f"{base_url}?apikey={API_KEY}"
    response = requests.get(url)
    data = response.json()

    df = spark.createDataFrame(data)

    df = df.withColumn("add_flag", F.when(F.col("addedSecurity") == "", 0).otherwise(1)) \
           .withColumn("remove_flag", F.when(F.col("removedSecurity") == "", 0).otherwise(1))

    df = df.withColumn("date", F.to_date(F.col("date")))

    # df = df.withColumn("date_minus_1_quarter", F.add_months(F.col("date"), -3))

    # df = df.withColumn("year", F.year(F.col("date_minus_1_quarter"))) \
    #        .withColumn("quarter", F.quarter(F.col("date_minus_1_quarter")))

    df = df.withColumn("year", F.year(F.col("date"))) \
           .withColumn("quarter", F.quarter(F.col("date")))

    index_additions = df.filter(F.col("add_flag") == 1).select("symbol", "year", "quarter", "add_flag")
    index_removals = df.filter(F.col("remove_flag") == 1).select("symbol", "year", "quarter", "remove_flag")

    print(f"Row count: {df.count()}, Column count: {len(df.columns)}")
    df.show(5)
else:
    print("API key not found. Please set the FMP_API_KEY environment variable.")


Row count: 1497, Column count: 11
+--------------------+----------+-----------------+--------------------+---------------+-------------+------+--------+-----------+----+-------+
|       addedSecurity|      date|        dateAdded|              reason|removedSecurity|removedTicker|symbol|add_flag|remove_flag|year|quarter|
+--------------------+----------+-----------------+--------------------+---------------+-------------+------+--------+-----------+----+-------+
|     Williams-Sonoma|2025-03-23|   March 24, 2025|Market capitaliza...|FMC Corporation|          FMC|   WSM|       1|          1|2025|      1|
|  TKO Group Holdings|2025-03-23|   March 24, 2025|Market capitaliza...|       Celanese|           CE|   TKO|       1|          1|2025|      1|
|            DoorDash|2025-03-23|   March 24, 2025|Market capitaliza...|       Teleflex|          TFX|  DASH|       1|          1|2025|      1|
|       Expand Energy|2025-03-23|   March 24, 2025|Market capitaliza...|     Borgwarner|          BWA|

#### Legacy Version

In [26]:

# base_url = 'https://financialmodelingprep.com/api/v3/historical/sp500_constituent'

# if API_KEY:
#     url = f"{base_url}?apikey={API_KEY}"
#     response = requests.get(url)
#     data = response.json()

#     df = spark.read.json(spark.sparkContext.parallelize(data))

#     df = df.withColumn("add_flag", F.when(F.col("addedSecurity") == "", 0).otherwise(1)) \
#            .withColumn("remove_flag", F.when(F.col("removedSecurity") == "", 0).otherwise(1))

#     df = df.withColumn("date", F.to_date(F.col("date")))

#     df = df.withColumn("date_minus_1_quarter", F.add_months(F.col("date"), -3))

#     df = df.withColumn("year", F.year(F.col("date_minus_1_quarter"))) \
#            .withColumn("quarter", F.quarter(F.col("date_minus_1_quarter")))

#     index_additions = df.filter(F.col("add_flag") == 1).select("symbol", "year", "quarter", "add_flag")
#     index_removals = df.filter(F.col("remove_flag") == 1).select("symbol", "year", "quarter", "remove_flag")

#     print(f"Row count: {df.count()}, Column count: {len(df.columns)}")
#     df.show(5)
# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")


In [27]:
(index_additions.write
        .format("delta")
        .mode('overwrite')
        .saveAsTable('sxp_index_additions'))


In [28]:
(index_removals.write
        .format("delta")
        .mode('overwrite')
        .saveAsTable('sxp_index_removals'))


In [34]:
display(spark.sql('select * from sxp_index_additions'))

Unnamed: 0,symbol,year,quarter,add_flag
0,WSM,2025,1,1
1,TKO,2025,1,1
2,DASH,2025,1,1
3,EXE,2025,1,1
4,APO,2024,4,1
5,LII,2024,4,1
6,WDAY,2024,4,1
7,TPL,2024,4,1
8,AMTM,2024,4,1
9,DELL,2024,3,1


In [35]:
display(spark.sql('select * from sxp_index_removals'))

Unnamed: 0,symbol,year,quarter,remove_flag
0,EMC,1996,1,1
1,GNT,1996,1,1
2,FITB,1996,1,1
3,BAY,1996,1,1
4,CSE,1996,1,1
5,COMS,1995,4,1
6,ITT,1995,4,1
7,HIG,1995,4,1
8,LSI,1995,4,1
9,CMA,1995,4,1


In [33]:
display(spark.sql('select count(*) from sxp_index_additions where year > 1990'))

Unnamed: 0,count(1)
0,769


In [37]:
display(spark.sql('select count(*) from sxp_index_removals where year > 1990'))

Unnamed: 0,count(1)
0,763


In [None]:

# base_url = 'https://financialmodelingprep.com/api/v3/historical/sp500_constituent'

# if API_KEY:
#     url = f"{base_url}?apikey={API_KEY}"
#     data = get_jsonparsed_data(url)
#     df = pd.DataFrame(data)
# else:
#     print("API key not found. Please set the FMP_API_KEY environment variable.")

# df['add_flag'] = np.where(df['addedSecurity']=='', 0, 1)
# df['remove_flag'] = np.where(df['removedSecurity']=='', 0, 1)

# df['date'] = pd.to_datetime(df['date'])
# df['date_minus_1_quarter'] = df['date'] - pd.DateOffset(months=3)


# df['year'] = df['date_minus_1_quarter'].dt.year
# df['quarter'] = df['date_minus_1_quarter'].dt.quarter


# index_additions = df[df['add_flag'] == 1][['symbol', 'year', 'quarter', 'add_flag']]
# index_removals = df[df['remove_flag'] == 1][['symbol', 'year', 'quarter', 'remove_flag']]

# print(df.shape)
# df.head()

In [None]:
index_additions

DataFrame[symbol: string, year: int, quarter: int, add_flag: int]

In [None]:
index_removals

DataFrame[symbol: string, year: int, quarter: int, remove_flag: int]

In [None]:
# db_connector = DBConnector()

# db_connector.write_df_to_db(df = df,
#                             table_name = 'sp500_historical_constituent',
#                             mode ='replace')

# db_connector.write_df_to_db(df = index_additions,
#                             table_name = 'sp500_index_additions',
#                             mode ='replace')

# db_connector.write_df_to_db(df = index_removals,
#                             table_name = 'sp500_index_removals',
#                             mode ='replace')

## Create Feature Store

In [None]:
# full_df = pd.merge(quarterly_market_cap, hist_employee_count_df, on=['symbol','year'], how='left')

# full_df = pd.merge(full_df, quarterly_analyst_recommendations_df, on=['symbol','year', 'quarter'], how='left')

# full_df = pd.merge(full_df, stock_peers_df, on=['symbol'], how='left')

# full_df = pd.merge(full_df, quarterly_company_historical_share_float_df, on=['symbol', 'year', 'quarter'], how='left')

# full_df = pd.merge(full_df, income_statements_df, on=['symbol', 'year', 'quarter'], how='left')

# full_df = pd.merge(full_df, balance_sheet_statements_df, on=['symbol', 'year', 'quarter'], how='left')

# full_df = pd.merge(full_df, cashflow_statements_df, on=['symbol', 'year', 'quarter'], how='left')

# full_df = pd.merge(full_df, key_metrics_df, on=['symbol', 'year', 'quarter'], how='left')

# full_df = pd.merge(full_df, ratios_df, on=['symbol', 'year', 'quarter'], how='left')

# full_df = pd.merge(full_df, cashflow_growth_df, on=['symbol', 'year', 'quarter'], how='left')

# full_df = pd.merge(full_df, income_growth_df, on=['symbol', 'year', 'quarter'], how='left')

# full_df = pd.merge(full_df, balance_sheet_growth_df, on=['symbol', 'year', 'quarter'], how='left')

# full_df = pd.merge(full_df, financial_growth_df, on=['symbol', 'year', 'quarter'], how='left')

# full_df = pd.merge(full_df, dividends_historical_df, on=['symbol', 'year', 'quarter'], how='left')



# full_df = pd.merge(full_df, index_additions, on=['symbol', 'year', 'quarter'], how='left')

# full_df.fillna(0, inplace=True)

# print(full_df.shape)
# full_df.drop(['peersList'], axis=1).head(10)