In [1]:
# Run configuration for this notebook.
#
# `symbols` is the list of ticker symbols handed to process_symbols() in the
# final cell. Rows that are commented out are not part of the list, so those
# tickers are skipped on this run.
# NOTE(review): presumably they were disabled to keep the run short - confirm.
# TODO: get symbols from "https://www.nasdaq.com/market-activity/ipos"
symbols = [
    "DIDI", "AVTE", "SHQAU", "CPOP", "THCPU", "MCAFU", "DNAA", "DNAB",
    # "DNAC", "DNAD", "IAS", "YOU", "CVRX", "LZ", "INTA", "S", "XMTR",
    # "BTTR", "DDL", "GOBI", "EOCWU", "IAUM", "ACXP", "GMVD", "ELEV",
    # "GRPH", "MF", "GHRS", "TKNO", "MCW", "NLITU", "DOCS", "GLUE",
    # "BON", "GRVI", "CFLT", "FTVIU", "MIRO", "BHG", "CXM", "MITAU",
    # "FA", "GPCOU", "YMM", "RERE", "AMAM", "DNAY", "ATAI", "FEMY",
    # "TRMR", "IPSC", "JUGGU", "CYT", "ISPC", "VERV", "TRONU", "LYEL",
    # "AOMR", "CNVY", "ZTAQU", "MOLN", "WKME", "RONIU", "DILAU",
    # "GSQBU", "ALZN", "SBEV", "LITTU", "HCNEU", "LAAAU", "TASK",
    # "JANX", "BZ", "ZETA", "BSGAU", "MNDY", "LFST", "DIBS", "SMIHU",
    "MQ", "GACQU", "CLBRU", "ISAA", "SPKAU", "ZME", "DLO"
]

# `year` and `month_num` are read as module-level globals by the save helpers
# to build the output directory "../data/ipo-dataset/{year}/{month_num}/".
# NOTE(review): presumably these should match the IPO month of the symbols
# listed above - verify against the source page before running.
# TODO: get year and month_num from "https://www.nasdaq.com/market-activity/ipos"
year = 2021
month_num = 8

In [2]:
import yfinance as yf
import pandas as pd
from pathlib import Path
import json

# Keys to keep from a ticker's `info` mapping. fetch_filtered_stock_info()
# copies only these keys, and silently skips any key the mapping lacks.
# The category comments below are organisational only; they do not affect
# the list contents.
selected_attributes = [
    # Financial performance indicators
    'trailingPE', 'forwardPE', 'priceToSalesTrailing12Months', 'priceToBook',
    'returnOnAssets', 'returnOnEquity', 'profitMargins', 'operatingMargins',
    'grossMargins', 'earningsQuarterlyGrowth', 'revenueGrowth', 
    # Market valuation metrics
    'marketCap', 'enterpriseValue', 'enterpriseToRevenue', 'enterpriseToEbitda',
    # Stock performance metrics
    'previousClose', 'open', 'dayLow', 'dayHigh', 'fiftyTwoWeekLow',
    'fiftyTwoWeekHigh', 'fiftyDayAverage', 'twoHundredDayAverage', 'beta', '52WeekChange',
    # Volume and liquidity
    'volume', 'averageVolume', 'averageVolume10days',
    # Analyst opinions
    'targetHighPrice', 'targetLowPrice', 'targetMeanPrice',
    'targetMedianPrice', 'recommendationMean', 'numberOfAnalystOpinions',
    # Financial health indicators
    'totalCash', 'totalDebt', 'quickRatio', 'currentRatio',
    'debtToEquity', 'freeCashflow', 'operatingCashflow',
    # Company-specific metrics
    'totalRevenue', 'ebitda', 'netIncomeToCommon',
    # Risk indicators
    'auditRisk', 'boardRisk', 'compensationRisk', 'shareHolderRightsRisk', 'overallRisk',
    # Ownership information
    'heldPercentInsiders', 'heldPercentInstitutions', 'sharesPercentSharesOut',
    # Market sentiment
    'shortRatio', 'shortPercentOfFloat'
]

In [3]:
# Function to save stock info as JSON
def save_info_as_json(symbol, info):
    """Dump ``info`` as indented JSON next to the symbol's other outputs.

    The file is written to
    ``../data/ipo-dataset/{year}/{month_num}/{symbol}-info.json``, where
    ``year`` and ``month_num`` are module-level globals. Missing parent
    directories are created.

    Args:
        symbol: Ticker symbol; used in the file name and in log messages.
        info: JSON-serialisable object to write.

    Returns:
        None. Any exception is caught and reported with ``print`` rather
        than propagated.
    """
    try:
        destination = Path(f"../data/ipo-dataset/{year}/{month_num}/{symbol}-info.json")
        # Create the year/month directory tree on first use.
        destination.parent.mkdir(parents=True, exist_ok=True)

        with destination.open('w') as handle:
            json.dump(info, handle, indent=4)
    except Exception as err:
        print(f"Error saving info data for {symbol}: {err}")
    else:
        print(f"Saved info data for {symbol}")

In [4]:
# Function to save stock historical data as CSV
def save_stock_data_to_csv(symbol, data):
    """Write a symbol's historical data to CSV without touching the input frame.

    The file is written to
    ``../data/ipo-dataset/{year}/{month_num}/{symbol}.csv``, where ``year``
    and ``month_num`` are module-level globals. Missing parent directories
    are created. The index is turned into a regular column and the ``Date``
    column is stored as a ``DDMMYYYY`` string (``'%d%m%Y'``); values that
    cannot be parsed as dates are coerced to missing.

    Args:
        symbol: Ticker symbol; used in the file name and in log messages.
        data: DataFrame whose index becomes a ``Date`` column after
            ``reset_index()``. ``None`` or an empty frame is reported as
            "no data" and nothing is written.

    Returns:
        None. Any exception is caught and reported with ``print`` rather
        than propagated.
    """
    try:
        if data is None or data.empty:
            print(f"No data to save for {symbol}")
            return

        # reset_index() without inplace returns a new frame, so the caller's
        # DataFrame keeps its original index and columns (the previous
        # inplace=True version silently rewrote the caller's object).
        frame = data.reset_index()
        frame['Date'] = pd.to_datetime(frame['Date'], errors='coerce').dt.strftime('%d%m%Y')

        csv_path = Path(f"../data/ipo-dataset/{year}/{month_num}/{symbol}.csv")
        csv_path.parent.mkdir(parents=True, exist_ok=True)
        frame.to_csv(csv_path, header=True, index=False)

        print(f"Saved historical data for {symbol}")
    except Exception as e:
        print(f"Error saving historical data for {symbol}: {e}")

In [5]:
# Function to fetch and filter stock info
def fetch_filtered_stock_info(symbol):
    """Return the subset of a ticker's ``info`` named in ``selected_attributes``.

    Args:
        symbol: Ticker symbol passed to ``yf.Ticker``.

    Returns:
        A dict holding each key of ``selected_attributes`` that is present
        in the ticker's ``info`` (possibly empty), or ``None`` if anything
        raised while fetching or filtering; the error is printed.
    """
    try:
        raw_info = yf.Ticker(symbol).info
        kept = {}
        for attribute in selected_attributes:
            # Attributes the ticker does not report are simply left out.
            if attribute in raw_info:
                kept[attribute] = raw_info[attribute]
    except Exception as err:
        print(f"Error fetching info for {symbol}: {err}")
        return None
    return kept

In [6]:
# Function to fetch stock historical data
def fetch_stock_history(symbol):
    """Fetch the full available history for ``symbol``.

    Args:
        symbol: Ticker symbol passed to ``yf.Ticker``.

    Returns:
        Whatever ``Ticker.history(period="max")`` returns, or ``None`` if
        the call raised; the error is printed.
    """
    try:
        ticker = yf.Ticker(symbol)
        history = ticker.history(period="max")
    except Exception as err:
        print(f"Error fetching history for {symbol}: {err}")
        return None
    return history

In [7]:
# Main function to process all symbols
def process_symbols(symbols):
    """Download and save info and history for every symbol, in order.

    For each symbol the filtered info is fetched first and saved only when
    it is truthy (so ``None`` and an empty dict are both skipped); the
    history is fetched next and saved unless the fetch returned ``None``.

    Args:
        symbols: Iterable of ticker symbols.
    """
    for ticker_symbol in symbols:
        info_subset = fetch_filtered_stock_info(ticker_symbol)
        if info_subset:
            save_info_as_json(ticker_symbol, info_subset)

        price_history = fetch_stock_history(ticker_symbol)
        if price_history is None:
            # The fetch already reported its error; move on to the next symbol.
            continue
        save_stock_data_to_csv(ticker_symbol, price_history)

In [8]:
# Run the pipeline for every ticker in `symbols`: fetch filtered info and
# full price history, then write them under
# "../data/ipo-dataset/{year}/{month_num}/". Failures for individual symbols
# are printed by the helpers and do not stop the loop.
process_symbols(symbols)

$DIDI: possibly delisted; no timezone found


No data to save for DIDI
Saved info data for AVTE
Saved historical data for AVTE


$SHQAU: possibly delisted; no timezone found


No data to save for SHQAU
Saved info data for CPOP
Saved historical data for CPOP
Saved info data for THCPU
Saved historical data for THCPU


404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/MCAFU?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=MCAFU&crumb=IbD7b6csMIc
$MCAFU: possibly delisted; no timezone found


No data to save for MCAFU


$DNAA: possibly delisted; no timezone found


No data to save for DNAA


404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/DNAB?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=DNAB&crumb=IbD7b6csMIc
$DNAB: possibly delisted; no timezone found


No data to save for DNAB
Saved info data for MQ
Saved historical data for MQ


$GACQU: possibly delisted; no timezone found


No data to save for GACQU


$CLBRU: possibly delisted; no timezone found


No data to save for CLBRU


$ISAA: possibly delisted; no timezone found


No data to save for ISAA


$SPKAU: possibly delisted; no timezone found


No data to save for SPKAU


$ZME: possibly delisted; no timezone found


No data to save for ZME
Saved info data for DLO
Saved historical data for DLO
