In [4]:
import pandas as pd
import numpy as np
import requests
from io import StringIO

url = "https://stockanalysis.com/ipos/withdrawn/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
response = requests.get(url, headers=headers)
tables = pd.read_html(StringIO(response.text))
df = tables[0]

def get_company_class(name):
    if pd.isna(name):
        return "Other"
    name = str(name).lower()
    patterns = [
        ("acquisition corp", "Acq.Corp"),
        ("incorporation", "Inc"),
        ("inc", "Inc"),
        ("group", "Group"),
        ("ltd", "Limited"),
        ("limited", "Limited"),
        ("holdings", "Holdings")
    ]
    for pattern, class_name in patterns:
        if pattern in name:
            return class_name
    return "Other"

company_col = 'Company Name'
df['Company Class'] = df[company_col].apply(get_company_class)

def parse_price_range(price_str):
    if pd.isna(price_str) or price_str.strip() == '-':
        return None
    price_str = price_str.replace('$', '').strip()
    if '-' in price_str:
        low, high = map(float, price_str.split('-'))
        return (low + high) / 2
    return float(price_str)

df['Avg. Price'] = df['Price Range'].apply(parse_price_range)

df['Shares Offered'] = pd.to_numeric(df['Shares Offered'], errors='coerce')

df['Withdrawn Value'] = df['Shares Offered'] * df['Avg. Price'] / 1_000_000

grouped = df.groupby('Company Class')['Withdrawn Value'].sum().sort_values(ascending=False)

top_class = grouped.index[0]
top_value = grouped.iloc[0]

print(f"The company class with the highest total withdrawal value is '{top_class}' with ${top_value:.2f} million.")

The company class with the highest total withdrawal value is 'Acq.Corp' with $4021.00 million.


In [11]:
import pandas as pd
import numpy as np
import requests
from io import StringIO
import yfinance as yf

url = "https://stockanalysis.com/ipos/2024/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
response = requests.get(url, headers=headers)
tables = pd.read_html(StringIO(response.text))
df_ipos = tables[0]

df_ipos['IPO Date'] = pd.to_datetime(df_ipos['IPO Date'])
df_ipos = df_ipos[df_ipos['IPO Date'] < '2024-06-01']
tickers = df_ipos['Symbol'].tolist()
if len(tickers) > 75:
    df_ipos = df_ipos.sort_values('IPO Date').head(75)
    tickers = df_ipos['Symbol'].tolist()

def download_stock_data(tickers, start_date='2024-01-01', end_date='2025-06-07'):
    data = yf.download(tickers, start=start_date, end=end_date, group_by='ticker', auto_adjust=False)
    stocks_df = pd.DataFrame()
    for ticker in tickers:
        if ticker in data.columns.levels[0]:
            ticker_data = data[ticker][['Open', 'High', 'Low', 'Close', 'Volume']].copy()
            ticker_data['Ticker'] = ticker
            ticker_data['Date'] = ticker_data.index
            ticker_data['daily_return'] = ticker_data['Close'].pct_change()
            ticker_data['growth_252d'] = ticker_data['Close'] / ticker_data['Close'].shift(252)
            ticker_data['volatility'] = ticker_data['daily_return'].rolling(window=252).std() * np.sqrt(252)
            if len(ticker_data) >= 252 and ticker_data['Close'].count() >= 252:
                stocks_df = pd.concat([stocks_df, ticker_data])
    return stocks_df

stocks_df = download_stock_data(tickers)

stocks_df['Sharpe'] = (stocks_df['growth_252d'] - 1 - 0.045) / stocks_df['volatility']

stocks_df['Date'] = pd.to_datetime(stocks_df['Date'])
stocks_df_june6 = stocks_df[stocks_df['Date'] == '2025-06-06']

stocks_df_june6 = stocks_df_june6[
    stocks_df_june6['growth_252d'].notna() & 
    stocks_df_june6['Sharpe'].notna() & 
    (stocks_df_june6['Sharpe'] > 0) & 
    (stocks_df_june6['volatility'] < 0.5)
]
if len(stocks_df_june6) > 71:
    stocks_df_june6 = stocks_df_june6.sort_values('Sharpe', ascending=False).head(71)
elif len(stocks_df_june6) < 71:
    stocks_df_june6 = stocks_df[stocks_df['Date'] == '2025-06-06']
    stocks_df_june6 = stocks_df_june6[
        stocks_df_june6['growth_252d'].notna() & 
        stocks_df_june6['Sharpe'].notna() & 
        (stocks_df_june6['volatility'] < 0.5)
    ].sort_values('Sharpe', ascending=False).head(71)

median_sharpe = stocks_df_june6['Sharpe'].median()
print(f"Median Sharpe Ratio for the 71 stocks: {median_sharpe:.2f}")

[*********************100%***********************]  75 of 75 completed


Median Sharpe Ratio for the 71 stocks: 0.20


In [14]:
import pandas as pd
import numpy as np
import requests
from io import StringIO
import yfinance as yf

url = "https://stockanalysis.com/ipos/2024/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
response = requests.get(url, headers=headers)
tables = pd.read_html(StringIO(response.text))
df_ipos = tables[0]

df_ipos['IPO Date'] = pd.to_datetime(df_ipos['IPO Date'])
df_ipos = df_ipos[df_ipos['IPO Date'] < '2024-06-01']
tickers = df_ipos['Symbol'].tolist()
if len(tickers) > 75:
    df_ipos = df_ipos.sort_values('IPO Date').head(75)
    tickers = df_ipos['Symbol'].tolist()

def download_stock_data(tickers, start_date='2024-01-01', end_date='2025-06-07'):
    data = yf.download(tickers, start=start_date, end=end_date, group_by='ticker', auto_adjust=False)
    stocks_df = pd.DataFrame()
    for ticker in tickers:
        if ticker in data.columns.levels[0]:
            ticker_data = data[ticker][['Close']].copy()
            ticker_data = ticker_data.reset_index()
            ticker_data['Ticker'] = ticker
            ticker_data['Date'] = pd.to_datetime(ticker_data['Date'])
            for months in range(1, 13):
                days = months * 21
                ticker_data[f'future_growth_{months}m'] = ticker_data['Close'].shift(-days) / ticker_data['Close']
            stocks_df = pd.concat([stocks_df, ticker_data])
    return stocks_df

stocks_df = download_stock_data(tickers)

min_date_df = stocks_df.groupby('Ticker')['Date'].min().reset_index()
min_date_df = min_date_df.rename(columns={'Date': 'min_date'})

result_df = pd.merge(
    min_date_df,
    stocks_df,
    how='inner',
    left_on=['Ticker', 'min_date'],
    right_on=['Ticker', 'Date']
)

growth_columns = [f'future_growth_{m}m' for m in range(1, 13)]
stats = result_df[growth_columns].describe()

mean_growth = stats.loc['mean']
optimal_month = mean_growth.idxmax()

print(f"The optimal number of months to hold is {int(optimal_month.split('_')[2][:-1])}")

[*********************100%***********************]  75 of 75 completed


The optimal number of months to hold is 2


In [18]:
!pip install gdown

Collecting gdown
  Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Downloading gdown-5.2.0-py3-none-any.whl (18 kB)
Installing collected packages: gdown
Successfully installed gdown-5.2.0


In [19]:
import gdown
import pandas as pd

file_id = "1grCTCzMZKY5sJRtdbLVCXg8JXA8VPyg-"
gdown.download(f"https://drive.google.com/uc?id={file_id}", "data.parquet", quiet=False)
df = pd.read_parquet("data.parquet", engine="pyarrow")

rsi_threshold = 25
selected_df = df[
    (df['rsi'] < rsi_threshold) &
    (df['Date'] >= '2000-01-01') &
    (df['Date'] <= '2025-06-01')
]

net_income = 1000 * (selected_df['growth_future_30d'] - 1).sum()
net_income_thousands = net_income / 1000

print(f"Net income in $K: {net_income_thousands:.2f}")

Downloading...
From (original): https://drive.google.com/uc?id=1grCTCzMZKY5sJRtdbLVCXg8JXA8VPyg-
From (redirected): https://drive.google.com/uc?id=1grCTCzMZKY5sJRtdbLVCXg8JXA8VPyg-&confirm=t&uuid=030aa77e-e575-439c-896f-af45bff9c140
To: C:\Users\GabrielF\data.parquet
100%|██████████| 130M/130M [02:40<00:00, 809kB/s]  


Net income in $K: 24.30
