In [1]:
!pip install yfinance



In [2]:
# Import libraries
import numpy as np
import pandas as pd
import requests
import re
from io import StringIO

import yfinance as yf
import pandas_datareader as pdr

import plotly.graph_objs as go
import plotly.express as px
import matplotlib.pyplot as plt

import time
from datetime import date

## Question 1: [IPO] Withdrawn IPOs by Company Type

### What is the total withdrawn IPO value (in $ millions) for the company class with the highest total withdrawal value?

From the withdrawn IPO list (stockanalysis.com/ipos/withdrawn), collect and process the data to find out which company type saw the most withdrawn IPO value.

**Steps:**
1. Use `pandas.read_html()` with the URL above to load the IPO withdrawal table into a DataFrame. It is a similar process to Code Snippet 1 discussed at the livestream. You should get 99 entries.
2. Create a new column called `Company Class`, categorizing company names based on patterns like:
  - “Acquisition Corp” or “Acquisition Corporation” → `Acq.Corp`
  - “Inc” or “Incorporated” → `Inc`
  - “Group” → `Group`
  - “Holdings” → `Holdings`
  - “Ltd” or “Limited” → `Ltd`
  - Others → `Other`
- Hint: make your function more robust by converting names to lowercase and splitting into words before matching patterns.
3. Define a new field `Avg. price` by parsing the `Price Range` field (create a function and apply it to the `Price Range `column).

  Examples:

  * '$8.00-$10.00' → `9.0`

  * '$5.00' → `5.0`

  * '-' → `None`

4. Convert `Shares Offered` to numeric, clean missing or invalid values.
5. Create a new column:
`Withdrawn Value = Shares Offered * Avg Price` (71 non-null values)
6. Group by `Company Class` and calculate total withdrawn value.
7. **Answer**: Which class had the highest total value of withdrawals?

### **Step 1**

In [3]:
def get_ipos_withdrawal_value() -> pd.DataFrame:
    """
    Fetch IPO data from stockanalysis.com.
    """
    url = f"https://stockanalysis.com/ipos/withdrawn/"
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/58.0.3029.110 Safari/537.3'
        )
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        html_io = StringIO(response.text)
        tables = pd.read_html(html_io)

        if not tables:
            raise ValueError(f"No data found.")

        return tables[0]

    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
    except ValueError as ve:
        print(f"Data error: {ve}")
    except Exception as ex:
        print(f"Unexpected error: {ex}")

    return pd.DataFrame()

In [4]:
# Load the data and display first 5 rows
df1 = get_ipos_withdrawal_value()
df1.head()

Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered
0,ODTX,"Odyssey Therapeutics, Inc.",-,-
1,UNFL,"Unifoil Holdings, Inc.",$3.00 - $4.00,2000000
2,AURN,"Aurion Biotech, Inc.",-,-
3,ROTR,"PHI Group, Inc.",-,-
4,ONE,One Power Company,-,-


In [5]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Symbol          100 non-null    object
 1   Company Name    100 non-null    object
 2   Price Range     100 non-null    object
 3   Shares Offered  100 non-null    object
dtypes: object(4)
memory usage: 3.3+ KB


In [6]:
df1.duplicated().sum()

np.int64(0)

### **Step 2**

In [7]:
def company_class(company_name):
    name = str(company_name).lower()

    # split using any non-alphanumeric separator
    words = re.split(r'\W+', name)

    # Map the company class based on the name
    if "acquisition" in words and ("corp" in words or "corp." in words or "corporation" in words):
        return "Acq.Corp"
    elif "holdings" in words:
        return "Holdings"
    elif "group" in words:
        return "Group"
    elif "inc" in words or "inc." in words or "incorporated" in words:
        return "Inc"
    elif "ltd" in words or "limited" in words:
        return "Ltd"
    else:
        return "Other"

In [8]:
df1['Company Class'] = df1['Company Name'].apply(company_class)
df1.sample(10)

Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered,Company Class
31,LEWY,"LeeWay Services, Inc.",$5.00,3000000,Inc
23,RPGL,Republic Power Group Limited,$5.00 - $6.00,1200000,Group
6,CABR,"Caring Brands, Inc.",$4.00,750000,Inc
58,JR,Jinrong Holdings Ltd.,$5.00,-,Holdings
84,UNSD,UNSDG Acquisition Corp.,$10.00,10000000,Acq.Corp
85,MELT,"Melt Pharmaceuticals, Inc.",-,-,Inc
40,IMSV,IMMRSIV Inc.,$4.00 - $5.00,1500000,Inc
4,ONE,One Power Company,-,-,Other
17,TURO,Turo Inc.,-,-,Inc
72,SBP,"Specialty Building Products, Inc.",-,-,Inc


In [9]:
df1['Company Class'].value_counts()

Unnamed: 0_level_0,count
Company Class,Unnamed: 1_level_1
Inc,40
Acq.Corp,21
Ltd,12
Group,12
Holdings,9
Other,6


### **Step 3**

In [10]:
def range_to_avg(value):

    # Extract numeric values from the string
    numbers = re.findall(r"\d+\.?\d*", str(value))

    if not numbers:
        return None
    elif len(numbers) == 1:
        return float(numbers[0])
    else:
        return (float(numbers[0]) + float(numbers[1])) / 2

In [11]:
df1['Avg. price'] = df1['Price Range'].apply(range_to_avg)
df1.tail()

Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered,Company Class,Avg. price
95,FHP,"Freehold Properties, Inc.",-,-,Inc,
96,CHO,Chobani Inc.,-,-,Inc,
97,IFIT,iFIT Health & Fitness Inc.,$18.00 - $21.00,30769231,Inc,19.5
98,GLGX,"Gerson Lehrman Group, Inc.",-,-,Group,
99,HCG,hear.com N.V.,$17.00 - $20.00,16220000,Other,18.5


### **Step 4**

In [12]:
# Convert to numeric and force invalid values to NaN
df1['Shares Offered'] = pd.to_numeric(df1['Shares Offered'], errors='coerce')

In [13]:
stats = df1['Shares Offered'].describe()

# Format in millions and round to 2 decimal places
summary_millions = stats.copy()
summary_millions[1:] = summary_millions[1:] / 1000000
summary_millions = summary_millions.round(2)

# Convert to DataFrame for display
summary_df = pd.DataFrame(summary_millions).rename(columns={"Shares Offered": "Value (in Millions)"})
print(summary_df)

       Value (in Millions)
count                72.00
mean                  9.98
std                  10.48
min                   0.50
25%                   1.58
50%                   3.75
75%                  20.00
max                  45.00


### **Step 5**

In [14]:
df1['Withdrawn Value'] = df1['Avg. price'] * df1['Shares Offered']
df1.head()

Unnamed: 0,Symbol,Company Name,Price Range,Shares Offered,Company Class,Avg. price,Withdrawn Value
0,ODTX,"Odyssey Therapeutics, Inc.",-,,Inc,,
1,UNFL,"Unifoil Holdings, Inc.",$3.00 - $4.00,2000000.0,Holdings,3.5,7000000.0
2,AURN,"Aurion Biotech, Inc.",-,,Inc,,
3,ROTR,"PHI Group, Inc.",-,,Group,,
4,ONE,One Power Company,-,,Other,,


### **Step 6**

In [15]:
# Group by Company Class and calculate total withdrawn value
grouped_df = df1.groupby('Company Class')['Withdrawn Value'].sum().reset_index()

# Convert to billions
grouped_df['Withdrawn Value (Billions)'] = grouped_df['Withdrawn Value'] / 1000000000
grouped_df['Withdrawn Value (Billions)'] = grouped_df['Withdrawn Value (Billions)'].map('{:.2f}B'.format)

print(grouped_df[['Company Class', 'Withdrawn Value (Billions)']])

  Company Class Withdrawn Value (Billions)
0      Acq.Corp                      4.02B
1         Group                      0.37B
2      Holdings                      0.32B
3           Inc                      1.91B
4           Ltd                      0.32B
5         Other                      0.77B


### Answer

The Acq.Corp class has the highest withdrawn value of 4.02B dollars

## Question 2: [IPO] Median Sharpe Ratio for 2024 IPOs (First 5 Months)

### What is the median Sharpe ratio (as of 6 June 2025) for companies that went public in the first 5 months of 2024?

The goal is to replicate the large-scale `yfinance` OHLCV data download and perform basic financial calculations on IPO stocks.

### **Steps:**

1. Using the same approach as in Question 1, download the IPOs in 2024 from:
https://stockanalysis.com/ipos/2024/
Filter to keep only those IPOs **before 1 June 2024** (first 5 months of 2024).

  ➤ You should have **75 tickers**.

2. Use Code Snippet 7 to **download daily stock data** for those tickers (via `yfinance`).
Make sure you understand how `growth_1d` ... `growth_365d`, and `volatility` columns are defined.
Define a new column `growth_252d` representing growth after 252 trading days (~1 year), in addition to any other growth periods you already track.

3. **Calculate the Sharpe ratio** assuming a risk-free rate of **4.5%**:

      ` stocks_df['Sharpe'] = (stocks_df['growth_252d'] - 0.045) / stocks_df['volatility'] `

4. **Filter the DataFrame** to keep data only for the trading day:**2025-06-06**

  Compute descriptive statistics (e.g., `.describe()`) for these columns:

  * `growth_252d`
  * `Sharpe`

  You should observe:

  * `growth_252d` is defined for 71 out of 75 stocks (some IPOs are too recent or data starts later).
  * Median `growth_252d` is approximately 0.75 (indicating a 25% decline), while mean is about 1.15, showing a bias towards high-growth companies pushing the average up.

5. **Answer**:

  * What is the median Sharpe ratio for these 71 stocks?
  * Note: Positive `Sharpe` means growth exceeding the risk-free rate of 4.5%.
  * [Additional] Do you observe the same top 10 companies when sorting by `growth_252d` versus sorting by `Sharpe`?


### **Step 1**

In [16]:
def get_ipos_2024() -> pd.DataFrame:
    """
    Fetch IPO data for the given year from stockanalysis.com.
    """
    url = f"https://stockanalysis.com/ipos/2024/"
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/58.0.3029.110 Safari/537.3'
        )
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        html_io = StringIO(response.text)
        tables = pd.read_html(html_io)

        if not tables:
            raise ValueError(f"No tables found.")

        return tables[0]

    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
    except ValueError as ve:
        print(f"Data error: {ve}")
    except Exception as ex:
        print(f"Unexpected error: {ex}")

    return pd.DataFrame()

In [17]:
ipos_2024 = get_ipos_2024()
ipos_2024.head()

Unnamed: 0,IPO Date,Symbol,Company Name,IPO Price,Current,Return
0,"Dec 31, 2024",ONEG,OneConstruction Group Limited,$4.00,$4.15,9.25%
1,"Dec 27, 2024",PHH,"Park Ha Biological Technology Co., Ltd.",$4.00,$21.48,432.75%
2,"Dec 23, 2024",HIT,"Health In Tech, Inc.",$4.00,$0.58,-85.20%
3,"Dec 23, 2024",TDAC,Translational Development Acquisition Corp.,$10.00,$10.26,2.60%
4,"Dec 20, 2024",RANG,Range Capital Acquisition Corp.,$10.00,$10.20,2.50%


In [18]:
ipos_2024.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 225 entries, 0 to 224
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   IPO Date      225 non-null    object
 1   Symbol        225 non-null    object
 2   Company Name  225 non-null    object
 3   IPO Price     225 non-null    object
 4   Current       225 non-null    object
 5   Return        225 non-null    object
dtypes: object(6)
memory usage: 10.7+ KB


In [19]:
missing_prices = ipos_2024[ipos_2024['IPO Price'].astype(str).str.find('-') >= 0]
missing_prices

Unnamed: 0,IPO Date,Symbol,Company Name,IPO Price,Current,Return
61,"Oct 11, 2024",STFS,Star Fashion Culture Holdings Limited,-,$1.17,-
120,"Jul 23, 2024",ZDAI,Primega Group Holdings Limited,-,$0.85,-
148,"May 31, 2024",NAKA,"Kindly MD, Inc.",-,$14.55,-
174,"Apr 17, 2024",SUPX,SuperX AI Technology Limited,-,$9.83,-


Cleaning

In [20]:
# Remove '$' from 'IPO Price' and 'Current' columns and '%' from Return column
ipos_2024['IPO Price'] = ipos_2024['IPO Price'].str.replace('$', '', regex=False)
ipos_2024['Current'] = ipos_2024['Current'].str.replace('$', '', regex=False)
ipos_2024['Return(%)'] = ipos_2024['Return'].str.replace('%', '', regex=False)

In [21]:
# Convert to numeric
ipos_2024[['IPO Price', 'Current', 'Return(%)']] = ipos_2024[['IPO Price', 'Current', 'Return(%)']].apply(pd.to_numeric, errors='coerce')

# Convert to datetime
ipos_2024['IPO Date'] = pd.to_datetime(ipos_2024['IPO Date'], format='mixed')

In [22]:
ipos_2024['Return'] = ipos_2024['Current'] - ipos_2024['IPO Price']

In [23]:
ipos_2024.dropna(inplace=True)  # drop missing values since price is important for the purpose of this question

In [24]:
ipos_2024.sample(3)

Unnamed: 0,IPO Date,Symbol,Company Name,IPO Price,Current,Return,Return(%)
130,2024-06-28,ALMS,Alumis Inc.,16.0,3.09,-12.91,-80.81
50,2024-10-23,SAG,SAG Holdings Limited,8.0,1.01,-6.99,-86.75
1,2024-12-27,PHH,"Park Ha Biological Technology Co., Ltd.",4.0,21.48,17.48,432.75


In [25]:
ipos_2024.info()

<class 'pandas.core.frame.DataFrame'>
Index: 220 entries, 0 to 224
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   IPO Date      220 non-null    datetime64[ns]
 1   Symbol        220 non-null    object        
 2   Company Name  220 non-null    object        
 3   IPO Price     220 non-null    float64       
 4   Current       220 non-null    float64       
 5   Return        220 non-null    float64       
 6   Return(%)     220 non-null    float64       
dtypes: datetime64[ns](1), float64(4), object(2)
memory usage: 13.8+ KB


In [26]:
filtered_ipos_2024 = ipos_2024[ipos_2024['IPO Date'] < '2024-06-01'].reset_index(drop=True)
filtered_ipos_2024.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75 entries, 0 to 74
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   IPO Date      75 non-null     datetime64[ns]
 1   Symbol        75 non-null     object        
 2   Company Name  75 non-null     object        
 3   IPO Price     75 non-null     float64       
 4   Current       75 non-null     float64       
 5   Return        75 non-null     float64       
 6   Return(%)     75 non-null     float64       
dtypes: datetime64[ns](1), float64(4), object(2)
memory usage: 4.2+ KB


In [27]:
filtered_ipos_2024.describe()

Unnamed: 0,IPO Date,IPO Price,Current,Return,Return(%)
count,75,75.0,75.0,75.0,75.0
mean,2024-03-16 04:09:36,13.6968,16.451467,2.754667,1.539467
min,2024-01-09 00:00:00,4.0,0.02,-48.21,-99.57
25%,2024-02-06 12:00:00,4.0,1.265,-3.91,-75.04
50%,2024-03-21 00:00:00,10.0,4.7,-2.24,-47.17
75%,2024-04-22 00:00:00,17.75,20.505,0.915,15.0
max,2024-05-23 00:00:00,92.0,133.83,99.83,765.38
std,,15.253969,26.08838,20.099847,127.215652


### **Step 2**

In [28]:
# Extract the symbols in filtered_ipos_2024 into a list
tickers = filtered_ipos_2024['Symbol'].tolist()
print("Tickers:", tickers)

Tickers: ['BOW', 'HDL', 'RFAI', 'JDZG', 'RAY', 'BTOC', 'ZK', 'GPAT', 'PAL', 'SVCO', 'NNE', 'CCIX', 'VIK', 'ZONE', 'LOAR', 'MRX', 'RBRK', 'NCI', 'MFI', 'YYGH', 'TRSG', 'CDTG', 'CTRI', 'IBTA', 'MTEN', 'TWG', 'ULS', 'PACS', 'MNDR', 'CTNM', 'MAMO', 'ZBAO', 'BOLD', 'MMA', 'UBXG', 'IBAC', 'AUNA', 'BKHA', 'LOBO', 'RDDT', 'ALAB', 'INTJ', 'RYDE', 'LGCL', 'SMXT', 'VHAI', 'DYCQ', 'CHRO', 'UMAC', 'HLXB', 'MGX', 'TBBB', 'TELO', 'KYTX', 'PMNT', 'AHR', 'LEGT', 'ANRO', 'GUTS', 'AS', 'FBLG', 'AVBP', 'BTSG', 'HAO', 'CGON', 'YIBO', 'JL', 'SUGP', 'JVSA', 'KSPI', 'CCTG', 'PSBD', 'SYNX', 'SDHC', 'ROMA']


In [29]:
def get_growth_df(df: pd.DataFrame) -> pd.DataFrame:
    GROWTH_KEYS = []
    for i in [1, 3, 7, 30, 90, 252, 365]:
        col_name = f'growth_{i}d'
        df[col_name] = df['Close'] / df['Close'].shift(i)
        GROWTH_KEYS.append(col_name)

    # Reset index to turn the Date into a column
    df = df.reset_index()
    return df[['Date', 'Symbol', 'Close'] + GROWTH_KEYS]

In [30]:
# Load data
growth_dfs = []

for ticker in tickers:
    try:
        df = yf.Ticker(ticker).history(period="max", interval="1d")[['Close']]
        if not df.empty:
            df['Symbol'] = ticker
            df = get_growth_df(df)
            growth_dfs.append(df)
        else:
            print(f"No data for {ticker}")
    except Exception as e:
        print(f"Error processing {ticker}: {e}")

In [31]:
growth_df = pd.concat(growth_dfs, ignore_index=True)
growth_df.head()

Unnamed: 0,Date,Symbol,Close,growth_1d,growth_3d,growth_7d,growth_30d,growth_90d,growth_252d,growth_365d
0,2024-05-23 00:00:00-04:00,BOW,23.799999,,,,,,,
1,2024-05-24 00:00:00-04:00,BOW,25.700001,1.079832,,,,,,
2,2024-05-28 00:00:00-04:00,BOW,26.48,1.03035,,,,,,
3,2024-05-29 00:00:00-04:00,BOW,26.290001,0.992825,1.104622,,,,,
4,2024-05-30 00:00:00-04:00,BOW,26.139999,0.994294,1.017121,,,,,


In [32]:
growth_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23169 entries, 0 to 23168
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype                           
---  ------       --------------  -----                           
 0   Date         23169 non-null  datetime64[ns, America/New_York]
 1   Symbol       23169 non-null  object                          
 2   Close        23169 non-null  float64                         
 3   growth_1d    23094 non-null  float64                         
 4   growth_3d    22944 non-null  float64                         
 5   growth_7d    22644 non-null  float64                         
 6   growth_30d   20919 non-null  float64                         
 7   growth_90d   16419 non-null  float64                         
 8   growth_252d  4358 non-null   float64                         
 9   growth_365d  0 non-null      float64                         
dtypes: datetime64[ns, America/New_York](1), float64(8), object(1)
memory usage: 1.8+ M

In [33]:
# Volatility index
vix = yf.Ticker("^VIX").history(
                     period = "max",
                     interval = "1d").reset_index()
vix.sample()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
8499,2023-09-27 00:00:00-05:00,18.290001,19.709999,18.030001,18.219999,0,0.0,0.0


In [44]:
vix.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8931 entries, 0 to 8930
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    8931 non-null   datetime64[ns]
 1   VIX     8931 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 139.7 KB


In [35]:
vix = vix.rename(columns={'Close': 'VIX'})
vix = vix[['Date', 'VIX']]
vix.sample()

Unnamed: 0,Date,VIX
814,1993-03-22 00:00:00-06:00,13.66


In [43]:
# Remove timezone from Date columns
growth_df['Date'] = growth_df['Date'].dt.tz_localize(None)
vix['Date'] = vix['Date'].dt.tz_localize(None)

In [47]:
# Combine the growth and volatility data
combined_df = pd.merge(growth_df, vix, on='Date', how='left')
combined_df.tail(3)

Unnamed: 0,Date,Symbol,Close,growth_1d,growth_3d,growth_7d,growth_30d,growth_90d,growth_252d,growth_365d,VIX
23166,2025-06-13,ROMA,2.66,0.923611,0.960289,0.707447,1.934546,3.917526,4.666667,,20.82
23167,2025-06-16,ROMA,2.875,1.080827,0.958333,0.805322,1.955782,4.3429,5.424529,,19.110001
23168,2025-06-17,ROMA,2.8761,1.000383,0.998646,0.777324,1.867597,4.210981,5.154301,,20.52


### **Step 3**

In [48]:
# Calculate shrape ratio
combined_df['Sharpe'] = (combined_df['growth_252d'] - 0.045) / combined_df['VIX']
combined_df.tail(3)

Unnamed: 0,Date,Symbol,Close,growth_1d,growth_3d,growth_7d,growth_30d,growth_90d,growth_252d,growth_365d,VIX,Sharpe
23166,2025-06-13,ROMA,2.66,0.923611,0.960289,0.707447,1.934546,3.917526,4.666667,,20.82,0.221982
23167,2025-06-16,ROMA,2.875,1.080827,0.958333,0.805322,1.955782,4.3429,5.424529,,19.110001,0.281503
23168,2025-06-17,ROMA,2.8761,1.000383,0.998646,0.777324,1.867597,4.210981,5.154301,,20.52,0.248991


### **Step 4**

In [49]:
# Filter for 2025-06-06
filtered_df = combined_df[combined_df['Date']== '2025-06-06']
filtered_df

Unnamed: 0,Date,Symbol,Close,growth_1d,growth_3d,growth_7d,growth_30d,growth_90d,growth_252d,growth_365d,VIX,Sharpe
259,2025-06-06,BOW,36.389999,1.010272,0.971696,0.996986,0.879623,1.105407,1.442331,,16.77,0.083323
530,2025-06-06,HDL,20.410000,0.998532,1.023160,1.039206,0.969366,0.869992,1.007155,,16.77,0.057374
769,2025-06-06,RFAI,10.510000,1.000000,1.000952,1.002863,1.007960,1.024366,,,16.77,
1041,2025-06-06,JDZG,0.295000,0.862573,0.565134,0.556604,0.564054,0.393858,0.168571,,16.77,0.007369
1314,2025-06-06,RAY,1.255000,1.081897,0.774691,0.456364,0.459707,0.980469,0.343459,,16.77,0.017797
...,...,...,...,...,...,...,...,...,...,...,...,...
21730,2025-06-06,CCTG,1.095000,1.004587,0.969027,0.904959,0.829545,0.655689,0.500000,,16.77,0.027132
22085,2025-06-06,PSBD,13.880000,1.001443,1.001443,0.993558,1.090338,0.903507,0.947565,,16.77,0.053820
22443,2025-06-06,SYNX,1.680000,0.976744,0.982456,0.938547,0.908108,0.413793,0.626632,,16.77,0.034683
22800,2025-06-06,SDHC,19.270000,0.969804,1.017423,1.079552,0.955853,0.775765,0.758065,,16.77,0.042520


In [50]:
filtered_df[['growth_252d', 'Sharpe']].describe()

Unnamed: 0,growth_252d,Sharpe
count,71.0,71.0
mean,1.152897,0.066064
std,1.406017,0.083841
min,0.02497,-0.001194
25%,0.293422,0.014813
50%,0.758065,0.04252
75%,1.362736,0.078577
max,8.097413,0.480168


### **Answer**

The median Sharpe ratio for the stocks is 0.0425.

Positive Sharpe means growth exceeding the risk-free rate of 4.5%.

In [51]:
filtered_df[['Symbol', 'growth_252d', 'Sharpe']].sort_values(by=['growth_252d', 'Sharpe'], ascending=[False, False]).head(10)

Unnamed: 0,Symbol,growth_252d,Sharpe
20351,JL,8.097413,0.480168
23161,ROMA,6.156406,0.364425
14193,UMAC,4.966533,0.293472
2937,NNE,4.655224,0.274909
4615,RBRK,3.184065,0.187183
16569,AHR,2.483097,0.145384
17908,AS,2.478203,0.145093
4328,MRX,2.300384,0.134489
11439,RDDT,2.225505,0.130024
6939,MTEN,2.210432,0.129125


We observe the same top 10 companies when sorting by growth_252d versus sorting by Sharpe

## Question 3: [IPO] ‘Fixed Months Holding Strategy’
### What is the optimal number of months (1 to 12) to hold a newly IPO'd stock in order to maximize average growth?
(Assume you buy at the close of the first trading day and sell after a fixed number of trading days.)

### **Goal:**

Investigate whether holding an IPO stock for a fixed number of months after its first trading day produces better returns, using future growth columns.

### **Steps:**

1. **Start from the existing DataFrame** from Question 2 (75 tickers from IPOs in the first 5 months of 2024).

  Add 12 new columns:
  `future_growth_1m`, `future_growth_2m`, ..., `future_growth_12m`

  *(Assume 1 month = 21 trading days, so growth is calculated over 21, 42, ..., 252 trading days)*
  This logic is similar to `historyPrices['growth_future_30d']` from Code Snippet 7, but extended to longer timeframes.

2. **Determine the first trading day**
 `(min_date)` for each ticker.
This is the earliest date in the data for each stock.

3. **Join the data:**
Perform an inner join between the `min_date` DataFrame and the future growth data on both `ticker` and `date`.

  ➤ You should end up with 75 records (one per IPO) with all 12 `future_growth_... ` fields populated.

4. **Compute descriptive statistics** for the resulting DataFrame:

  Use `.describe()` or similar to analyze each of the 12 columns:

  * `future_growth_1m`
  * `future_growth_2m`
  * ...
  * `future_growth_12m`

5. **Determine the best holding period:**

  * Find the number of months (1 to 12) where the average (mean) future growth is maximal.
  * This optimal month shows an uplift of >1% compared to all others.
  * Still, the average return remains less than 1 (i.e., expected return is less than doubling your investment).