In [4]:
# === Step 1: Import libraries ===
import numpy as np
import pandas as pd

# === Step 2: Generate synthetic Palantir stock data ===
# Everything below draws from NumPy's global RNG, so the order of the
# np.random.* calls is part of the result: reordering them changes the data.
np.random.seed(42)
n = 252  # ~1 trading year
dates = pd.bdate_range("2024-01-01", periods=n)

# Geometric Brownian Motion for close price:
#   S_t = S_{t-1} * exp((mu - sigma^2 / 2) * dt + sigma * sqrt(dt) * Z_t)
S0, mu, sigma, dt = 25, 0.15, 0.3, 1/252
log_drift = (mu - 0.5 * sigma**2) * dt   # loop-invariant, computed once
log_vol = sigma * np.sqrt(dt)
# A single vectorised draw of the n-1 log-returns replaces the per-step loop.
shocks = np.random.normal(log_drift, log_vol, size=n - 1)
# cumprod applies the growth factors one after another, starting from S0,
# i.e. the same recurrence price[t] = price[t-1] * exp(shock[t]).
close_path = np.cumprod(np.concatenate(([float(S0)], np.exp(shocks))))
price = list(close_path)  # kept as a list, as before, in case later cells use it

df = pd.DataFrame(index=dates)
# Open is the close path perturbed by ~0.2% Gaussian noise.
df["open"] = close_path * (1 + np.random.normal(0, 0.002, size=n))
df["close"] = close_path
# High / low bracket the open-close range by a random 0.1%-1.5% margin.
df["high"] = df[["open", "close"]].max(axis=1) * (1 + np.random.uniform(0.001, 0.015, size=n))
df["low"] = df[["open", "close"]].min(axis=1) * (1 - np.random.uniform(0.001, 0.015, size=n))
# Stored as float from the start: NaNs are injected into this column below, and
# recent pandas versions warn about (and are slated to reject) the implicit
# int -> float upcast that assigning NaN to an integer column would need.
df["volume"] = np.random.randint(100000, 500000, size=n).astype(float)

# Inject missingness (5 random rows each; the two draws are independent)
df.loc[np.random.choice(df.index, 5, replace=False), "close"] = np.nan
df.loc[np.random.choice(df.index, 5, replace=False), "volume"] = np.nan

# Inject outliers
# The volume rows are drawn independently of the missing-volume rows above, so
# an "outlier" can land on a NaN and stay NaN.
df.loc[np.random.choice(df.index, 2, replace=False), "high"] *= 1.5
df.loc[np.random.choice(df.index, 2, replace=False), "volume"] *= 5

# === Step 3: Create engineered features ===
VOL_WINDOW = 7  # look-back, in rows (trading days), for the rolling volatility

# Feature 1: Daily Return
# Missing closes are forward-filled explicitly before differencing. This is what
# a bare pct_change() did implicitly through its deprecated default
# fill_method='pad' (the source of the FutureWarning), so the numbers are the
# same: a day with a missing close gets a 0.0 return and the following day
# carries the whole move.
df['daily_return'] = df['close'].ffill().pct_change(fill_method=None)

# Feature 2: 7-Day Rolling Volatility
# Standard deviation of the last VOL_WINDOW daily returns. The first VOL_WINDOW
# rows have no complete window and are filled with 0.
# NOTE(review): 0.0 is indistinguishable from a genuinely flat week; consider
# leaving these rows as NaN if downstream code can handle it.
df['rolling_volatility'] = (
    df['daily_return']
    .rolling(window=VOL_WINDOW)
    .std()
    .fillna(0)
)

# Feature 3: Price-to-Volume Ratio
# NaN wherever either close or volume is missing.
df['price_volume_ratio'] = df['close'] / df['volume']

df.head(10)


  df['daily_return'] = df['close'].pct_change()


Unnamed: 0,open,close,high,low,volume,daily_return,rolling_volatility,price_volume_ratio
2024-01-01,25.045893,25.0,25.106628,24.871937,264134.0,,0.0,9.5e-05
2024-01-02,25.353451,25.246297,25.433061,24.885822,175713.0,0.009852,0.0,0.000144
2024-01-03,25.242928,25.19091,25.355092,24.896416,318335.0,-0.002194,0.0,7.9e-05
2024-01-04,25.43425,25.511774,25.594675,25.358924,175418.0,0.012737,0.0,0.000145
2024-01-05,26.24224,26.26768,26.362557,25.896931,228232.0,0.02963,0.0,0.000115
2024-01-08,26.22889,26.162599,26.359807,25.957902,263038.0,-0.004,0.0,9.9e-05
2024-01-09,26.021065,26.057946,26.147253,25.669163,268383.0,-0.004,0.0,9.7e-05
2024-01-10,26.882378,26.858537,27.246761,26.530918,393719.0,0.030724,0.015066,6.8e-05
2024-01-11,27.304503,27.262266,27.362478,27.072712,425996.0,0.015032,0.015162,6.4e-05
2024-01-12,26.982605,27.03272,27.258259,26.947135,358722.0,-0.00842,0.01622,7.5e-05


In [5]:

# Re-derives the three engineered columns on the existing `df`. Unlike the
# previous cell, rolling_volatility is NOT zero-filled here, so this cell
# overwrites that column and leaves the warm-up rows as NaN.

# Feature 1: Daily Return
# Forward-fill missing closes explicitly, then difference. Equivalent to the
# old implicit pct_change() default (fill_method='pad') but without relying on
# the deprecated behaviour that triggers a FutureWarning.
df['daily_return'] = df['close'].ffill().pct_change(fill_method=None)

# Feature 2: 7-Day Rolling Volatility
# NaN until a full window of 7 returns is available.
df['rolling_volatility'] = df['daily_return'].rolling(window=7).std()

# Feature 3: Price-to-Volume Ratio
# NaN wherever either close or volume is missing.
df['price_volume_ratio'] = df['close'] / df['volume']

df.head(10)



  df['daily_return'] = df['close'].pct_change()


Unnamed: 0,open,close,high,low,volume,daily_return,rolling_volatility,price_volume_ratio
2024-01-01,25.045893,25.0,25.106628,24.871937,264134.0,,,9.5e-05
2024-01-02,25.353451,25.246297,25.433061,24.885822,175713.0,0.009852,,0.000144
2024-01-03,25.242928,25.19091,25.355092,24.896416,318335.0,-0.002194,,7.9e-05
2024-01-04,25.43425,25.511774,25.594675,25.358924,175418.0,0.012737,,0.000145
2024-01-05,26.24224,26.26768,26.362557,25.896931,228232.0,0.02963,,0.000115
2024-01-08,26.22889,26.162599,26.359807,25.957902,263038.0,-0.004,,9.9e-05
2024-01-09,26.021065,26.057946,26.147253,25.669163,268383.0,-0.004,,9.7e-05
2024-01-10,26.882378,26.858537,27.246761,26.530918,393719.0,0.030724,0.015066,6.8e-05
2024-01-11,27.304503,27.262266,27.362478,27.072712,425996.0,0.015032,0.015162,6.4e-05
2024-01-12,26.982605,27.03272,27.258259,26.947135,358722.0,-0.00842,0.01622,7.5e-05
