In [None]:
import pandas as pd
import numpy as np

# === Load Data ===
# Read the labeled price table, parse 'date' into datetimes, and order rows by
# (ticker, date) so the per-ticker rolling computations below see each
# symbol's history in chronological order. The index is rebuilt as 0..n-1.
path = "C:/Users/flass/OneDrive/AI Financial Model/S&P 500 Chatgpt Version/df_labeled.parquet"
df = (
    pd.read_parquet(path)
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values(['ticker', 'date'])
    .reset_index(drop=True)
)

# === Technical Indicators ===
def compute_rsi(series, period):
    """Relative Strength Index of ``series`` using plain rolling means.

    Args:
        series: Price series (pandas Series) in chronological order.
        period: Number of consecutive changes averaged in each window.

    Returns:
        A Series aligned with ``series``. Entries are NaN until a full window
        of ``period`` price changes is available, and also wherever both the
        average gain and the average loss are zero (0 / 0).
    """
    change = series.diff()
    gains = change.clip(lower=0)    # positive moves, zero elsewhere
    losses = -change.clip(upper=0)  # magnitude of negative moves

    def _window_mean(values):
        # Simple (unweighted) mean over a complete window only.
        return values.rolling(period, min_periods=period).mean()

    rs = _window_mean(gains) / _window_mean(losses)
    return 100 - (100 / (1 + rs))

# RSI (daily)
# One column per look-back window, computed per ticker so a window never
# spans two different symbols.
for period in [4, 7, 14, 28]:
    df[f'rsi_{period}'] = df.groupby('ticker')['adjusted_close'].transform(
        lambda x, p=period: compute_rsi(x, p)
    )

# RSI (weekly)
# 'week' is the start timestamp of the `to_period('W')` week each row falls in.
# NOTE(review): the weekly close is the LAST close of the week and the result
# is merged onto every daily row of that same week, so early-week rows carry a
# value derived from later days in the week. Confirm this is acceptable
# before using it as a predictive feature.
df['week'] = df['date'].dt.to_period('W').dt.start_time
weekly = (
    df.groupby(['ticker', 'week'])
    .agg({'adjusted_close': 'last'})
    .rename(columns={'adjusted_close': 'weekly_close'})
    .reset_index()
)
weekly['rsi_weekly_7'] = weekly.groupby('ticker')['weekly_close'].transform(lambda x: compute_rsi(x, 7))
df = pd.merge(df, weekly[['ticker', 'week', 'rsi_weekly_7']], on=['ticker', 'week'], how='left')
df = df.drop(columns='week')

# Moving Averages & Relative Price
# Ratio of the current close to its trailing simple moving average.
for period in [50, 100, 200]:
    sma = df.groupby('ticker')['adjusted_close'].transform(lambda x: x.rolling(period, min_periods=period).mean())
    df[f'price_rel_sma{period}d'] = df['adjusted_close'] / sma

for weeks in [26, 52, 156, 260]:
    days = weeks * 5  # approximate trading days
    sma = df.groupby('ticker')['adjusted_close'].transform(lambda x: x.rolling(days, min_periods=days).mean())
    df[f'price_rel_sma{weeks}w'] = df['adjusted_close'] / sma

# Returns
# Percentage change over 5 and 21 rows per ticker.
df['returns_1w'] = df.groupby('ticker')['adjusted_close'].transform(lambda x: x.pct_change(5))
df['returns_1m'] = df.groupby('ticker')['adjusted_close'].transform(lambda x: x.pct_change(21))

# MACD
# Difference of the 12- and 26-span EMAs, plus a 9-span EMA of that difference.
exp12 = df.groupby('ticker')['adjusted_close'].transform(lambda x: x.ewm(span=12, adjust=False).mean())
exp26 = df.groupby('ticker')['adjusted_close'].transform(lambda x: x.ewm(span=26, adjust=False).mean())
df['macd'] = exp12 - exp26
df['macd_signal'] = df.groupby('ticker')['macd'].transform(lambda x: x.ewm(span=9, adjust=False).mean())

# Volume indicators
df['volume_sma_20d'] = df.groupby('ticker')['volume'].transform(lambda x: x.rolling(20, min_periods=20).mean())
df['volume_rel_20d'] = df['volume'] / df['volume_sma_20d']

# Weekly volume and its 12-week average are built on a one-row-per-week table
# and merged back. Rolling 12 rows over the daily frame (where every day of a
# week repeats the same weekly total) would cover only ~2.4 weeks of data.
# NOTE(review): 'volume_week' is the total for the whole week, so early-week
# rows include volume from later days of that week. Confirm this is intended.
df['week'] = df['date'].dt.to_period('W').dt.start_time
weekly_volume = (
    df.groupby(['ticker', 'week'])['volume']
    .sum()
    .rename('volume_week')
    .reset_index()
)
weekly_volume['volume_sma_12w'] = weekly_volume.groupby('ticker')['volume_week'].transform(
    lambda x: x.rolling(12, min_periods=12).mean()
)
df = pd.merge(df, weekly_volume, on=['ticker', 'week'], how='left')
df = df.drop(columns='week')
df['volume_rel_12w'] = df['volume_week'] / df['volume_sma_12w']

# Save Output
output_path = "C:/Users/flass/OneDrive/AI Financial Model/S&P 500 Chatgpt Version/df_technical_features.csv"
df.to_csv(output_path, index=False)
print(f"✅ Saved to {output_path}")


In [4]:
import pandas as pd

file_path = "C:/Users/flass/OneDrive/AI Financial Model/S&P 500 Chatgpt Version/df_model_input.csv"

# Read only the first 5000 rows: enough to inspect the schema without
# loading the whole file.
df = pd.read_csv(file_path, nrows=5000)

# Report the sample's dimensions, then list every column name on its own line.
print(f"✅ Loaded technical features file with shape: {df.shape}\n")
print("📋 Columns:")
for column_name in df.columns:
    print(f"• {column_name}")


✅ Loaded technical features file with shape: (5000, 411)

📋 Columns:
• date
• open
• high
• low
• close
• adjusted_close
• volume
• ticker_x
• ticker_y
• inc_filing_date
• inc_currency_symbol
• inc_researchDevelopment
• inc_effectOfAccountingCharges
• inc_incomeBeforeTax
• inc_minorityInterest
• inc_netIncome
• inc_sellingGeneralAdministrative
• inc_sellingAndMarketingExpenses
• inc_grossProfit
• inc_reconciledDepreciation
• inc_ebit
• inc_ebitda
• inc_depreciationAndAmortization
• inc_nonOperatingIncomeNetOther
• inc_operatingIncome
• inc_otherOperatingExpenses
• inc_interestExpense
• inc_taxProvision
• inc_interestIncome
• inc_netInterestIncome
• inc_extraordinaryItems
• inc_nonRecurring
• inc_otherItems
• inc_incomeTaxExpense
• inc_totalRevenue
• inc_totalOperatingExpenses
• inc_costOfRevenue
• inc_totalOtherIncomeExpenseNet
• inc_discontinuedOperations
• inc_netIncomeFromContinuingOps
• inc_netIncomeApplicableToCommonShares
• inc_preferredStockAndOtherAdjustments
• inc_totalAssets


In [4]:
# Count missing values in each column, restricted to the rows of ticker 'A'.
rows_for_a = df['ticker'] == 'A'
df.loc[rows_for_a].isna().sum()


ticker           0
date             0
pe_ttm           0
pe_rel_index     0
rsi_14          14
macd             0
macd_signal      0
returns_1w       5
returns_1m      21
bb_upper        19
bb_lower        19
dtype: int64