# 02_features.ipynb

This notebook loads the cleaned StockX resale data for multiple sneaker models, then creates and scales features ahead of fitting a price elasticity model.

In [None]:
import numpy as np
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Input/output directories for this notebook.
# The defaults are the original machine-specific locations; set the
# SNEAKER_DATA_CLEAN / SNEAKER_DATA_PROCESSED environment variables to run the
# notebook on another machine without editing this cell.
DATA_CLEAN = os.environ.get(
    "SNEAKER_DATA_CLEAN", "C:\\Projects/sneaker_elasticity/data_clean/"
)
DATA_PROCESSED = os.environ.get(
    "SNEAKER_DATA_PROCESSED", "C:\\Projects/sneaker_elasticity/data_processed/"
)

In [None]:
# Load the cleaned, combined resale data.
df = pd.read_csv(os.path.join(DATA_CLEAN, "all_models_clean.csv"))

# Parse the date columns. Unparseable values are coerced to NaT here and the
# affected rows are removed (and counted) by the validity filter below, so
# they can no longer leak into the output as NaN features.
df["Date_of_Sale"] = pd.to_datetime(df["Date_of_Sale"], errors="coerce")
df["Release_Date"] = pd.to_datetime(df["Release_Date"], format="%m/%d/%y", errors="coerce")
df["days_since_release"] = (df["Date_of_Sale"] - df["Release_Date"]).dt.days

num_features = ["Retail_Price", "Sneaker_Size", "days_since_release"]

# Keep only rows that yield a finite target and complete numeric features:
#   * log(Sale_Price) is only defined for strictly positive prices;
#   * StandardScaler passes NaN straight through, so missing numeric values
#     would otherwise end up in X_features.csv unnoticed.
usable_rows = df["Sale_Price"].gt(0) & df[num_features].notna().all(axis=1)
n_dropped = int((~usable_rows).sum())
if n_dropped:
    print(
        f"Dropping {n_dropped} of {len(df)} rows with a non-positive/missing "
        "Sale_Price or missing numeric features"
    )
df = df.loc[usable_rows].reset_index(drop=True)
if df.empty:
    raise ValueError(
        "No usable rows remain after validation; check the date formats and "
        "the Sale_Price column in all_models_clean.csv"
    )

# Target: log of the resale price.
df["log_sale_price"] = np.log(df["Sale_Price"])

# One-hot encode the categorical columns; drop_first removes one level per
# column so the dummies are not perfectly collinear.
df = pd.get_dummies(df, columns=["Brand", "Color"], drop_first=True, dtype=int)
cat_features = [col for col in df.columns if col.startswith(("Brand_", "Color_"))]

# Standardize the numeric features only; the dummy columns stay 0/1.
# NOTE(review): the scaler is fit on the full dataset. If the modelling
# notebook holds out a test set, fit the scaler on the training rows only to
# avoid leaking test statistics into the features - TODO confirm.
scaler = StandardScaler()

df_scaled = df.copy()
df_scaled[num_features] = scaler.fit_transform(df[num_features])

features = num_features + cat_features
X = df_scaled[features]
y = df_scaled["log_sale_price"]

# Make sure the output directory exists before writing; X and y are written
# without the index and share the same row order.
os.makedirs(DATA_PROCESSED, exist_ok=True)
X.to_csv(os.path.join(DATA_PROCESSED, "X_features.csv"), index=False)
y.to_csv(os.path.join(DATA_PROCESSED, "y_target.csv"), index=False)

## Summary
- Created the `days_since_release` timedelta feature and one-hot encoded (OHE) `Brand` and `Color`
- Scaled the numeric features
- Output feature & target CSVs for model creation.

Next: 03_model.ipynb → train models to quantify price elasticity