# Implementation: Feature Factory

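**Goal**: Derive features from raw sensor readings (per-engine rolling statistics and cycle-to-cycle deltas) that can serve as signals for predicting failure.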

In [None]:
import pandas as pd
import numpy as np

# In the full pipeline the validated frame comes from the previous step, e.g.
# df = pd.read_parquet("validated_data.parquet")
# For this demo a small stand-in is built by hand: one engine (id 1) observed
# over ten consecutive cycles with a rising temperature reading.
df = pd.DataFrame(
    dict(
        engine_id=[1] * 10,
        cycle=range(1, 11),
        sensor_temp=[100, 101, 102, 103, 105, 110, 115, 120, 130, 150],
    )
)

# 1. Feature Engineering function
def extract_features(data, window=3):
    """Add per-engine rolling and first-difference features of ``sensor_temp``.

    The input frame is not modified; all new columns are written to a copy.

    Args:
        data: DataFrame containing at least the ``engine_id`` and
            ``sensor_temp`` columns. Rows are processed in their existing
            order within each engine; no sorting is applied here.
        window: Number of consecutive rows (per engine) in each rolling
            window. Defaults to 3.

    Returns:
        A new DataFrame holding the original columns plus:

        * ``temp_mean``  - rolling mean of ``sensor_temp``
        * ``temp_std``   - rolling standard deviation of ``sensor_temp``
        * ``temp_delta`` - difference from the previous row of the same engine

        Every NaN in the result is replaced with 0. That covers the leading
        rows of each engine, where the window or the difference is not yet
        defined, and also any NaN that was already present in the input.
    """
    # Work on a copy: assigning the feature columns to `data` directly would
    # silently add them (NaNs included) to the caller's frame.
    features = data.copy()

    # Group by engine_id so rolling/diff never bleed across engines.
    temp_by_engine = features.groupby("engine_id")["sensor_temp"]

    # Rolling statistics over `window` rows.
    features["temp_mean"] = temp_by_engine.transform(
        lambda s: s.rolling(window).mean()
    )
    features["temp_std"] = temp_by_engine.transform(
        lambda s: s.rolling(window).std()
    )

    # Difference (discrete derivative) between consecutive rows of an engine.
    features["temp_delta"] = temp_by_engine.diff()

    # Fill the NaNs created by rolling/diff.
    return features.fillna(0)

# Build the feature table for the demo frame and show its last rows.
featured_df = extract_features(df)
print(featured_df.tail())

# 2. Feature Selection (Variance Threshold)
from sklearn.feature_selection import VarianceThreshold

# Columns offered to the selector. The frame also carries engine_id and
# cycle, which are not passed in, so they must not be counted as "original"
# features in the report below.
candidate_columns = ["sensor_temp", "temp_mean", "temp_std", "temp_delta"]
selector = VarianceThreshold(threshold=0.1)
# Mock selection
try:
    # Keep only the call that can raise inside the try body.
    selected_features = selector.fit_transform(featured_df[candidate_columns])
except ValueError:
    # Raised by scikit-learn when no column clears the variance threshold.
    print("Not enough variance in this tiny mock dataset.")
else:
    # Compare like with like: candidates in vs. columns kept.
    print(f"Original Features: {len(candidate_columns)}")
    print(f"Selected Features: {selected_features.shape[1]}")