# Feature Engineering Avanzado

In [4]:
import numpy as np
import pandas as pd

# Load the cleaned bogie readings, parse the timestamp column into datetimes
# and order the rows chronologically within each (train_id, bogie_id) pair.
# The index is rebuilt afterwards so it runs 0..n-1 in the sorted order.
df = (
    pd.read_csv("../data/train_bogie_clean.csv")
    .assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"]))
    .sort_values(by=["train_id", "bogie_id", "timestamp"])
    .reset_index(drop=True)
)


In [5]:
# Size of the rolling window, counted in readings (rows): the last 5 readings.
window = 5

# Signals for which the temporal (rolling) aggregates are created.
base_cols = [
    "vib_total_rms",
    "bogie_temp_c",
    "temp_bogie_minus_env",
    "temp_diff_wheels",
]


In [6]:
# Rolling mean of every base signal over the last `window` readings of the
# same (train_id, bogie_id) group. With min_periods=1 the first readings of a
# group are averaged over however many rows exist so far, so the window itself
# never produces NaN.
#
# The built-in groupby-rolling computes all signals in one vectorised pass,
# instead of rebuilding the groupby for each column and calling a Python
# lambda once per group through `transform`.
rolling_mean = (
    df
    .groupby(["train_id", "bogie_id"])[base_cols]
    .rolling(window=window, min_periods=1)
    .mean()
    .droplevel([0, 1])  # drop the two group-key levels, keep the original row index
)

for col in base_cols:
    new_col = f"{col}_mean_{window}"
    # Assignment aligns on the row index; overwriting an existing column keeps
    # this cell safe to re-run.
    df[new_col] = rolling_mean[col]


In [7]:
# Rolling maximum of every base signal over the last `window` readings of the
# same (train_id, bogie_id) group. With min_periods=1 the first readings of a
# group use however many rows exist so far, so the window itself never
# produces NaN.
#
# The built-in groupby-rolling computes all signals in one vectorised pass,
# instead of rebuilding the groupby for each column and calling a Python
# lambda once per group through `transform`.
rolling_max = (
    df
    .groupby(["train_id", "bogie_id"])[base_cols]
    .rolling(window=window, min_periods=1)
    .max()
    .droplevel([0, 1])  # drop the two group-key levels, keep the original row index
)

for col in base_cols:
    new_col = f"{col}_max_{window}"
    # Assignment aligns on the row index; overwriting an existing column keeps
    # this cell safe to re-run.
    df[new_col] = rolling_max[col]


In [8]:
# "Trend" feature: how far the current reading sits from its own rolling mean
# (current value minus the `<signal>_mean_<window>` column). Positive values
# mean the reading is above its recent average.
for signal in base_cols:
    df[f"{signal}_trend_{window}"] = df[signal].sub(df[f"{signal}_mean_{window}"])


In [9]:
# Summarise the engineered features.
# The suffixes are derived from `window` rather than hard-coding "_5", so the
# selection stays correct when the window size is changed. They are matched at
# the END of the column name so that unrelated columns which merely contain
# the same text (e.g. a "*_mean_50" column) are not picked up.
feature_suffixes = tuple(f"_{agg}_{window}" for agg in ("mean", "max", "trend"))
new_cols = [c for c in df.columns if c.endswith(feature_suffixes)]

print("Nuevas columnas:", new_cols)
print("Shape final df:", df.shape)
# Transposed describe(): one row per feature; head() keeps the printout short.
print(df[new_cols].describe().T.head())


Nuevas columnas: ['vib_total_rms_mean_5', 'bogie_temp_c_mean_5', 'temp_bogie_minus_env_mean_5', 'temp_diff_wheels_mean_5', 'vib_total_rms_max_5', 'bogie_temp_c_max_5', 'temp_bogie_minus_env_max_5', 'temp_diff_wheels_max_5', 'vib_total_rms_trend_5', 'bogie_temp_c_trend_5', 'temp_bogie_minus_env_trend_5', 'temp_diff_wheels_trend_5']
Shape final df: (195822, 46)
                                count       mean        std        min  \
vib_total_rms_mean_5         195822.0   4.958899   4.094245   0.712979   
bogie_temp_c_mean_5          195822.0  43.851573   6.465872 -20.000000   
temp_bogie_minus_env_mean_5  195822.0  20.662307   6.032837 -48.116364   
temp_diff_wheels_mean_5      195822.0   2.442854   0.955724   0.002829   
vib_total_rms_max_5          195822.0  11.887899  17.947975   0.712979   

                                   25%        50%        75%        max  
vib_total_rms_mean_5          2.967418   3.416825   4.037698  75.120863  
bogie_temp_c_mean_5          39.814476  43.84

In [11]:
# Persist the table with the rolling-window features added; index=False keeps
# the row index out of the CSV file.
df.to_csv(path_or_buf="../data/bogie_full_with_windows.csv", index=False)
