# Feature Engineering 

In [6]:
import pandas as pd
import numpy as np

train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")



# Log Transform Heavy Skew Features
Heavy skew:
    area_first_ha
    area_growth_abs_0_5h
    area_growth_rel_0_5h
    radial_growth_m
    radial_growth_rate_m_per_h
    projected_advance_m

In [7]:
skewed_cols = [
    "area_first_ha",
    "area_growth_abs_0_5h",
    "area_growth_rel_0_5h",
    "radial_growth_m",
    "radial_growth_rate_m_per_h",
    "projected_advance_m",
]

for col in skewed_cols:
    bad = (train[col] <= -1).sum()
    print(col, "min=", train[col].min(), "count<=-1=", bad)



area_first_ha min= 0.0375248376006194 count<=-1= 0
area_growth_abs_0_5h min= -2.156174792844467e-05 count<=-1= 0
area_growth_rel_0_5h min= -1.437844295438473e-07 count<=-1= 0
radial_growth_m min= -4.966986682575225e-05 count<=-1= 0
radial_growth_rate_m_per_h min= -1.2192654930378137e-05 count<=-1= 0
projected_advance_m min= -234.7733598623672 count<=-1= 8


In [9]:


NONNEG_LOG_COLS = [
    "area_first_ha",
    "radial_growth_m",
    "radial_growth_rate_m_per_h",
    "projected_advance_m",
    "area_growth_abs_0_5h",   # if this one is “mostly nonnegative” in your data
]

SIGNED_LOG_COLS = [
    "area_growth_rel_0_5h",   # can dip negative depending on definition
    # add any other columns that can go negative (slopes/changes)
]

def signed_log1p(x: np.ndarray) -> np.ndarray:
    return np.sign(x) * np.log1p(np.abs(x))

for col in NONNEG_LOG_COLS:
    train[f"log1p_{col}"] = np.log1p(np.clip(train[col].to_numpy(), 0, None))
    test[f"log1p_{col}"]  = np.log1p(np.clip(test[col].to_numpy(), 0, None))

for col in SIGNED_LOG_COLS:
    train[f"slog1p_{col}"] = signed_log1p(train[col].to_numpy())
    test[f"slog1p_{col}"]  = signed_log1p(test[col].to_numpy())


In [10]:
# Check for NaNs introduced by transforms
print("NaNs after feature engineering (train):")
print(train.isna().sum().sum())

print("NaNs after feature engineering (test):")
print(test.isna().sum().sum())

# Preview new columns
new_cols = [c for c in train.columns if "log1p_" in c or "slog1p_" in c]
print("New engineered columns:")
print(new_cols[:10])


NaNs after feature engineering (train):
0
NaNs after feature engineering (test):
0
New engineered columns:
['log1p_area_first', 'log1p_growth', 'log1p_area_first_ha', 'log1p_radial_growth_m', 'log1p_radial_growth_rate_m_per_h', 'log1p_projected_advance_m', 'log1p_area_growth_abs_0_5h', 'slog1p_area_growth_rel_0_5h']


# Create Monitoring Richness Features

In [11]:
# Binary: had multiple perimeters?
train["multi_perimeter_flag"] = (train["num_perimeters_0_5h"] > 1).astype(int)
test["multi_perimeter_flag"] = (test["num_perimeters_0_5h"] > 1).astype(int)

# Binary: non-zero time span?
train["nonzero_dt_flag"] = (train["dt_first_last_0_5h"] > 0).astype(int)
test["nonzero_dt_flag"] = (test["dt_first_last_0_5h"] > 0).astype(int)


# Growth Intensity Ratio

In [None]:
EPS = 1e-6

train["growth_intensity"] = train["area_growth_abs_0_5h"] / (train["area_first_ha"] + EPS)
test["growth_intensity"]  = test["area_growth_abs_0_5h"]  / (test["area_first_ha"]  + EPS)



# Directional Aggression

In [13]:
train["aggressive_toward_zone"] = (
    train["closing_speed_m_per_h"] * train["alignment_cos"]
)

test["aggressive_toward_zone"] = (
    test["closing_speed_m_per_h"] * test["alignment_cos"]
)
