In [4]:
from pathlib import Path
import os
import sys

# Project root (repo root). Derived from the current working directory, so this
# assumes the kernel is started from a first-level subfolder of the repo
# (e.g. a notebooks/ folder) -- TODO confirm for your setup.
PROJECT_ROOT = Path().resolve().parent

# Make project-local packages (e.g. `utils`) importable. Guarded so that
# re-running this cell does not keep appending duplicate entries to sys.path.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

# Directory the raw input file (train_FD001.txt) is read from.
DATA_DIR = PROJECT_ROOT / "data" / "CMAPSSData"

# Model / output directories, created up front if they do not exist yet.
MODELS_DIR = PROJECT_ROOT / "models"
OUTPUTS_DIR = PROJECT_ROOT / "outputs"
for directory in (MODELS_DIR, OUTPUTS_DIR):
    directory.mkdir(parents=True, exist_ok=True)


In [5]:


# ✅ Step 1: Load Raw Data
import pandas as pd

# Column names assigned to the header-less file: 'unit', 'cycle',
# three 'op_setting_*' columns and twenty-one 'sensor_*' columns.
col_names = (
    ["unit", "cycle"]
    + [f"op_setting_{i}" for i in range(1, 4)]
    + [f"sensor_{i}" for i in range(1, 22)]
)

# The file is whitespace-delimited with no header row. The separator is a
# regular expression and is written as a raw string: "\s" inside a plain string
# literal is an invalid escape sequence, which Python warns about
# (DeprecationWarning, SyntaxWarning from 3.12) and will eventually reject.
df_raw = pd.read_csv(
    DATA_DIR / "train_FD001.txt",
    sep=r"\s+",
    header=None,
    names=col_names,
)

# Quick preview
print(df_raw.head())

# ✅ Step 2: Calculate Remaining Useful Life (RUL)
# Last recorded cycle of every unit, as a two-column frame (unit, max_cycle).
rul_df = (
    df_raw.groupby("unit")["cycle"]
    .max()
    .rename("max_cycle")
    .reset_index()
)

# Attach each unit's last cycle to its rows, then RUL = max_cycle - cycle.
# The helper column is dropped again so only RUL is added to the raw columns.
df = df_raw.merge(rul_df, on="unit", how="left")
df = df.assign(RUL=df["max_cycle"] - df["cycle"]).drop(columns="max_cycle")

print(df.loc[:, ["unit", "cycle", "RUL"]].head(10))

# ✅ Step 3: Drop constant or noisy sensors (subset for FD001)
# Sensor columns that are kept; every other sensor_* column is discarded.
selected_sensors = [
    'sensor_2', 'sensor_3', 'sensor_4', 'sensor_7', 'sensor_8',
    'sensor_9', 'sensor_11', 'sensor_12', 'sensor_13', 'sensor_14',
    'sensor_15', 'sensor_17', 'sensor_20', 'sensor_21'
]

# Final column order: identifiers and target first, then settings, then sensors.
cols_to_keep = (
    ['unit', 'cycle', 'RUL']
    + [f'op_setting_{i}' for i in range(1, 4)]
    + selected_sensors
)

# Take an explicit copy of the column subset so that later steps which add or
# modify columns on `df` operate on an independent frame and do not trigger
# pandas' SettingWithCopyWarning (relevant on pandas versions without
# Copy-on-Write).
df = df[cols_to_keep].copy()

# ✅ Step 4: Add rolling features (call from utils)
from utils.feature_engineering import add_rolling_features

# Window size passed to the rolling-feature helper.
ROLLING_WINDOW = 5

df = add_rolling_features(df, window=ROLLING_WINDOW)

# Discard rows containing NaN (presumably the leading rows of each rolling
# window -- confirm against utils.feature_engineering). Reassigning instead of
# using inplace=True avoids mutating the object returned by the helper and
# keeps the cell safe to re-run.
df = df.dropna()

# ✅ Step 5: Normalize features (call from utils)
from utils.preprocessing import normalize_features, save_dataset

# Destination of the processed dataset, under the project's data/ folder.
processed_path = PROJECT_ROOT / "data" / "processed_sensor_data.csv"

# Normalize via the project helper, then hand the result to the project's
# save helper together with the destination path.
df = normalize_features(df)
save_dataset(df, processed_path)


   unit  cycle  op_setting_1  op_setting_2  op_setting_3  sensor_1  sensor_2  \
0     1      1       -0.0007       -0.0004         100.0    518.67    641.82   
1     1      2        0.0019       -0.0003         100.0    518.67    642.15   
2     1      3       -0.0043        0.0003         100.0    518.67    642.35   
3     1      4        0.0007        0.0000         100.0    518.67    642.35   
4     1      5       -0.0019       -0.0002         100.0    518.67    642.37   

   sensor_3  sensor_4  sensor_5  ...  sensor_12  sensor_13  sensor_14  \
0   1589.70   1400.60     14.62  ...     521.66    2388.02    8138.62   
1   1591.82   1403.14     14.62  ...     522.28    2388.07    8131.49   
2   1587.99   1404.20     14.62  ...     522.42    2388.03    8133.23   
3   1582.79   1401.87     14.62  ...     522.86    2388.08    8133.83   
4   1582.85   1406.22     14.62  ...     522.19    2388.04    8133.80   

   sensor_15  sensor_16  sensor_17  sensor_18  sensor_19  sensor_20  sensor_21  