In [1]:
# ==========================================
# PrognosAI – CMAPSS Preprocessing Script
# Generates: preprocessed CSVs + LSTM sequences (.npy)
# Works for FD001–FD004
# ==========================================

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import os

# ================== SETTINGS ==================
DATASETS = ["FD001", "FD002", "FD003", "FD004"]
RAW_PATH = r"C:\Users\Nithin G J\Desktop\PragnosAI\archive (1)\CMaps"
SAVE_PATH = r"C:\Users\Nithin G J\Desktop\PragnosAI\processed_data"  # folder to save outputs
WINDOW_SIZE = 30

os.makedirs(SAVE_PATH, exist_ok=True)

# Function to create rolling sequences
def create_sequences(df, sensor_cols, window_size):
    sequences, rul = [], []
    for unit in df['unit_number'].unique():
        unit_data = df[df['unit_number']==unit].sort_values('time_in_cycles')
        sensor_data = unit_data[sensor_cols].values
        rul_data = unit_data['RUL'].values
        for i in range(len(sensor_data) - window_size + 1):
            sequences.append(sensor_data[i:i+window_size])
            rul.append(rul_data[i+window_size-1])
    return np.array(sequences), np.array(rul)

# ================== PROCESS DATASETS ==================
for ds in DATASETS:
    print(f"\n--- Preprocessing {ds} ---")
    
    try:
        # ======= LOAD TRAIN =======
        train_file = os.path.join(RAW_PATH, f"train_{ds}.txt")
        col_names = ['unit_number', 'time_in_cycles'] + [f'operational_setting_{i}' for i in range(1, 4)] + [f'sensor_{i}' for i in range(1, 22)]
        train = pd.read_csv(train_file, sep=' ', header=None, names=col_names, engine='python')
        train = train.drop(columns=[c for c in train.columns if 'Unnamed' in c]).reset_index(drop=True)
        
        # ======= LOAD TEST =======
        test_file = os.path.join(RAW_PATH, f"test_{ds}.txt")
        test = pd.read_csv(test_file, sep=' ', header=None, names=col_names, engine='python')
        test = test.drop(columns=[c for c in test.columns if 'Unnamed' in c]).reset_index(drop=True)
        
        # ======= LOAD RUL =======
        rul_file = os.path.join(RAW_PATH, f"RUL_{ds}.txt")
        rul = pd.read_csv(rul_file, header=None)
        rul.columns = ['RUL']
        
        # ======= COMPUTE RUL FOR TRAIN =======
        max_cycle = train.groupby('unit_number')['time_in_cycles'].transform('max')
        train['RUL'] = max_cycle - train['time_in_cycles']
        
        # ======= NORMALIZE SENSOR COLUMNS =======
        sensor_cols = [c for c in train.columns if 'sensor' in c]
        scaler = MinMaxScaler()
        train[sensor_cols] = scaler.fit_transform(train[sensor_cols])
        test[sensor_cols] = scaler.transform(test[sensor_cols])  # use same scaler
        
        # ======= SAVE PROCESSED CSV =======
        train.to_csv(os.path.join(SAVE_PATH, f"train_{ds}_preprocessed.csv"), index=False)
        test.to_csv(os.path.join(SAVE_PATH, f"test_{ds}_preprocessed.csv"), index=False)
        rul.to_csv(os.path.join(SAVE_PATH, f"RUL_{ds}.csv"), index=False)
        print(f"{ds} CSVs saved successfully.")
        
        # ======= CREATE LSTM SEQUENCES =======
        X, y = create_sequences(train, sensor_cols, WINDOW_SIZE)
        np.save(os.path.join(SAVE_PATH, f"X_{ds}.npy"), X)
        np.save(os.path.join(SAVE_PATH, f"y_{ds}.npy"), y)
        print(f"{ds} LSTM sequences saved. Shape: X={X.shape}, y={y.shape}")
        
    except Exception as e:
        print(f"Error processing {ds}: {e}")



--- Preprocessing FD001 ---


  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))


FD001 CSVs saved successfully.
FD001 LSTM sequences saved. Shape: X=(17302, 30, 21), y=(17302,)

--- Preprocessing FD002 ---


  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))


FD002 CSVs saved successfully.
FD002 LSTM sequences saved. Shape: X=(38215, 30, 21), y=(38215,)

--- Preprocessing FD003 ---


  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))


FD003 CSVs saved successfully.
FD003 LSTM sequences saved. Shape: X=(21278, 30, 21), y=(21278,)

--- Preprocessing FD004 ---


  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))


FD004 CSVs saved successfully.
FD004 LSTM sequences saved. Shape: X=(45705, 30, 21), y=(45705,)
