# Imoport Required Libraries

In [1]:
import os
import joblib
import numpy as np
import pandas as pd
from pathlib import Path
from math import sqrt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from keras import Input
from keras.models import Sequential
from keras.layers import GRU, Dense, Dropout, BatchNormalization
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')
# DATA_PATH = "/content/drive/MyDrive/Infosys_SpringBoard/datasets"
# SAVE_PATH = "/content/drive/MyDrive/Infosys_SpringBoard/PrognosAI_Glo_SS/Models/"
# os.makedirs(SAVE_PATH, exist_ok=True)

In [None]:
DATA_PATH = "D:/Spring_Board/PrognosAI/CMAPSS_Data"
MODEL_PATH = "D:/Spring_Board/PrognosAI/Models"
os.makedirs(MODEL_PATH, exist_ok=True)
RUL_CLIP_VALUE = 126
SEQ_LEN = 30
EPOCHS = 80
BATCH_SIZE = 64

In [4]:
def load_data(dataset_name,file_path):
    print("\n"+"-"*48)
    print(f"|           üìä Loading Dataset : {dataset_name}         |")
    print("-"*48)
    try:
        print(f"Attempting to read file from: {file_path}")
        df = pd.read_csv(f"{file_path}/train_{dataset_name}.txt",sep=r"\s+",header=None)
        df.columns = ['unit_number','time',"setting_1", "setting_2", "setting_3"] + [f"sensor_{i}" for i in range(1,22)]
        print(f"{'='*10} ‚úÖ {dataset_name} Loaded successfully {'='*10}")
        return df
    
    except FileNotFoundError as e:
        print(f"‚ùå ERROR: File not found at the specified path.")
        print(f"   -> Could not find: {e.filename}")
        print("Please check your 'data_path' and 'dataset_name' inputs.")
        return None

In [5]:
def preprocessing(df: pd.DataFrame, dataset_name: str):
    print("\n"+"-"*52)
    print(f"|       üìä STARTING PREPROCESSING FOR: {dataset_name}       |")
    print("-"*52)

    try:
        initial_shape = df.shape
        print(f"Initial shape of the dataset: {initial_shape}")
        
        # -------------------- Handle Null Values --------------------
        
        null_count = df.isna().sum().sum()
        if null_count > 0:
            print(f"üîç Found {null_count:,} Null values. Dropping rows with NaNs...")
            df.dropna(inplace=True)
            new_null_count = df.isna().sum().sum()
            print(f"   -> Null values remaining: {new_null_count}")
        else:
            print("‚úÖ No Null values found in the dataset.")
        
        # -------------------- Handle Duplicate Values --------------------
        
        duplicate_count = df.duplicated().sum()
        if duplicate_count > 0:
            print(f"üìù Found {duplicate_count:,} Duplicate rows. Dropping duplicates...")
            df.drop_duplicates(inplace=True, keep='first')
            new_duplicate_count = df.duplicated().sum() 
            print(f"   -> Duplicate rows remaining: {new_duplicate_count}")
        else:
            print("‚úÖ No Duplicate rows found in the dataset.")
        
        
        print(f"üî¨ **Head of the processed {dataset_name} data:**")
        display(df.head())
        
        
        final_shape = df.shape
        print(f"Final shape after preprocessing: {final_shape}")
        
        rows_removed = initial_shape[0] - final_shape[0]
        print(f"Summary: {rows_removed:,} rows were removed (Nulls/Duplicates).")
        print(f"{'='*10}‚úÖ Preprocessing SUCCESS for {dataset_name} {'='*10}")
        
        return df
    
    except AttributeError:
        print(f"‚ùå ERROR: The input variable for {dataset_name} is not a valid pandas DataFrame.")
        print("Please check that the file was loaded correctly.")
        return None
    
    except Exception as e:
        print(f"‚ùå AN UNEXPECTED ERROR OCCURRED during preprocessing {dataset_name}: {e}")
        return None
    

In [6]:
def calculate_RUL(dataset : pd.DataFrame, dataset_name, RUL_clip : int):
    print("\n"+"-"*63)
    print(f"|       üìä Calsulating Remaining Useful Life FOR: {dataset_name}       |")
    print("-"*63)
    
    try:
        max_cycle = dataset.groupby("unit_number")["time"].max()
        dataset=dataset.merge(max_cycle.to_frame(name="max_cycle"),on="unit_number")
        dataset["RUL"]=(dataset['max_cycle']-dataset['time']).clip(upper=RUL_clip)
        dataset.drop('max_cycle',axis=1,inplace=True)
        print(f"‚úÖ RUL calculation complete for {dataset_name} with clip value of {RUL_clip}.")
        return dataset
    
    except Exception as e:
        print(f"‚ùå ERROR during RUL calculation for {dataset_name}: {e}")
        return None
    

In [7]:
def remove_constant_features(dataset: pd.DataFrame, dataset_name: str):
    print("\n" + "-"*65)
    print(f"|        ‚öôÔ∏è FEATURE SELECTION: REMOVING CONSTANT COLUMNS:         |")
    print("-" * 65)
    
    try:
        # Define the groups
        id_cols = ["unit_number", "time"]
        setting_cols = ["setting_1", "setting_2", "setting_3"]
        sensor_cols = [f"sensor_{i}" for i in range(1, 22)]
        target_col = ["RUL"]
        
        # All columns we want to consider for the model
        potential_features = setting_cols + sensor_cols
        
        # Filter to only those present in the dataframe
        active_features = [c for c in potential_features if c in dataset.columns]
        
        # Calculate variance and identify non-constant features
        feature_data = dataset[active_features]
        constant_mask = feature_data.var() == 0
        
        constant_features_to_drop = [c for c in active_features if constant_mask[c]]
        keep_features = [c for c in active_features if not constant_mask[c]]
            
        print(f"üóëÔ∏è Found {len(constant_features_to_drop)} constant features to be REMOVED.")
        print(f"   {constant_features_to_drop}")
        
        # Final DataFrame contains IDs + Kept Features + RUL
        final_column_order = id_cols + keep_features + target_col
        processed_df = dataset[final_column_order].copy()
        
        print(f"\n‚úÖ Feature selection complete for {dataset_name}.")
        print(f"    Total features kept (Settings + Sensors): {len(keep_features)}")

        # Return the DF and ONLY the predictive features (to be scaled)
        return processed_df, keep_features
    except Exception as e:
        print(f"\n‚ùå ERROR during feature selection: {e}")
        return None, None

In [8]:
def apply_global_scaler(df: pd.DataFrame, dataset_name: str, model_path: str, scale_cols: list) -> pd.DataFrame | None:
    print("\n" + "="*60)
    print(f"|         üìä SCALING DATASET: GLOBAL (Sensors & Settings)         |")
    print("="*60)
    
    scaler_filename = f"scaler_{dataset_name}.pkl"
    scaler_path = Path(model_path) / scaler_filename
    
    try:
        # Prepare the data for scaling
        X_to_scale = df[scale_cols]
        
        if scaler_path.exists():
            print(f"‚úÖ Scaler found! Loading existing scaler from: {scaler_path}")
            scaler = joblib.load(scaler_path)
        else:
            print(f"üîç Fitting new StandardScaler on {len(scale_cols)} columns...")
            scaler = StandardScaler()
            scaler.fit(X_to_scale)
            joblib.dump(scaler, scaler_path)
            print(f"üíæ Scaler saved to: {scaler_path}")

        df_scaled = df.copy()
        df_scaled[scale_cols] = scaler.transform(X_to_scale)

        print(f"‚úÖ Data scaling complete. Applied to: {scale_cols}")
        return df_scaled

    except Exception as e:
        print(f"‚ùå ERROR during scaling: {e}")
        return None

In [9]:
def create_sequences(df: pd.DataFrame, feature_cols: list, seq_len: int) -> tuple[np.ndarray, np.ndarray, list]:
    print("\n" + "="*60)
    print(f"üìå Generating sequences (Seq Len: {seq_len})")
    print("="*60)

    # Feature set: Sensors (scaled) followed by Settings (unscaled)
    # setting_cols = ["setting_1", "setting_2", "setting_3"]
    # feature_cols = sensor_cols + setting_cols
    
    
    X, y = [], []
    for unit in sorted(df["unit_number"].unique()):
        u = df[df["unit_number"]==unit].sort_values("time")
        data = u[feature_cols].values
        labels = u["RUL"].values
        n_cycles = len(data)

        if n_cycles < seq_len:
            print(f"‚ö†Ô∏è Skipping unit {unit} with only {n_cycles} cycles (< {seq_len}).")
            continue
            
        # Sliding window creation
        for i in range(n_cycles - seq_len + 1):
            X.append(data[i:i+seq_len])
            y.append(labels[i+seq_len-1])
            
    X = np.array(X)
    y = np.array(y)
    
    print(f"\n‚úÖ Sequence generation completed!")
    print(f"    Sequence shape: {X.shape}")
    print(f"    Labels shape: {y.shape}")
            
    return X, y, feature_cols


In [10]:
def build_gru(input_shape, units=128, dropout=0.3):
    model = Sequential([
        Input(shape=input_shape),
        GRU(units, return_sequences=False),
        BatchNormalization(),
        Dropout(dropout),
        Dense(64, activation="relu"),
        Dense(1, activation="linear")
    ])
    model.compile(optimizer="adam", loss="mse")
    return model

In [11]:
def rmse(y_true, y_pred):
    return float(sqrt(mean_squared_error(y_true, y_pred)))

In [12]:
def main(dataset_name, DATA_PATH, MODEL_PATH):
    print("="*100)
    print(f"STARTING GRU PIPELINE FOR DATASET: {dataset_name}")
    print("="*100)
    
    # 1. Load Data
    train_df = load_data(dataset_name, DATA_PATH)
    if train_df is None:
        print(f"Failed to load data for {dataset_name}. Aborting....")
        return
        
    # 2. Preprocessing (Clean Nulls & Duplicates)
    train_df = preprocessing(train_df, dataset_name)
    
    # 3. Calculate RUL and Clip
    train_df = calculate_RUL(train_df, dataset_name, RUL_CLIP_VALUE)
    print("\nHead of DataFrame after RUL Calculation:")
    display(train_df.head())
    
    # 4. Feature Selection (Remove constant sensors)
    train_df, final_features = remove_constant_features(train_df, dataset_name)
    if train_df is None: return
    
    # train_df, final_features = remove_constant_features(train_df, dataset_name)
    # if train_df is None: return
    
    print("\nHead of DataFrame after Feature Selection:")
    display(train_df.head())
    
    # 5. Scaling
    train_df_scaled = apply_global_scaler(train_df, dataset_name, MODEL_PATH, final_features)
    if train_df_scaled is None: return
    
    # train_df_scaled = apply_global_scaler(train_df, dataset_name, MODEL_PATH, final_features)
    # if train_df_scaled is None: return
    
    # 6. Sequence Creation
    X, y, feature_cols = create_sequences(train_df_scaled, final_features, SEQ_LEN)

    print(f"Final Feature Column Order ({len(feature_cols)}): {feature_cols}")
    
    # Save the final feature order list for the testing script
    joblib.dump(feature_cols, f"{MODEL_PATH}/feature_cols_{dataset_name}.pkl")
    
    # 7. Train/Validation Split (70%/15%)
    n = len(X)
    train_end = int(0.7 * n)
    val_end = int(0.85 * n)

    X_train, y_train = X[:train_end], y[:train_end]
    X_val, y_val = X[train_end:val_end], y[train_end:val_end]
    
    print(f"-> Train/Val split sizes: {len(X_train)} / {len(X_val)}")
    
    # 8. Build Model
    input_shape = (X.shape[1], X.shape[2])
    model = build_gru(input_shape=input_shape, units=128, dropout=0.3)
    
    # 9. Callbacks
    chkpt_path = f"{MODEL_PATH}/best_{dataset_name}.keras"
    callbacks = [
        EarlyStopping(monitor="val_loss", patience=8, restore_best_weights=True, verbose=1),
        ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, min_lr=1e-6),
        ModelCheckpoint(chkpt_path, monitor="val_loss", save_best_only=True)
    ]
    
    # 10. Training
    print("-> Starting training...")
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=callbacks,
        verbose=2
    )
    
    # 11. Final Save
    final_model_path = f"{MODEL_PATH}/model_{dataset_name}_hybrid.keras"
    model.save(final_model_path)
    
    # 12. Final Evaluation on Validation
    val_pred = model.predict(X_val).flatten()
    val_rmse = rmse(y_val, val_pred)
    
    print(f"\nüéâ Training Complete for {dataset_name}.")
    print(f"-> Validation RMSE (Best Weights): {val_rmse:.4f}")
    print(f"-> Model saved to: {final_model_path}")


In [13]:

if __name__ == "__main__":
    DATASET = "FD001" 
    main(DATASET, DATA_PATH, MODEL_PATH)

STARTING GRU PIPELINE FOR DATASET: FD001

------------------------------------------------
|           üìä Loading Dataset : FD001         |
------------------------------------------------
Attempting to read file from: D:/Spring_Board/PrognosAi/CMAPSS_Data

----------------------------------------------------
|       üìä STARTING PREPROCESSING FOR: FD001       |
----------------------------------------------------
Initial shape of the dataset: (20631, 26)
‚úÖ No Null values found in the dataset.
‚úÖ No Duplicate rows found in the dataset.
üî¨ **Head of the processed FD001 data:**


Unnamed: 0,unit_number,time,setting_1,setting_2,setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,521.66,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,522.28,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,...,522.42,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442
3,1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,...,522.86,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,522.19,2388.04,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044


Final shape after preprocessing: (20631, 26)
Summary: 0 rows were removed (Nulls/Duplicates).

---------------------------------------------------------------
|       üìä Calsulating Remaining Useful Life FOR: FD001       |
---------------------------------------------------------------
‚úÖ RUL calculation complete for FD001 with clip value of 126.

Head of DataFrame after RUL Calculation:


Unnamed: 0,unit_number,time,setting_1,setting_2,setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,RUL
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419,126
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236,126
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,...,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442,126
3,1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,...,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739,126
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,2388.04,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044,126



-----------------------------------------------------------------
|        ‚öôÔ∏è FEATURE SELECTION: REMOVING CONSTANT COLUMNS:         |
-----------------------------------------------------------------
üóëÔ∏è Found 5 constant features to be REMOVED.
   ['setting_3', 'sensor_1', 'sensor_10', 'sensor_18', 'sensor_19']

‚úÖ Feature selection complete for FD001.
    Total features kept (Settings + Sensors): 19

Head of DataFrame after Feature Selection:


Unnamed: 0,unit_number,time,setting_1,setting_2,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,...,sensor_11,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_20,sensor_21,RUL
0,1,1,-0.0007,-0.0004,641.82,1589.7,1400.6,14.62,21.61,554.36,...,47.47,521.66,2388.02,8138.62,8.4195,0.03,392,39.06,23.419,126
1,1,2,0.0019,-0.0003,642.15,1591.82,1403.14,14.62,21.61,553.75,...,47.49,522.28,2388.07,8131.49,8.4318,0.03,392,39.0,23.4236,126
2,1,3,-0.0043,0.0003,642.35,1587.99,1404.2,14.62,21.61,554.26,...,47.27,522.42,2388.03,8133.23,8.4178,0.03,390,38.95,23.3442,126
3,1,4,0.0007,0.0,642.35,1582.79,1401.87,14.62,21.61,554.45,...,47.13,522.86,2388.08,8133.83,8.3682,0.03,392,38.88,23.3739,126
4,1,5,-0.0019,-0.0002,642.37,1582.85,1406.22,14.62,21.61,554.0,...,47.28,522.19,2388.04,8133.8,8.4294,0.03,393,38.9,23.4044,126



|         üìä SCALING DATASET: GLOBAL (Sensors & Settings)         |
üîç Fitting new StandardScaler on 19 columns...
üíæ Scaler saved to: D:\Spring_Board\PrognosAi\Models\scaler_FD001.pkl
‚úÖ Data scaling complete. Applied to: ['setting_1', 'setting_2', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5', 'sensor_6', 'sensor_7', 'sensor_8', 'sensor_9', 'sensor_11', 'sensor_12', 'sensor_13', 'sensor_14', 'sensor_15', 'sensor_16', 'sensor_17', 'sensor_20', 'sensor_21']

üìå Generating sequences (Seq Len: 30)

‚úÖ Sequence generation completed!
    Sequence shape: (17731, 30, 19)
    Labels shape: (17731,)
Final Feature Column Order (19): ['setting_1', 'setting_2', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5', 'sensor_6', 'sensor_7', 'sensor_8', 'sensor_9', 'sensor_11', 'sensor_12', 'sensor_13', 'sensor_14', 'sensor_15', 'sensor_16', 'sensor_17', 'sensor_20', 'sensor_21']
-> Train/Val split sizes: 12411 / 2660
-> Starting training...
Epoch 1/80
194/194 - 16s - 84ms/step - loss: 2675.577

In [14]:
if __name__ == "__main__":
    DATASET = "FD002" 
    main(DATASET, DATA_PATH, MODEL_PATH)

STARTING GRU PIPELINE FOR DATASET: FD002

------------------------------------------------
|           üìä Loading Dataset : FD002         |
------------------------------------------------
Attempting to read file from: D:/Spring_Board/PrognosAi/CMAPSS_Data

----------------------------------------------------
|       üìä STARTING PREPROCESSING FOR: FD002       |
----------------------------------------------------
Initial shape of the dataset: (53759, 26)
‚úÖ No Null values found in the dataset.
‚úÖ No Duplicate rows found in the dataset.
üî¨ **Head of the processed FD002 data:**


Unnamed: 0,unit_number,time,setting_1,setting_2,setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21
0,1,1,34.9983,0.84,100.0,449.44,555.32,1358.61,1137.23,5.48,...,183.06,2387.72,8048.56,9.3461,0.02,334,2223,100.0,14.73,8.8071
1,1,2,41.9982,0.8408,100.0,445.0,549.9,1353.22,1125.78,3.91,...,130.42,2387.66,8072.3,9.3774,0.02,330,2212,100.0,10.41,6.2665
2,1,3,24.9988,0.6218,60.0,462.54,537.31,1256.76,1047.45,7.05,...,164.22,2028.03,7864.87,10.8941,0.02,309,1915,84.93,14.08,8.6723
3,1,4,42.0077,0.8416,100.0,445.0,549.51,1354.03,1126.38,3.91,...,130.72,2387.61,8068.66,9.3528,0.02,329,2212,100.0,10.59,6.4701
4,1,5,25.0005,0.6203,60.0,462.54,537.07,1257.71,1047.93,7.05,...,164.31,2028.0,7861.23,10.8963,0.02,309,1915,84.93,14.13,8.5286


Final shape after preprocessing: (53759, 26)
Summary: 0 rows were removed (Nulls/Duplicates).

---------------------------------------------------------------
|       üìä Calsulating Remaining Useful Life FOR: FD002       |
---------------------------------------------------------------
‚úÖ RUL calculation complete for FD002 with clip value of 126.

Head of DataFrame after RUL Calculation:


Unnamed: 0,unit_number,time,setting_1,setting_2,setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,RUL
0,1,1,34.9983,0.84,100.0,449.44,555.32,1358.61,1137.23,5.48,...,2387.72,8048.56,9.3461,0.02,334,2223,100.0,14.73,8.8071,126
1,1,2,41.9982,0.8408,100.0,445.0,549.9,1353.22,1125.78,3.91,...,2387.66,8072.3,9.3774,0.02,330,2212,100.0,10.41,6.2665,126
2,1,3,24.9988,0.6218,60.0,462.54,537.31,1256.76,1047.45,7.05,...,2028.03,7864.87,10.8941,0.02,309,1915,84.93,14.08,8.6723,126
3,1,4,42.0077,0.8416,100.0,445.0,549.51,1354.03,1126.38,3.91,...,2387.61,8068.66,9.3528,0.02,329,2212,100.0,10.59,6.4701,126
4,1,5,25.0005,0.6203,60.0,462.54,537.07,1257.71,1047.93,7.05,...,2028.0,7861.23,10.8963,0.02,309,1915,84.93,14.13,8.5286,126



-----------------------------------------------------------------
|        ‚öôÔ∏è FEATURE SELECTION: REMOVING CONSTANT COLUMNS:         |
-----------------------------------------------------------------
üóëÔ∏è Found 0 constant features to be REMOVED.
   []

‚úÖ Feature selection complete for FD002.
    Total features kept (Settings + Sensors): 24

Head of DataFrame after Feature Selection:


Unnamed: 0,unit_number,time,setting_1,setting_2,setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,RUL
0,1,1,34.9983,0.84,100.0,449.44,555.32,1358.61,1137.23,5.48,...,2387.72,8048.56,9.3461,0.02,334,2223,100.0,14.73,8.8071,126
1,1,2,41.9982,0.8408,100.0,445.0,549.9,1353.22,1125.78,3.91,...,2387.66,8072.3,9.3774,0.02,330,2212,100.0,10.41,6.2665,126
2,1,3,24.9988,0.6218,60.0,462.54,537.31,1256.76,1047.45,7.05,...,2028.03,7864.87,10.8941,0.02,309,1915,84.93,14.08,8.6723,126
3,1,4,42.0077,0.8416,100.0,445.0,549.51,1354.03,1126.38,3.91,...,2387.61,8068.66,9.3528,0.02,329,2212,100.0,10.59,6.4701,126
4,1,5,25.0005,0.6203,60.0,462.54,537.07,1257.71,1047.93,7.05,...,2028.0,7861.23,10.8963,0.02,309,1915,84.93,14.13,8.5286,126



|         üìä SCALING DATASET: GLOBAL (Sensors & Settings)         |
üîç Fitting new StandardScaler on 24 columns...
üíæ Scaler saved to: D:\Spring_Board\PrognosAi\Models\scaler_FD002.pkl
‚úÖ Data scaling complete. Applied to: ['setting_1', 'setting_2', 'setting_3', 'sensor_1', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5', 'sensor_6', 'sensor_7', 'sensor_8', 'sensor_9', 'sensor_10', 'sensor_11', 'sensor_12', 'sensor_13', 'sensor_14', 'sensor_15', 'sensor_16', 'sensor_17', 'sensor_18', 'sensor_19', 'sensor_20', 'sensor_21']

üìå Generating sequences (Seq Len: 30)

‚úÖ Sequence generation completed!
    Sequence shape: (46219, 30, 24)
    Labels shape: (46219,)
Final Feature Column Order (24): ['setting_1', 'setting_2', 'setting_3', 'sensor_1', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5', 'sensor_6', 'sensor_7', 'sensor_8', 'sensor_9', 'sensor_10', 'sensor_11', 'sensor_12', 'sensor_13', 'sensor_14', 'sensor_15', 'sensor_16', 'sensor_17', 'sensor_18', 'sensor_19', 'sensor_20', '

In [15]:
if __name__ == "__main__":
    DATASET = "FD003" 
    main(DATASET, DATA_PATH, MODEL_PATH)

STARTING GRU PIPELINE FOR DATASET: FD003

------------------------------------------------
|           üìä Loading Dataset : FD003         |
------------------------------------------------
Attempting to read file from: D:/Spring_Board/PrognosAi/CMAPSS_Data

----------------------------------------------------
|       üìä STARTING PREPROCESSING FOR: FD003       |
----------------------------------------------------
Initial shape of the dataset: (24720, 26)
‚úÖ No Null values found in the dataset.
‚úÖ No Duplicate rows found in the dataset.
üî¨ **Head of the processed FD003 data:**


Unnamed: 0,unit_number,time,setting_1,setting_2,setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21
0,1,1,-0.0005,0.0004,100.0,518.67,642.36,1583.23,1396.84,14.62,...,522.31,2388.01,8145.32,8.4246,0.03,391,2388,100.0,39.11,23.3537
1,1,2,0.0008,-0.0003,100.0,518.67,642.5,1584.69,1396.89,14.62,...,522.42,2388.03,8152.85,8.4403,0.03,392,2388,100.0,38.99,23.4491
2,1,3,-0.0014,-0.0002,100.0,518.67,642.18,1582.35,1405.61,14.62,...,522.03,2388.0,8150.17,8.3901,0.03,391,2388,100.0,38.85,23.3669
3,1,4,-0.002,0.0001,100.0,518.67,642.92,1585.61,1392.27,14.62,...,522.49,2388.08,8146.56,8.3878,0.03,392,2388,100.0,38.96,23.2951
4,1,5,0.0016,0.0,100.0,518.67,641.68,1588.63,1397.65,14.62,...,522.58,2388.03,8147.8,8.3869,0.03,392,2388,100.0,39.14,23.4583


Final shape after preprocessing: (24720, 26)
Summary: 0 rows were removed (Nulls/Duplicates).

---------------------------------------------------------------
|       üìä Calsulating Remaining Useful Life FOR: FD003       |
---------------------------------------------------------------
‚úÖ RUL calculation complete for FD003 with clip value of 126.

Head of DataFrame after RUL Calculation:


Unnamed: 0,unit_number,time,setting_1,setting_2,setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,RUL
0,1,1,-0.0005,0.0004,100.0,518.67,642.36,1583.23,1396.84,14.62,...,2388.01,8145.32,8.4246,0.03,391,2388,100.0,39.11,23.3537,126
1,1,2,0.0008,-0.0003,100.0,518.67,642.5,1584.69,1396.89,14.62,...,2388.03,8152.85,8.4403,0.03,392,2388,100.0,38.99,23.4491,126
2,1,3,-0.0014,-0.0002,100.0,518.67,642.18,1582.35,1405.61,14.62,...,2388.0,8150.17,8.3901,0.03,391,2388,100.0,38.85,23.3669,126
3,1,4,-0.002,0.0001,100.0,518.67,642.92,1585.61,1392.27,14.62,...,2388.08,8146.56,8.3878,0.03,392,2388,100.0,38.96,23.2951,126
4,1,5,0.0016,0.0,100.0,518.67,641.68,1588.63,1397.65,14.62,...,2388.03,8147.8,8.3869,0.03,392,2388,100.0,39.14,23.4583,126



-----------------------------------------------------------------
|        ‚öôÔ∏è FEATURE SELECTION: REMOVING CONSTANT COLUMNS:         |
-----------------------------------------------------------------
üóëÔ∏è Found 4 constant features to be REMOVED.
   ['setting_3', 'sensor_1', 'sensor_18', 'sensor_19']

‚úÖ Feature selection complete for FD003.
    Total features kept (Settings + Sensors): 20

Head of DataFrame after Feature Selection:


Unnamed: 0,unit_number,time,setting_1,setting_2,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,...,sensor_11,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_20,sensor_21,RUL
0,1,1,-0.0005,0.0004,642.36,1583.23,1396.84,14.62,21.61,553.97,...,47.3,522.31,2388.01,8145.32,8.4246,0.03,391,39.11,23.3537,126
1,1,2,0.0008,-0.0003,642.5,1584.69,1396.89,14.62,21.61,554.55,...,47.23,522.42,2388.03,8152.85,8.4403,0.03,392,38.99,23.4491,126
2,1,3,-0.0014,-0.0002,642.18,1582.35,1405.61,14.62,21.61,554.43,...,47.22,522.03,2388.0,8150.17,8.3901,0.03,391,38.85,23.3669,126
3,1,4,-0.002,0.0001,642.92,1585.61,1392.27,14.62,21.61,555.21,...,47.24,522.49,2388.08,8146.56,8.3878,0.03,392,38.96,23.2951,126
4,1,5,0.0016,0.0,641.68,1588.63,1397.65,14.62,21.61,554.74,...,47.15,522.58,2388.03,8147.8,8.3869,0.03,392,39.14,23.4583,126



|         üìä SCALING DATASET: GLOBAL (Sensors & Settings)         |
üîç Fitting new StandardScaler on 20 columns...
üíæ Scaler saved to: D:\Spring_Board\PrognosAi\Models\scaler_FD003.pkl
‚úÖ Data scaling complete. Applied to: ['setting_1', 'setting_2', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5', 'sensor_6', 'sensor_7', 'sensor_8', 'sensor_9', 'sensor_10', 'sensor_11', 'sensor_12', 'sensor_13', 'sensor_14', 'sensor_15', 'sensor_16', 'sensor_17', 'sensor_20', 'sensor_21']

üìå Generating sequences (Seq Len: 30)

‚úÖ Sequence generation completed!
    Sequence shape: (21820, 30, 20)
    Labels shape: (21820,)
Final Feature Column Order (20): ['setting_1', 'setting_2', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5', 'sensor_6', 'sensor_7', 'sensor_8', 'sensor_9', 'sensor_10', 'sensor_11', 'sensor_12', 'sensor_13', 'sensor_14', 'sensor_15', 'sensor_16', 'sensor_17', 'sensor_20', 'sensor_21']
-> Train/Val split sizes: 15273 / 3274
-> Starting training...
Epoch 1/80
239/239 - 17s - 

In [16]:
if __name__ == "__main__":
    DATASET = "FD004" 
    main(DATASET, DATA_PATH, MODEL_PATH)

STARTING GRU PIPELINE FOR DATASET: FD004

------------------------------------------------
|           üìä Loading Dataset : FD004         |
------------------------------------------------
Attempting to read file from: D:/Spring_Board/PrognosAi/CMAPSS_Data

----------------------------------------------------
|       üìä STARTING PREPROCESSING FOR: FD004       |
----------------------------------------------------
Initial shape of the dataset: (61249, 26)
‚úÖ No Null values found in the dataset.
‚úÖ No Duplicate rows found in the dataset.
üî¨ **Head of the processed FD004 data:**


Unnamed: 0,unit_number,time,setting_1,setting_2,setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21
0,1,1,42.0049,0.84,100.0,445.0,549.68,1343.43,1112.93,3.91,...,129.78,2387.99,8074.83,9.3335,0.02,330,2212,100.0,10.62,6.367
1,1,2,20.002,0.7002,100.0,491.19,606.07,1477.61,1237.5,9.35,...,312.59,2387.73,8046.13,9.1913,0.02,361,2324,100.0,24.37,14.6552
2,1,3,42.0038,0.8409,100.0,445.0,548.95,1343.12,1117.05,3.91,...,129.62,2387.97,8066.62,9.4007,0.02,329,2212,100.0,10.48,6.4213
3,1,4,42.0,0.84,100.0,445.0,548.7,1341.24,1118.03,3.91,...,129.8,2388.02,8076.05,9.3369,0.02,328,2212,100.0,10.54,6.4176
4,1,5,25.0063,0.6207,60.0,462.54,536.1,1255.23,1033.59,7.05,...,164.11,2028.08,7865.8,10.8366,0.02,305,1915,84.93,14.03,8.6754


Final shape after preprocessing: (61249, 26)
Summary: 0 rows were removed (Nulls/Duplicates).

---------------------------------------------------------------
|       üìä Calsulating Remaining Useful Life FOR: FD004       |
---------------------------------------------------------------
‚úÖ RUL calculation complete for FD004 with clip value of 126.

Head of DataFrame after RUL Calculation:


Unnamed: 0,unit_number,time,setting_1,setting_2,setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,RUL
0,1,1,42.0049,0.84,100.0,445.0,549.68,1343.43,1112.93,3.91,...,2387.99,8074.83,9.3335,0.02,330,2212,100.0,10.62,6.367,126
1,1,2,20.002,0.7002,100.0,491.19,606.07,1477.61,1237.5,9.35,...,2387.73,8046.13,9.1913,0.02,361,2324,100.0,24.37,14.6552,126
2,1,3,42.0038,0.8409,100.0,445.0,548.95,1343.12,1117.05,3.91,...,2387.97,8066.62,9.4007,0.02,329,2212,100.0,10.48,6.4213,126
3,1,4,42.0,0.84,100.0,445.0,548.7,1341.24,1118.03,3.91,...,2388.02,8076.05,9.3369,0.02,328,2212,100.0,10.54,6.4176,126
4,1,5,25.0063,0.6207,60.0,462.54,536.1,1255.23,1033.59,7.05,...,2028.08,7865.8,10.8366,0.02,305,1915,84.93,14.03,8.6754,126



-----------------------------------------------------------------
|        ‚öôÔ∏è FEATURE SELECTION: REMOVING CONSTANT COLUMNS:         |
-----------------------------------------------------------------
üóëÔ∏è Found 0 constant features to be REMOVED.
   []

‚úÖ Feature selection complete for FD004.
    Total features kept (Settings + Sensors): 24

Head of DataFrame after Feature Selection:


Unnamed: 0,unit_number,time,setting_1,setting_2,setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,RUL
0,1,1,42.0049,0.84,100.0,445.0,549.68,1343.43,1112.93,3.91,...,2387.99,8074.83,9.3335,0.02,330,2212,100.0,10.62,6.367,126
1,1,2,20.002,0.7002,100.0,491.19,606.07,1477.61,1237.5,9.35,...,2387.73,8046.13,9.1913,0.02,361,2324,100.0,24.37,14.6552,126
2,1,3,42.0038,0.8409,100.0,445.0,548.95,1343.12,1117.05,3.91,...,2387.97,8066.62,9.4007,0.02,329,2212,100.0,10.48,6.4213,126
3,1,4,42.0,0.84,100.0,445.0,548.7,1341.24,1118.03,3.91,...,2388.02,8076.05,9.3369,0.02,328,2212,100.0,10.54,6.4176,126
4,1,5,25.0063,0.6207,60.0,462.54,536.1,1255.23,1033.59,7.05,...,2028.08,7865.8,10.8366,0.02,305,1915,84.93,14.03,8.6754,126



|         üìä SCALING DATASET: GLOBAL (Sensors & Settings)         |
üîç Fitting new StandardScaler on 24 columns...
üíæ Scaler saved to: D:\Spring_Board\PrognosAi\Models\scaler_FD004.pkl
‚úÖ Data scaling complete. Applied to: ['setting_1', 'setting_2', 'setting_3', 'sensor_1', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5', 'sensor_6', 'sensor_7', 'sensor_8', 'sensor_9', 'sensor_10', 'sensor_11', 'sensor_12', 'sensor_13', 'sensor_14', 'sensor_15', 'sensor_16', 'sensor_17', 'sensor_18', 'sensor_19', 'sensor_20', 'sensor_21']

üìå Generating sequences (Seq Len: 30)

‚úÖ Sequence generation completed!
    Sequence shape: (54028, 30, 24)
    Labels shape: (54028,)
Final Feature Column Order (24): ['setting_1', 'setting_2', 'setting_3', 'sensor_1', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5', 'sensor_6', 'sensor_7', 'sensor_8', 'sensor_9', 'sensor_10', 'sensor_11', 'sensor_12', 'sensor_13', 'sensor_14', 'sensor_15', 'sensor_16', 'sensor_17', 'sensor_18', 'sensor_19', 'sensor_20', '

In [None]:
# for i in ['FD001','FD002','FD003','FD004']:
#     main(i,DATA_PATH,SAVE_PATH)
# main('FD001',DATA_PATH,MODEL_PATH)