In [1]:
import pandas as pd
import numpy as np

# Read the cleaned FD001 CSV into a DataFrame and preview its first rows
csv_path = '../../data_set/cleaned_dataset/csmapss_cleaned_FD001.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,engine_id,cycle,op_setting_1,op_setting_2,op_setting_3,Sensor_1,Sensor_2,Sensor_3,Sensor_4,Sensor_5,...,Sensor_12,Sensor_13,Sensor_14,Sensor_15,Sensor_16,Sensor_17,Sensor_18,Sensor_19,Sensor_20,Sensor_21
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,521.66,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,522.28,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,...,522.42,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442
3,1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,...,522.86,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,522.19,2388.04,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044


In [2]:
# Identifier / time-index columns that are left out of the feature set
exclude_cols = ['engine_id', 'cycle']
# Every remaining column, in its original order, is treated as a feature
feature_cols = df.columns[~df.columns.isin(exclude_cols)].tolist()

# Order rows by engine and then by cycle, renumbering the index from 0,
# so that later windowing walks each engine's rows in temporal order
df = df.sort_values(by=['engine_id', 'cycle'], ignore_index=True)


In [3]:
def generate_rolling_windows(data,  engine_col, features, window_size=30, cycle_col='cycle'):
  """Build fixed-length rolling-window sequences for each engine.

  For every distinct value of `engine_col`, a window of `window_size`
  consecutive rows slides one row at a time over that engine's rows, in the
  order the rows appear in `data` (this function does not sort). Each window
  is labelled with its engine id and with the `cycle_col` value of the
  window's last row. Engines with fewer than `window_size` rows contribute
  no windows.

  Parameters
  ----------
  data : pandas.DataFrame
      Input rows containing `engine_col`, `cycle_col` and `features`.
  engine_col : str
      Column identifying which engine a row belongs to.
  features : list of str
      Columns copied into every window.
  window_size : int, default 30
      Number of rows per window; must be at least 1.
  cycle_col : str, default 'cycle'
      Column whose value at the last row of each window is recorded.

  Returns
  -------
  sequence : numpy.ndarray
      Stacked windows, shape (num_windows, window_size, len(features)).
      When no window can be formed the first dimension is 0 but the
      remaining dimensions are kept.
  engine_ids : list
      Engine id of each window.
  cycle_ids : list
      Value of `cycle_col` at the last row of each window, in the column's
      own dtype.

  Raises
  ------
  ValueError
      If `window_size` is smaller than 1.
  """
  if window_size < 1:
    raise ValueError("window_size must be a positive integer")

  sequences = []
  engine_ids = []
  cycle_ids = []

  for engine in data[engine_col].unique():
    engine_data = data[data[engine_col] == engine]
    engine_features = engine_data[features].values
    # Read the cycle column once per engine. Row-wise `.iloc[i][...]` would
    # build a whole Series per row and upcast integer cycles to float.
    engine_cycles = engine_data[cycle_col].values

    # Index i is the last row of the window covering rows [i-window_size+1, i]
    for i in range(window_size-1, len(engine_data)):
      sequences.append(engine_features[i - window_size+1 : i+1])
      engine_ids.append(engine)
      cycle_ids.append(engine_cycles[i])

  if not sequences:
    # Keep the window/feature dimensions so callers can still inspect
    # `.shape[1]` etc. on an empty result.
    feature_values = data[features].values
    empty_shape = (0, window_size) + feature_values.shape[1:]
    return np.empty(empty_shape, dtype=feature_values.dtype), engine_ids, cycle_ids

  # Convert to array for modeling
  sequence = np.array(sequences)
  return sequence, engine_ids, cycle_ids


In [4]:
# Number of rows in each rolling window; adjust as needed
window_size = 30
sequences, engine_ids, cycle_ids = generate_rolling_windows(
    df, 'engine_id', feature_cols, window_size=window_size
)

# Array layout is (num_sequences, window_size, num_features)
print("Shape of rolling window sequences:", sequences.shape)
print("Example sequence shape:", sequences[0].shape)

Shape of rolling window sequences: (17731, 30, 24)
Example sequence shape: (30, 24)


In [5]:
# Show which engine/cycle the first window is labelled with, then its contents
first_sequence = sequences[0]
print(f"Engine ID: {engine_ids[0]}, Cycle: {cycle_ids[0]}")
print("Sequence data for first time window (shape {}):".format(first_sequence.shape))
print(first_sequence)


Engine ID: 1, Cycle: 30.0
Sequence data for first time window (shape (30, 24)):
[[-7.00000e-04 -4.00000e-04  1.00000e+02  5.18670e+02  6.41820e+02
   1.58970e+03  1.40060e+03  1.46200e+01  2.16100e+01  5.54360e+02
   2.38806e+03  9.04619e+03  1.30000e+00  4.74700e+01  5.21660e+02
   2.38802e+03  8.13862e+03  8.41950e+00  3.00000e-02  3.92000e+02
   2.38800e+03  1.00000e+02  3.90600e+01  2.34190e+01]
 [ 1.90000e-03 -3.00000e-04  1.00000e+02  5.18670e+02  6.42150e+02
   1.59182e+03  1.40314e+03  1.46200e+01  2.16100e+01  5.53750e+02
   2.38804e+03  9.04407e+03  1.30000e+00  4.74900e+01  5.22280e+02
   2.38807e+03  8.13149e+03  8.43180e+00  3.00000e-02  3.92000e+02
   2.38800e+03  1.00000e+02  3.90000e+01  2.34236e+01]
 [-4.30000e-03  3.00000e-04  1.00000e+02  5.18670e+02  6.42350e+02
   1.58799e+03  1.40420e+03  1.46200e+01  2.16100e+01  5.54260e+02
   2.38808e+03  9.05294e+03  1.30000e+00  4.72700e+01  5.22420e+02
   2.38803e+03  8.13323e+03  8.41780e+00  3.00000e-02  3.90000e+02
   2.3

In [6]:
# Every window must span exactly `window_size` rows
assert sequences.shape[1] == window_size, "Sequence window length mismatch"

# For consecutive windows of the same engine, the recorded cycle must strictly
# increase; a change of engine resets the comparison
window_keys = list(zip(engine_ids, cycle_ids))
assert all(
    engine != prev_engine or cycle > prev_cycle
    for (prev_engine, prev_cycle), (engine, cycle) in zip(window_keys, window_keys[1:])
), "Cycle order violation"

print("Basic validation checks passed.")

Basic validation checks passed.


In [7]:
# Persist the window array, plus a table giving each window's engine and cycle
np.save('rolling_window_sequences.npy', sequences)
sequence_metadata = pd.DataFrame({'engine_id': engine_ids, 'cycle': cycle_ids})
sequence_metadata.to_csv('sequence_metadata.csv', index=False)