In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib

# Load the data
train = pd.read_csv('train_FD004.csv', low_memory=False)

# Define a function to calculate Fisher score using first 50 and last 50 samples of each engine
def fisher_score_sensor(df, sensor, start_cycles=50, end_cycles=50):
    begin_life = df[df['time, in cycles'] <= start_cycles][sensor]
    end_life = df[df['time, in cycles'] >= (df['time, in cycles'].max() - end_cycles + 1)][sensor]
    mean_diff = abs(begin_life.mean() - end_life.mean())
    within_var = begin_life.var() + end_life.var()
    return mean_diff / within_var

# Apply Fisher score calculation across each sensor
sensor_columns = [col for col in train.columns if col.startswith('sensor')]
fisher_scores = {sensor: fisher_score_sensor(train, sensor) for sensor in sensor_columns}

# Select the top sensors based on Fisher scores
top_sensors = sorted(fisher_scores, key=fisher_scores.get, reverse=True)[:5]
print("Top sensors selected based on Fisher score:", top_sensors)

# Calculate RUL for each engine
train['RUL'] = train.groupby('unit number')['time, in cycles'].transform(lambda x: x.max() - x)

# Apply EMA for each top sensor
ema_span = 50
for sensor in top_sensors:
    train[f'{sensor}_EMA'] = train.groupby('unit number')[sensor].transform(lambda x: x.ewm(span=ema_span, adjust=False).mean())
train = train[['unit number', 'time, in cycles'] + [f'{sensor}_EMA' for sensor in top_sensors] + ['RUL']]
import matplotlib.pyplot as plt
import os
# Add gradient columns to check for consecutive direction
for sensor in top_sensors:
    # Calculate the gradient between consecutive EMA points
    train[f'{sensor}_EMA_gradient'] = train.groupby('unit number')[f'{sensor}_EMA'].diff()

# Define function to check if EMA direction is consistent over 5 cycles
def check_consistent_direction(df, sensor, window=5):
    gradients = df[f'{sensor}_EMA_gradient']
    direction = np.sign(gradients)
    # Check if within a rolling window of 5, all directions are the same (either all 1 or all -1)
    return direction.rolling(window=window).apply(lambda x: all(x == x[0]), raw=True).fillna(0).astype(bool)

# Apply this function for each sensor and create a column indicating consistent direction over 5 cycles
for sensor in top_sensors:
    train[f'{sensor}_EMA_consistent_direction'] = train.groupby('unit number').apply(
        lambda x: check_consistent_direction(x, sensor)
    ).reset_index(level=0, drop=True)

Top sensors selected based on Fisher score: ['sensor measurement 16', 'sensor measurement 10', 'sensor measurement 15', 'sensor measurement 11', 'sensor measurement 19']


  train[f'{sensor}_EMA_consistent_direction'] = train.groupby('unit number').apply(
  train[f'{sensor}_EMA_consistent_direction'] = train.groupby('unit number').apply(
  train[f'{sensor}_EMA_consistent_direction'] = train.groupby('unit number').apply(
  train[f'{sensor}_EMA_consistent_direction'] = train.groupby('unit number').apply(
  train[f'{sensor}_EMA_consistent_direction'] = train.groupby('unit number').apply(


In [2]:
train.head()

Unnamed: 0,unit number,"time, in cycles",sensor measurement 16_EMA,sensor measurement 10_EMA,sensor measurement 15_EMA,sensor measurement 11_EMA,sensor measurement 19_EMA,RUL,sensor measurement 16_EMA_gradient,sensor measurement 10_EMA_gradient,sensor measurement 15_EMA_gradient,sensor measurement 11_EMA_gradient,sensor measurement 19_EMA_gradient,sensor measurement 16_EMA_consistent_direction,sensor measurement 10_EMA_consistent_direction,sensor measurement 15_EMA_consistent_direction,sensor measurement 11_EMA_consistent_direction,sensor measurement 19_EMA_consistent_direction
0,1,1,0.02,1.01,9.3335,41.69,100.0,320,,,,,,False,False,False,False,False
1,1,2,0.02,1.012353,9.327924,41.778235,100.0,319,0.0,0.002353,-0.005576,0.088235,0.0,False,False,False,False,False
2,1,3,0.02,1.012261,9.330778,41.773599,100.0,318,0.0,-9.2e-05,0.002854,-0.004637,0.0,False,False,False,False,False
3,1,4,0.02,1.012564,9.331018,41.769928,100.0,317,0.0,0.000304,0.00024,-0.003671,0.0,False,False,False,False,False
4,1,5,0.02,1.009326,9.39006,41.56248,99.40902,316,0.0,-0.003238,0.059042,-0.207448,-0.59098,False,False,False,False,False


In [3]:
train.to_csv('train_FD004_EMA_Gradient.csv', index=False)

In [4]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Check for 5 consecutive cycles with consistent direction, where at least 4 sensors are TRUE simultaneously for each engine
consecutive_cycles = 5
required_true_sensors = 3  # Minimum number of sensors required to have consecutive TRUE values simultaneously

output_dir = 'plots'
os.makedirs(output_dir, exist_ok=True)

# List to collect results for each engine
results = []

for engine_id, group in train.groupby('unit number'):
    # Create a DataFrame of consistent directions for each sensor
    consistent_directions = group[[f'{sensor}_EMA_consistent_direction' for sensor in top_sensors]]
    
    # Identify rows where at least 4 sensors are TRUE simultaneously
    sufficient_sensors_true = (consistent_directions.sum(axis=1) >= required_true_sensors)
    
    # Find rolling windows where this condition is TRUE for all 5 consecutive cycles
    consecutive_true = sufficient_sensors_true.rolling(window=consecutive_cycles).apply(lambda x: all(x), raw=True).fillna(0).astype(bool)
    
    # Get the index of the first cycle where the condition is met
    valid_cycles = group.loc[consecutive_true].index.tolist()
    Degradation_Onset = group.loc[valid_cycles[0], 'time, in cycles'] if valid_cycles else None
    
    # Print the first instance if it exists
    if Degradation_Onset:
        print(f"Engine {engine_id} meets the condition first at cycle {Degradation_Onset}.")
    
    # Plotting
    fig, axs = plt.subplots(2, 3, figsize=(15, 10))
    fig.suptitle(f'EMA Trend for Engine {engine_id}', fontsize=16)
    
    for i, sensor in enumerate(top_sensors):
        ax = axs[i // 3, i % 3]
        ema = group[f'{sensor}_EMA']
        ax.plot(group['time, in cycles'], ema, label='EMA', color='green')
        
        # Mark the first cycle that meets the condition
        if Degradation_Onset:
            ax.axvline(x=Degradation_Onset, color='red', linestyle='--', label='Degradation Onset')
        
        ax.set_title(sensor)
        ax.set_xlabel('Cycles')
        ax.set_ylabel('EMA')
        ax.legend(loc='upper right')
    
    # Remove the last subplot if it’s not used
    fig.delaxes(axs[1, 2])
    
    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.savefig(f"{output_dir}/engine_{engine_id}_ema.png", dpi=300)
    # plt.show()
    plt.close(fig)

    # Add results to the list
    results.append({'unit number': engine_id, 'Degradation Onset': Degradation_Onset})

# Convert results to DataFrame and save to CSV once
results_df = pd.DataFrame(results)
# Save the results to a CSV file with consecutive_cycles in the filename
results_df.to_csv(f'results_{consecutive_cycles}_cycles.csv', index=False)

Engine 1 meets the condition first at cycle 137.
Engine 2 meets the condition first at cycle 135.
Engine 3 meets the condition first at cycle 158.
Engine 4 meets the condition first at cycle 132.
Engine 5 meets the condition first at cycle 10.
Engine 6 meets the condition first at cycle 55.
Engine 7 meets the condition first at cycle 29.
Engine 8 meets the condition first at cycle 24.
Engine 9 meets the condition first at cycle 16.
Engine 12 meets the condition first at cycle 16.
Engine 13 meets the condition first at cycle 17.
Engine 14 meets the condition first at cycle 238.
Engine 15 meets the condition first at cycle 10.
Engine 18 meets the condition first at cycle 45.
Engine 19 meets the condition first at cycle 138.
Engine 20 meets the condition first at cycle 206.
Engine 21 meets the condition first at cycle 49.
Engine 22 meets the condition first at cycle 96.
Engine 23 meets the condition first at cycle 73.
Engine 28 meets the condition first at cycle 113.
Engine 29 meets the c

In [5]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Check for 5 consecutive cycles with consistent direction, where at least 4 sensors are TRUE simultaneously for each engine
consecutive_cycles = 4
required_true_sensors = 3  # Minimum number of sensors required to have consecutive TRUE values simultaneously

output_dir = 'plots'
os.makedirs(output_dir, exist_ok=True)

# List to collect results for each engine
results = []

for engine_id, group in train.groupby('unit number'):
    # Create a DataFrame of consistent directions for each sensor
    consistent_directions = group[[f'{sensor}_EMA_consistent_direction' for sensor in top_sensors]]
    
    # Identify rows where at least 4 sensors are TRUE simultaneously
    sufficient_sensors_true = (consistent_directions.sum(axis=1) >= required_true_sensors)
    
    # Find rolling windows where this condition is TRUE for all 5 consecutive cycles
    consecutive_true = sufficient_sensors_true.rolling(window=consecutive_cycles).apply(lambda x: all(x), raw=True).fillna(0).astype(bool)
    
    # Get the index of the first cycle where the condition is met
    valid_cycles = group.loc[consecutive_true].index.tolist()
    Degradation_Onset = group.loc[valid_cycles[0], 'time, in cycles'] if valid_cycles else None
    
    # Print the first instance if it exists
    if Degradation_Onset:
        print(f"Engine {engine_id} meets the condition first at cycle {Degradation_Onset}.")
    
    # Plotting
    fig, axs = plt.subplots(2, 3, figsize=(15, 10))
    fig.suptitle(f'EMA Trend for Engine {engine_id}', fontsize=16)
    
    for i, sensor in enumerate(top_sensors):
        ax = axs[i // 3, i % 3]
        ema = group[f'{sensor}_EMA']
        ax.plot(group['time, in cycles'], ema, label='EMA', color='green')
        
        # Mark the first cycle that meets the condition
        if Degradation_Onset:
            ax.axvline(x=Degradation_Onset, color='red', linestyle='--', label='Degradation Onset')
        
        ax.set_title(sensor)
        ax.set_xlabel('Cycles')
        ax.set_ylabel('EMA')
        ax.legend(loc='upper right')
    
    # Remove the last subplot if it’s not used
    fig.delaxes(axs[1, 2])
    
    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.savefig(f"{output_dir}/engine_{engine_id}_ema.png", dpi=300)
    # plt.show()
    plt.close(fig)

    # Add results to the list
    results.append({'unit number': engine_id, 'Degradation Onset': Degradation_Onset})

# Convert results to DataFrame and save to CSV once
results_df = pd.DataFrame(results)
# Save the results to a CSV file with consecutive_cycles in the filename
results_df.to_csv(f'results_{consecutive_cycles}_cycles.csv', index=False)

Engine 1 meets the condition first at cycle 23.
Engine 2 meets the condition first at cycle 13.
Engine 3 meets the condition first at cycle 157.
Engine 4 meets the condition first at cycle 81.
Engine 5 meets the condition first at cycle 9.
Engine 6 meets the condition first at cycle 54.
Engine 7 meets the condition first at cycle 28.
Engine 8 meets the condition first at cycle 14.
Engine 9 meets the condition first at cycle 15.
Engine 10 meets the condition first at cycle 50.
Engine 11 meets the condition first at cycle 244.
Engine 12 meets the condition first at cycle 15.
Engine 13 meets the condition first at cycle 16.
Engine 14 meets the condition first at cycle 47.
Engine 15 meets the condition first at cycle 9.
Engine 17 meets the condition first at cycle 69.
Engine 18 meets the condition first at cycle 44.
Engine 19 meets the condition first at cycle 137.
Engine 20 meets the condition first at cycle 205.
Engine 21 meets the condition first at cycle 48.
Engine 22 meets the conditi