# In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib

# Read the training CSV into memory; low_memory=False has pandas process the
# file in one piece rather than in chunks, avoiding mixed-dtype inference.
train = pd.read_csv(filepath_or_buffer='train_FD004.csv', low_memory=False)

# Fisher score of one sensor, contrasting the first `start_cycles` cycles with
# the last `end_cycles` cycles of EACH engine.
def fisher_score_sensor(df, sensor, start_cycles=50, end_cycles=50,
                        unit_col='unit number', cycle_col='time, in cycles'):
    """Return a Fisher-style separability score for ``sensor``.

    The score is ``|mean(begin) - mean(end)| / (var(begin) + var(end))`` where
    *begin* holds the readings taken during the first ``start_cycles`` cycles
    and *end* the readings taken during the last ``end_cycles`` cycles.

    Args:
        df: DataFrame containing ``cycle_col``, ``sensor`` and (optionally)
            ``unit_col``.
        sensor: Name of the sensor column to score.
        start_cycles: Number of initial cycles treated as beginning of life.
        end_cycles: Number of final cycles treated as end of life.
        unit_col: Column identifying the engine. When present, the end-of-life
            window is measured from each engine's own last cycle; when absent,
            the largest cycle in ``df`` is used for every row.
        cycle_col: Column holding the cycle counter.

    Returns:
        The score as a float. ``0.0`` when the score is undefined (an empty
        window, or no difference and no variance) so such sensors rank last;
        ``inf`` when the means differ but both windows have zero variance.
    """
    cycles = df[cycle_col]

    # The end-of-life window must be anchored on each engine's own final
    # cycle; a global maximum would only cover the longest-lived engines.
    if unit_col in df.columns:
        last_cycle = df.groupby(unit_col)[cycle_col].transform('max')
    else:
        last_cycle = cycles.max()

    begin_life = df.loc[cycles <= start_cycles, sensor]
    end_life = df.loc[cycles >= last_cycle - end_cycles + 1, sensor]

    mean_diff = abs(begin_life.mean() - end_life.mean())
    within_var = begin_life.var() + end_life.var()

    # Guard the degenerate cases so the caller never has to rank NaN values.
    if pd.isna(mean_diff) or pd.isna(within_var):
        return 0.0
    if within_var == 0:
        return float('inf') if mean_diff > 0 else 0.0
    return float(mean_diff / within_var)

# Score every column whose name starts with 'sensor'
sensor_columns = list(filter(lambda name: name.startswith('sensor'), train.columns))
fisher_scores = dict(
    zip(sensor_columns, (fisher_score_sensor(train, name) for name in sensor_columns))
)

# Keep the four highest-scoring sensors, best first
top_sensors = [
    name
    for name, _score in sorted(
        fisher_scores.items(), key=lambda pair: pair[1], reverse=True
    )[:4]
]
print("Top sensors selected based on Fisher score:", top_sensors)

# Remaining useful life: cycles left until each engine's last recorded cycle
train['RUL'] = (
    train.groupby('unit number')['time, in cycles'].transform('max')
    - train['time, in cycles']
)

# Smooth each selected sensor with a per-engine exponential moving average
ema_span = 50
for sensor in top_sensors:
    sensor_by_engine = train.groupby('unit number')[sensor]
    train[f'{sensor}_EMA'] = sensor_by_engine.transform(
        lambda readings: readings.ewm(span=ema_span, adjust=False).mean()
    )

# Keep only the identifiers, the smoothed sensors and the target
train = train[
    [
        'unit number',
        'time, in cycles',
        *(f'{sensor}_EMA' for sensor in top_sensors),
        'RUL',
    ]
]
import matplotlib.pyplot as plt
import os
# Per-engine first difference of each EMA: the cycle-to-cycle change whose
# sign is later used to test for a consistent direction.
for sensor in top_sensors:
    ema_by_engine = train.groupby('unit number')[f'{sensor}_EMA']
    train[f'{sensor}_EMA_gradient'] = ema_by_engine.diff()

# Flag cycles that end a run of `window` EMA gradients sharing the same sign
def check_consistent_direction(df, sensor, window=5):
    """Return a boolean Series marking consistently rising or falling windows.

    For every row, the trailing ``window`` values of
    ``df[f'{sensor}_EMA_gradient']`` are inspected. The row is ``True`` only
    when all of them are strictly positive or all strictly negative.

    Args:
        df: DataFrame holding the ``{sensor}_EMA_gradient`` column.
        sensor: Sensor name used to build the gradient column name.
        window: Number of consecutive gradients that must agree in sign.

    Returns:
        Boolean Series aligned with ``df.index``. Rows whose window is
        incomplete or contains NaN are ``False``, as are windows of all-zero
        gradients (a flat EMA has no direction).
    """
    direction = np.sign(df[f'{sensor}_EMA_gradient'])
    trailing = direction.rolling(window=window)

    # If the smallest and largest sign in the window coincide, every sign in
    # it is equal. Incomplete/NaN windows yield NaN, and NaN == NaN is False.
    lowest = trailing.min()
    highest = trailing.max()

    # Exclude the all-zero window: only all +1 or all -1 counts as a direction.
    return ((lowest == highest) & (lowest != 0)).astype(bool)

# For each selected sensor, evaluate the direction check engine by engine and
# store the outcome in a '<sensor>_EMA_consistent_direction' column.
for sensor in top_sensors:
    per_engine_flags = train.groupby('unit number').apply(
        lambda engine_rows: check_consistent_direction(engine_rows, sensor)
    )
    # Drop the engine level added by groupby so the flags align with train's index
    train[f'{sensor}_EMA_consistent_direction'] = per_engine_flags.reset_index(
        level=0, drop=True
    )

# In [None]:
# Detect the degradation onset of every engine: the first cycle that completes
# a run of `consecutive_cycles` cycles during which at least
# `required_true_sensors` of the selected sensors show a consistent EMA direction.
consecutive_cycles = 5
required_true_sensors = 3  # Minimum number of sensors required to be TRUE simultaneously

output_dir = 'plots'
os.makedirs(output_dir, exist_ok=True)

# Column names do not depend on the engine, so build them once
direction_columns = [f'{sensor}_EMA_consistent_direction' for sensor in top_sensors]

for engine_id, group in train.groupby('unit number'):
    # Per-cycle flags of consistent direction, one column per sensor
    consistent_directions = group[direction_columns]

    # Rows where at least `required_true_sensors` sensors are TRUE simultaneously
    sufficient_sensors_true = consistent_directions.sum(axis=1) >= required_true_sensors

    # A trailing window is all-TRUE exactly when its minimum is 1; incomplete
    # windows give NaN, which compares as False.
    consecutive_true = (
        sufficient_sensors_true.astype(float).rolling(window=consecutive_cycles).min() == 1
    )

    # First cycle at which the condition is met, or None if it never is
    if consecutive_true.any():
        Degradation_Onset = group.loc[consecutive_true.idxmax(), 'time, in cycles']
    else:
        Degradation_Onset = None

    # Print the first instance if it exists
    if Degradation_Onset is not None:
        print(f"Engine {engine_id} meets the condition first at cycle {Degradation_Onset}.")

    # Plotting: one panel per selected sensor on a 2x2 grid
    fig, axs = plt.subplots(2, 2, figsize=(15, 10))
    fig.suptitle(f'EMA Trend for Engine {engine_id}', fontsize=16)

    # Walk the grid in row-major order; indexing it as axs[i // 3, i % 3]
    # would run past the second column of a 2x2 grid.
    for ax, sensor in zip(axs.flat, top_sensors):
        ax.plot(group['time, in cycles'], group[f'{sensor}_EMA'], label='EMA', color='green')

        # Mark the first cycle that meets the condition
        if Degradation_Onset is not None:
            ax.axvline(x=Degradation_Onset, color='red', linestyle='--', label='Degradation Onset')

        ax.set_title(sensor)
        ax.set_xlabel('Cycles')
        ax.set_ylabel('EMA')
        ax.legend(loc='upper right')

    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.savefig(f"{output_dir}/engine_{engine_id}_ema.png", dpi=300)
    plt.show()
    plt.close(fig)

    # Append this engine's result to results.csv, writing the header only
    # when the file does not exist yet
    results = pd.DataFrame({'unit number': [engine_id], 'Degradation Onset': [Degradation_Onset]})
    results.to_csv('results.csv', mode='a', index=False, header=not os.path.exists('results.csv'))