In [None]:
import joblib
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import GroupShuffleSplit
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Read the training CSV into a DataFrame
train = pd.read_csv(filepath_or_buffer='train_FD004.csv', low_memory=False)

# Define a function to calculate Fisher score using first 50 and last 50 samples of each engine
def fisher_score_sensor(df, sensor, start_cycles=50, end_cycles=50):
    """Score how well a sensor separates early-life from end-of-life readings.

    The score is ``|mean(begin) - mean(end)| / (var(begin) + var(end))``, where
    ``begin`` holds the readings from the first ``start_cycles`` cycles and
    ``end`` holds the readings from the last ``end_cycles`` cycles.

    Args:
        df: DataFrame with a ``'time, in cycles'`` column and the ``sensor``
            column. When a ``'unit number'`` column is present, the
            end-of-life window is taken per engine; otherwise it is taken
            relative to the largest cycle in the whole frame.
        sensor: Name of the sensor column to score.
        start_cycles: Number of cycles at the start of life to sample.
        end_cycles: Number of cycles at the end of life to sample.

    Returns:
        The score as a float. ``0.0`` is returned when the score is undefined
        (an empty or single-sample window, or a constant sensor giving 0/0)
        so that such sensors rank last instead of poisoning a sort with NaN.
        A sensor with zero variance but different means scores ``inf``.
    """
    cycles = df['time, in cycles']
    if 'unit number' in df.columns:
        # Engines stop at different cycle counts, so "end of life" must be
        # measured against each engine's own last cycle; using the global
        # maximum would only sample the longest-running engines.
        last_cycle = df.groupby('unit number')['time, in cycles'].transform('max')
    else:
        last_cycle = cycles.max()

    begin_life = df.loc[cycles <= start_cycles, sensor]
    end_life = df.loc[cycles >= last_cycle - end_cycles + 1, sensor]

    mean_diff = abs(begin_life.mean() - end_life.mean())
    within_var = begin_life.var() + end_life.var()

    if np.isnan(mean_diff) or np.isnan(within_var):
        return 0.0
    if within_var == 0:
        return float('inf') if mean_diff > 0 else 0.0
    return float(mean_diff / within_var)

# Score every sensor column with the Fisher criterion
sensor_columns = [col for col in train.columns if col.startswith('sensor')]
fisher_scores = {sensor: fisher_score_sensor(train, sensor) for sensor in sensor_columns}

# Rank only sensors whose score is defined. A NaN score (e.g. 0/0 for a sensor
# that never changes) compares False against every number, so leaving it in
# the sort would make the resulting order arbitrary.
rankable_sensors = [sensor for sensor, score in fisher_scores.items() if not np.isnan(score)]

# Select the top sensors based on Fisher scores (highest first)
top_sensors = sorted(rankable_sensors, key=fisher_scores.get, reverse=True)[:6]
print("Top sensors selected based on Fisher score:", top_sensors)

# Remaining useful life: cycles left until each engine's last recorded cycle
train['RUL'] = (
    train.groupby('unit number')['time, in cycles'].transform('max')
    - train['time, in cycles']
)

# Smooth each selected sensor with a per-engine exponential moving average
ema_span = 50
ema_features = [f'{sensor}_EMA' for sensor in top_sensors]
for sensor, ema_column in zip(top_sensors, ema_features):
    readings_by_engine = train.groupby('unit number')[sensor]
    train[ema_column] = readings_by_engine.transform(
        lambda readings: readings.ewm(span=ema_span, adjust=False).mean()
    )

# Model inputs are the EMA columns; the target is the RUL
X = train.loc[:, ema_features]
y = train.loc[:, 'RUL']

# Split the data into train and validation sets by engine.
# A row-level random split would put cycles of the same engine on both sides;
# because the EMA features of neighbouring cycles are nearly identical, that
# leaks information and makes the validation RMSE look better than it is.
splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, val_idx = next(splitter.split(X, y, groups=train['unit number']))
X_train, X_test = X.iloc[train_idx], X.iloc[val_idx]
y_train, y_test = y.iloc[train_idx], y.iloc[val_idx]

# Scale the data (statistics are fitted on the training engines only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Save the scaler
joblib.dump(scaler, 'scaler_model.pkl')

# Initialize and train the Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the held-out engines
y_pred = model.predict(X_test)

# Calculate RMSE
rmse = root_mean_squared_error(y_test, y_pred)
print(f"RMSE on Validation Set: {rmse}")

# Save the model.
# NOTE: despite the .h5 suffix this is a joblib pickle, not an HDF5 file; the
# name is kept because the model is loaded back from this exact path.
joblib.dump(model, 'linear_regression_model_with_ema.h5')

# Read the test trajectories and the actual RUL values
test = pd.read_csv('test_FD004.csv', low_memory=False)
RUL = pd.read_csv('RUL_FD004.csv', low_memory=False)

# Restore the fitted scaler and the trained model from disk
scaler = joblib.load('scaler_model.pkl')
model = joblib.load('linear_regression_model_with_ema.h5')

# Compute the per-engine EMA of every selected sensor on the test data
for sensor in top_sensors:
    readings_by_engine = test.groupby('unit number')[sensor]
    test[f'{sensor}_EMA'] = readings_by_engine.transform(
        lambda readings: readings.ewm(span=ema_span, adjust=False).mean()
    )

# Engine ids in order of first appearance in the test data
unit_numbers = test['unit number'].unique()

# Attach the engine ids to the RUL rows (positionally) and keep only the two
# columns used downstream
RUL = RUL.assign(**{'unit number': unit_numbers})[['unit number', 'RUL']]
# Predict one RUL per engine from the last EMA values of that engine.
# The last row of every engine is selected in one pass and kept as a
# DataFrame: the scaler was fitted on a DataFrame with these column names, so
# passing a bare array would trigger scikit-learn's feature-name warning for
# every engine. Rows are ordered by engine id, as a groupby loop would be.
last_rows = test.groupby('unit number').tail(1).sort_values('unit number')
engine_ids = last_rows['unit number'].to_numpy()

# Scale and predict all engines in a single batch
X_test_engines_scaled = scaler.transform(last_rows[ema_features])
predicted_ruls = model.predict(X_test_engines_scaled)

# Look up the actual RUL of each engine (first match per engine id).
# .loc raises KeyError if an engine has no entry in the RUL table.
actual_rul_by_engine = RUL.drop_duplicates('unit number').set_index('unit number')['RUL']
actual_ruls = actual_rul_by_engine.loc[engine_ids].to_numpy()

# One (engine id, actual RUL, predicted RUL) tuple per engine
engine_rul_predictions = list(zip(engine_ids, actual_ruls, predicted_ruls))

# Convert predictions to a DataFrame for easier analysis
engine_rul_predictions_df = pd.DataFrame(engine_rul_predictions, columns=['Engine ID', 'Actual RUL', 'Predicted RUL'])

# Calculate RMSE for the predictions
rmse_test = root_mean_squared_error(engine_rul_predictions_df['Actual RUL'], engine_rul_predictions_df['Predicted RUL'])
print(f"RMSE on Test Set: {rmse_test}")

# Display the predictions DataFrame
print(engine_rul_predictions_df.head())

In [None]:
import matplotlib.pyplot as plt
import os
# Visualizations: one 2x3 figure of EMA curves per training engine, written
# to the output directory and also shown on screen
output_dir = 'plots'
os.makedirs(output_dir, exist_ok=True)
for engine_id, group in train.groupby('unit number'):
    fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(15, 10))
    fig.suptitle(f'EMA Trend for Engine {engine_id}', fontsize=16)
    cycles = group['time, in cycles']
    for idx, sensor in enumerate(top_sensors):
        # Fill the grid row by row: three panels per row
        grid_row, grid_col = divmod(idx, 3)
        panel = axs[grid_row, grid_col]
        panel.plot(cycles, group[f'{sensor}_EMA'], label='EMA', color='green')
        panel.set_title(sensor)
        panel.set_xlabel('Cycles')
        panel.set_ylabel('EMA')
        panel.legend(loc='upper right')
    # Leave the top 4% of the figure free for the suptitle
    fig.tight_layout(rect=[0, 0, 1, 0.96])
    plt.savefig(f"{output_dir}/engine_{engine_id}_ema.png", dpi=300)
    plt.show()
    # Close the figure explicitly so figures do not accumulate across engines
    plt.close(fig)