In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, root_mean_squared_error  
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib
# Read the FD004 training set from CSV; low_memory=False turns off chunked
# parsing so each column's dtype is inferred from the whole file.
train = pd.read_csv(
    'train_FD004.csv',
    low_memory=False,
)
# Define a function to calculate Fisher score using first 50 and last 50 samples of each engine
def fisher_score_sensor(df, sensor, start_cycles=50, end_cycles=50,
                        unit_col='unit number', time_col='time, in cycles'):
    """Score how well `sensor` separates beginning-of-life from end-of-life samples.

    Beginning-of-life samples are the rows whose cycle counter is
    <= `start_cycles`. End-of-life samples are, for every engine separately,
    the rows within the last `end_cycles` cycles of that engine's own run.
    The score is |mean(begin) - mean(end)| / (var(begin) + var(end)).

    NOTE(review): the classical Fisher criterion squares the mean difference;
    the un-squared form used by this notebook is kept so existing rankings
    keep their meaning.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing `time_col`, `sensor` and (normally) `unit_col`.
    sensor : str
        Name of the sensor column to score.
    start_cycles : int, default 50
        Number of initial cycles treated as beginning of life.
    end_cycles : int, default 50
        Number of final cycles (per engine) treated as end of life.
    unit_col : str, default 'unit number'
        Column identifying the engine. If it is absent from `df`, the whole
        frame is treated as a single run.
    time_col : str, default 'time, in cycles'
        Column holding the cycle counter.

    Returns
    -------
    float
        The score. 0.0 is returned when the score is undefined (an empty
        selection, or a sensor that is constant in both windows) so the
        result can always be ranked; inf is returned when both windows have
        zero variance but different means.
    """
    cycles = df[time_col]

    # The end-of-life window must be anchored to each engine's own last cycle;
    # anchoring to the global maximum would only ever pick up the
    # longest-running engines.
    if unit_col in df.columns:
        last_cycle = df.groupby(unit_col)[time_col].transform('max')
    else:
        last_cycle = cycles.max()

    begin_life = df.loc[cycles <= start_cycles, sensor]
    end_life = df.loc[cycles >= (last_cycle - end_cycles + 1), sensor]

    mean_diff = abs(begin_life.mean() - end_life.mean())
    within_var = begin_life.var() + end_life.var()

    # NaN appears when a window is empty or holds a single sample; such a
    # sensor carries no usable information, and NaN would break sorting.
    if pd.isna(mean_diff) or pd.isna(within_var):
        return 0.0
    if within_var == 0:
        return float('inf') if mean_diff > 0 else 0.0
    return float(mean_diff / within_var)

# Score every column whose name begins with 'sensor'
sensor_columns = [name for name in train.columns if name.startswith('sensor')]
fisher_scores = {
    name: fisher_score_sensor(train, name)
    for name in sensor_columns
}

# Keep the six highest-scoring sensors (change the slice bound to keep more or fewer)
top_sensors = [
    name
    for name, _score in sorted(
        fisher_scores.items(), key=lambda item: item[1], reverse=True
    )[:6]
]

print("Top sensors selected based on Fisher score:", top_sensors)
# Calculate RUL for each engine
grouped_by_engine = train.groupby('unit number')

# RUL = (last cycle recorded for the engine) - (current cycle).
# transform('max') broadcasts each engine's final cycle back onto its own rows,
# so the subtraction is done for the whole frame at once instead of re-masking
# the frame once per engine.
train['RUL'] = (
    grouped_by_engine['time, in cycles'].transform('max')
    - train['time, in cycles']
)

# Display a few rows to check
print(train[['unit number', 'time, in cycles', 'RUL']].head())
from sklearn.linear_model import LinearRegression
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Regression target and feature matrix (top-ranked sensors only)
y = train['RUL']
X = train[top_sensors]

# Hold out 20% of the rows for validation; the seed keeps the split repeatable.
# NOTE(review): the split is row-wise, so rows of one engine can land in both
# partitions -- confirm this is acceptable for the validation RMSE below.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise the features; the statistics are learned from the training rows only
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit the Linear Regression model on the scaled training rows
model = LinearRegression().fit(X_train, y_train)

# Predict the held-out rows and report their RMSE
y_pred = model.predict(X_test)
rmse = root_mean_squared_error(y_test, y_pred)
print(f"RMSE: {rmse}")

# True vs. predicted RUL for every held-out row, re-indexed from 0 for display
engine_rul_predictions = pd.DataFrame(
    {'True RUL': y_test, 'Predicted RUL': y_pred}
).reset_index(drop=True)
print(engine_rul_predictions)

# Persist the fitted model with joblib (the file is a joblib dump despite its '.h5' name)
joblib.dump(model, 'linear_regression_model.h5')
# Load the model, test_FD004 and test_FD004_RUL datasets
model = joblib.load('linear_regression_model.h5')
test = pd.read_csv('test_FD004.csv', low_memory=False)
RUL = pd.read_csv('RUL_FD004.csv', low_memory=False)

# Extract the last recorded row of each engine in the test set.
# Sorting first and taking tail(1) returns the actual final row; GroupBy.last()
# instead returns the last non-null value of each column independently and
# relies on the file already being ordered by cycle.
test_last_cycle = (
    test.sort_values(['unit number', 'time, in cycles'])
    .groupby('unit number')
    .tail(1)
    .reset_index(drop=True)
)

# Shape of the test_last_cycle
test_last_cycle.shape

# The ground-truth file is joined by row position, so it must hold exactly one
# row per engine; otherwise concat would silently pad with NaN.
if len(RUL) != len(test_last_cycle):
    raise ValueError(
        f"RUL file has {len(RUL)} rows but the test set has "
        f"{len(test_last_cycle)} engines"
    )

# Concatenate the test_last_cycle with the RUL dataset
test_last_cycle = pd.concat([test_last_cycle, RUL.reset_index(drop=True)], axis=1)
test_last_cycle.head()

# Calculate the RUL for each engine in the test set using the Linear Regression model.
# The features are scaled with the scaler fitted on the training split.
X_test_last_cycle = test_last_cycle[top_sensors]
X_test_last_cycle = scaler.transform(X_test_last_cycle)
test_last_cycle['Predicted RUL'] = model.predict(X_test_last_cycle)

# Display the RUL predictions
print(test_last_cycle[['unit number', 'RUL', 'Predicted RUL']])

# Calculate RMSE for the RUL predictions (overwrites the validation RMSE computed earlier)
rmse = root_mean_squared_error(test_last_cycle['RUL'], test_last_cycle['Predicted RUL'])
print(f"RMSE: {rmse}")

In [None]:
import matplotlib.pyplot as plt
import os

# Ensure directory for saving images exists
output_dir = 'plots'
os.makedirs(output_dir, exist_ok=True)

# Parameters for rolling mean, EMA, and statistical features
window_size = 50  # Window size for moving average
ema_span = 50     # Span for EMA


def _plot_sensor_grid(group, sensors, title, ylabel, curves, save_path, n_cols=3):
    """Draw one panel per sensor for a single engine, then save, show and close the figure.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of one engine; must contain 'time, in cycles' and every column
        named in `sensors`.
    sensors : sequence of str
        Sensor columns to plot, one subplot each (row-major order).
    title : str
        Figure-level title.
    ylabel : str
        Y-axis label applied to every panel.
    curves : list of (label, color, linestyle, transform)
        One entry per line drawn in each panel; `transform` maps the sensor
        Series to the Series that is plotted.
    save_path : str
        Destination of the PNG file (written at 300 dpi).
    n_cols : int, default 3
        Number of subplot columns; the number of rows is derived from
        len(sensors) so any number of sensors fits.
    """
    # Ceiling division without importing math; always keep at least one row.
    n_rows = max(1, -(-len(sensors) // n_cols))
    # squeeze=False keeps `axs` two-dimensional even for a single row/column.
    fig, axs = plt.subplots(
        n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows), squeeze=False
    )
    fig.suptitle(title, fontsize=16)

    cycles = group['time, in cycles']
    panels = axs.ravel()
    for ax, sensor in zip(panels, sensors):
        for label, color, linestyle, transform in curves:
            ax.plot(cycles, transform(group[sensor]),
                    label=label, color=color, linestyle=linestyle)
        ax.set_title(sensor)
        ax.set_xlabel('Cycles')
        ax.set_ylabel(ylabel)
        ax.legend(loc='upper right')

    # Hide grid cells that have no sensor assigned to them.
    for ax in panels[len(sensors):]:
        ax.set_visible(False)

    # Leave room at the top for the figure title.
    fig.tight_layout(rect=[0, 0, 1, 0.96])
    fig.savefig(save_path, dpi=300)
    plt.show()
    # Close explicitly: three figures per engine would otherwise pile up in memory.
    plt.close(fig)


# Line definitions for each figure type: (label, color, linestyle, transform)
rolling_mean_curves = [
    ('Rolling Mean', 'blue', '--',
     lambda s: s.rolling(window=window_size, center=False).mean()),
]
ema_curves = [
    ('EMA', 'green', '-',
     lambda s: s.ewm(span=ema_span, adjust=False).mean()),
]
statistical_curves = [
    ('Mean', 'purple', '-.', lambda s: s.expanding().mean()),
    ('Std Dev', 'orange', ':', lambda s: s.expanding().std()),
]

# Iterate through each engine and create separate plots for each feature type
for engine_id, group in train.groupby('unit number'):

    # --- Plot Rolling Mean ---
    _plot_sensor_grid(
        group, top_sensors,
        title=f'Rolling Mean Trend for Engine {engine_id}',
        ylabel='Rolling Mean',
        curves=rolling_mean_curves,
        save_path=f"{output_dir}/engine_{engine_id}_rolling_mean.png",
    )

    # --- Plot Exponential Moving Average (EMA) ---
    _plot_sensor_grid(
        group, top_sensors,
        title=f'EMA Trend for Engine {engine_id}',
        ylabel='EMA',
        curves=ema_curves,
        save_path=f"{output_dir}/engine_{engine_id}_ema.png",
    )

    # --- Plot Additional Statistical Features (Mean and Std Dev) ---
    _plot_sensor_grid(
        group, top_sensors,
        title=f'Statistical Features for Engine {engine_id}',
        ylabel='Statistical Feature',
        curves=statistical_curves,
        save_path=f"{output_dir}/engine_{engine_id}_statistical_features.png",
    )

In [None]:
# NOTE: disabled draft -- every line in this cell is commented out, so nothing here runs.
# import matplotlib.pyplot as plt

# # Parameters for rolling mean and EMA
# window_size = 50  # Window size for moving average
# ema_span = 50     # Span for EMA

# # Iterate through each engine and plot trends
# for engine_id, group in train.groupby('unit number'):
#     fig, axs = plt.subplots(2, 3, figsize=(15, 10))
#     fig.suptitle(f'Rolling Mean / EMA Trend for Engine {engine_id}', fontsize=16)
    
#     for i, sensor in enumerate(top_sensors):
#         ax = axs[i // 3, i % 3]
        
#         # Calculate rolling mean and EMA for the current sensor
#         rolling_mean = group[sensor].rolling(window=window_size, center=False).mean()
#         ema = group[sensor].ewm(span=ema_span, adjust=False).mean()
        
#         # Plot only the trend data
#         ax.plot(group['time, in cycles'], rolling_mean, label='Rolling Mean', color='blue', linestyle='--')
#         ax.plot(group['time, in cycles'], ema, label='EMA', color='green', linestyle='-')
        
#         # Title and axis labels
#         ax.set_title(sensor)
#         ax.set_xlabel('Cycles')
#         ax.set_ylabel('Trend')
    
#     # Adjust layout and place legend outside of the plot area
#     handles, labels = ax.get_legend_handles_labels()
#     fig.legend(handles, labels, loc='upper center', bbox_to_anchor=(0.5, -0.05), ncol=2, fontsize='large')
#     plt.tight_layout(rect=[0, 0, 1, 0.96])
    
#     # Show plot
#     plt.show()