In [9]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file under the input mount, printing one full path per line.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/nasa-cmaps/CMaps/RUL_FD002.txt
/kaggle/input/nasa-cmaps/CMaps/test_FD003.txt
/kaggle/input/nasa-cmaps/CMaps/Damage Propagation Modeling.pdf
/kaggle/input/nasa-cmaps/CMaps/readme.txt
/kaggle/input/nasa-cmaps/CMaps/train_FD003.txt
/kaggle/input/nasa-cmaps/CMaps/test_FD004.txt
/kaggle/input/nasa-cmaps/CMaps/train_FD004.txt
/kaggle/input/nasa-cmaps/CMaps/x.txt
/kaggle/input/nasa-cmaps/CMaps/test_FD002.txt
/kaggle/input/nasa-cmaps/CMaps/train_FD001.txt
/kaggle/input/nasa-cmaps/CMaps/train_FD002.txt
/kaggle/input/nasa-cmaps/CMaps/RUL_FD001.txt
/kaggle/input/nasa-cmaps/CMaps/RUL_FD004.txt
/kaggle/input/nasa-cmaps/CMaps/RUL_FD003.txt
/kaggle/input/nasa-cmaps/CMaps/test_FD001.txt
/kaggle/input/nasa-cmaps/cmaps/CMaps/RUL_FD002.txt
/kaggle/input/nasa-cmaps/cmaps/CMaps/test_FD003.txt
/kaggle/input/nasa-cmaps/cmaps/CMaps/Damage Propagation Modeling.pdf
/kaggle/input/nasa-cmaps/cmaps/CMaps/readme.txt
/kaggle/input/nasa-cmaps/cmaps/CMaps/train_FD003.txt
/kaggle/input/nasa-cmaps/cmaps/CM

In [None]:

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def load_data(file_path):
    """Read a header-less, whitespace-delimited text file into a labelled DataFrame.

    Only the first 26 columns of the file are kept. They are named, in order:
    ``unit_number``, ``time_in_cycles``, ``op_setting_1`` .. ``op_setting_3`` and
    ``sensor_measurement_1`` .. ``sensor_measurement_21``.

    Args:
        file_path: Path (or any object ``pd.read_csv`` accepts) of the file to read.

    Returns:
        A DataFrame with the 26 named columns.
    """
    id_and_setting_names = ["unit_number", "time_in_cycles", "op_setting_1", "op_setting_2", "op_setting_3"]
    sensor_names = [f"sensor_measurement_{i}" for i in range(1, 22)]

    raw = pd.read_csv(file_path, sep=r'\s+', header=None)

    # Anything beyond the 26th column is discarded before the names are attached.
    frame = raw.iloc[:, :26]
    frame.columns = id_and_setting_names + sensor_names
    return frame

# Sensor indices and operating settings that are excluded from the model inputs.
# Defined once so select_features and preprocess_dataset always agree on the feature set.
_DROPPED_SENSOR_IDS = (1, 5, 6, 10, 16, 18, 19)
_DROPPED_OP_SETTINGS = ("op_setting_3",)

# Default directory holding the train_<id>.txt / test_<id>.txt / RUL_<id>.txt files.
_CMAPS_DATA_DIR = "/kaggle/input/nasa-cmaps/cmaps/CMaps"


def calculate_rul(df, rul_max=125):
    """Add a capped ``RUL`` column to a frame of per-unit cycle records.

    For every row, RUL is the largest ``time_in_cycles`` observed for that row's
    ``unit_number`` minus the row's own ``time_in_cycles``, limited to ``rul_max``.

    Args:
        df: DataFrame with ``unit_number`` and ``time_in_cycles`` columns.
            It is not modified.
        rul_max: Upper bound applied to the RUL values. ``None`` disables the cap.

    Returns:
        A new DataFrame (fresh 0..n-1 index) with the same columns plus ``RUL``.
    """
    last_cycle = df.groupby('unit_number')['time_in_cycles'].transform('max')
    rul = last_cycle - df['time_in_cycles']
    if rul_max is not None:
        rul = rul.clip(upper=rul_max)
    # reset_index keeps the positional index the previous merge-based version produced.
    return df.assign(RUL=rul).reset_index(drop=True)


def select_features(df):
    """Return a copy of ``df`` without the excluded sensor and operating-setting columns.

    Columns listed in the exclusion constants are removed if present; every other
    column (including identifiers and ``RUL`` when present) is kept in its original
    order. An explicit copy is returned so that callers can assign into the result
    without pandas treating it as a view of ``df``.
    """
    excluded = {f'sensor_measurement_{i}' for i in _DROPPED_SENSOR_IDS}
    excluded.update(_DROPPED_OP_SETTINGS)
    features_to_keep = [col for col in df.columns if col not in excluded]
    return df[features_to_keep].copy()


def create_sequences(df, sequence_length, sensor_cols, op_setting_cols):
    """Build sliding windows and their targets, unit by unit.

    For each ``unit_number`` (in order of first appearance) the rows are sorted by
    ``time_in_cycles`` and every run of ``sequence_length`` consecutive rows becomes
    one sample. The target of a window is the ``RUL`` of its last row. Units with
    fewer than ``sequence_length`` rows contribute no samples.

    Args:
        df: DataFrame containing ``unit_number``, ``time_in_cycles``, ``RUL`` and
            all feature columns.
        sequence_length: Number of rows per window.
        sensor_cols: List of sensor column names (placed first in the feature axis).
        op_setting_cols: List of operating-setting column names (placed after sensors).

    Returns:
        ``(X, y)`` where ``X`` stacks the windows as
        ``(num_windows, sequence_length, num_features)`` and ``y`` holds one target
        per window.
    """
    X, y = [], []
    features = sensor_cols + op_setting_cols

    # groupby splits the frame once instead of re-filtering it for every unit.
    for _, unit_df in df.groupby('unit_number', sort=False):
        unit_df = unit_df.sort_values(by='time_in_cycles')
        # Convert to NumPy once per unit; per-window pandas indexing is much slower.
        feature_values = unit_df[features].to_numpy()
        rul_values = unit_df['RUL'].to_numpy()

        for start in range(len(unit_df) - sequence_length + 1):
            end = start + sequence_length
            X.append(feature_values[start:end])
            y.append(rul_values[end - 1])

    return np.array(X), np.array(y)


def preprocess_dataset(dataset_id, sequence_length=50, rul_max=125, data_dir=None):
    """Load one dataset and turn it into model-ready arrays.

    Steps:
      1. Read ``train_<dataset_id>.txt``, label it with RUL capped at ``rul_max``
         and drop the excluded columns.
      2. Fit a ``MinMaxScaler`` on the training feature columns and scale them.
      3. Cut the training rows into sliding windows with ``create_sequences``.
      4. Read ``test_<dataset_id>.txt``, scale it with the *training* scaler and keep
         the last ``sequence_length`` rows of each unit; shorter units are left-padded
         with zeros.
      5. Read ``RUL_<dataset_id>.txt`` (first column) and cap it at ``rul_max``.

    Args:
        dataset_id: Suffix used in the file names, e.g. ``"FD001"``.
        sequence_length: Window length in rows.
        rul_max: Cap applied to both training and test RUL values.
        data_dir: Directory containing the three text files. Defaults to the
            Kaggle input path used by this notebook.

    Returns:
        ``(X_train, y_train, X_test, y_test_true, scaler)``.

    Raises:
        ValueError: If the number of test windows differs from the number of RUL
            labels, since they could not be paired up reliably.
    """
    if data_dir is None:
        data_dir = _CMAPS_DATA_DIR

    sensor_cols = [f"sensor_measurement_{i}" for i in range(1, 22) if i not in _DROPPED_SENSOR_IDS]
    op_setting_cols = ["op_setting_1", "op_setting_2"]
    all_feature_cols = sensor_cols + op_setting_cols

    # --- training split -------------------------------------------------
    train_df = load_data(f"{data_dir}/train_{dataset_id}.txt")
    # Pass rul_max through so training and test labels share the same cap.
    train_df = calculate_rul(train_df, rul_max=rul_max)
    train_df = select_features(train_df)

    scaler = MinMaxScaler()
    train_df[all_feature_cols] = scaler.fit_transform(train_df[all_feature_cols])

    X_train, y_train = create_sequences(train_df, sequence_length, sensor_cols, op_setting_cols)

    # --- test split -----------------------------------------------------
    test_df = load_data(f"{data_dir}/test_{dataset_id}.txt")
    test_df = select_features(test_df)
    # Reuse the scaler fitted on the training data; never refit on test rows.
    test_df[all_feature_cols] = scaler.transform(test_df[all_feature_cols])

    X_test_list = []
    for _, unit_df in test_df.groupby('unit_number', sort=False):
        unit_values = unit_df.sort_values(by='time_in_cycles')[all_feature_cols].to_numpy()

        if len(unit_values) >= sequence_length:
            window = unit_values[-sequence_length:]
        else:
            # Left-pad short histories with zeros so every unit yields one full window.
            window = np.zeros((sequence_length, len(all_feature_cols)))
            window[-len(unit_values):] = unit_values
        X_test_list.append(window)

    X_test = np.array(X_test_list)

    # --- test labels ----------------------------------------------------
    y_test_true = pd.read_csv(f"{data_dir}/RUL_{dataset_id}.txt", sep=r'\s+', header=None)
    y_test_true = y_test_true.iloc[:, 0].values
    if rul_max is not None:
        y_test_true = np.minimum(y_test_true, rul_max)  # Apply RUL capping

    if len(X_test) != len(y_test_true):
        raise ValueError(
            f"{dataset_id}: {len(X_test)} test units but {len(y_test_true)} RUL labels"
        )

    return X_train, y_train, X_test, y_test_true, scaler

if __name__ == "__main__":
    # Settings for this preprocessing run.
    dataset_id = "FD001"
    SEQUENCE_LENGTH = 50
    RUL_MAX = 125

    # The fitted scaler is kept in `scaler` so a later cell can persist it.
    X_train, y_train, X_test, y_test_true, scaler = preprocess_dataset(
        dataset_id, SEQUENCE_LENGTH, RUL_MAX
    )

    # Report the shape of each produced array.
    print(f"Processed {dataset_id}:")
    shape_report = (
        f"Shape of X_train: {X_train.shape}",
        f"Shape of y_train: {y_train.shape}",
        f"Shape of X_test: {X_test.shape}",
        f"Shape of y_test_true: {y_test_true.shape}",
    )
    for report_line in shape_report:
        print(report_line)

    # Write every array to the working directory as a .npy file.
    saved_arrays = (
        (f"X_train_{dataset_id}.npy", X_train),
        (f"y_train_{dataset_id}.npy", y_train),
        (f"X_test_{dataset_id}.npy", X_test),
        (f"y_test_true_{dataset_id}.npy", y_test_true),
    )
    for npy_path, array in saved_arrays:
        np.save(npy_path, array)




Processed FD001:
Shape of X_train: (15731, 50, 16)
Shape of y_train: (15731,)
Shape of X_test: (100, 50, 16)
Shape of y_test_true: (100,)


In [16]:
import joblib
# Persist the `scaler` object left in the kernel by the preprocessing cell's
# `__main__` block (which ran with dataset_id = "FD001") to the working directory.
# NOTE(review): the file name carries no dataset id — confirm that a single shared
# scaler file is what downstream consumers expect.
joblib.dump(scaler, 'transformer_scaler.pkl')


['transformer_scaler.pkl']

In [None]:

import torch
import torch.nn as nn
import torch.nn.functional as F

class GatedConvUnit(nn.Module):
    """Gated 1-D convolution: a convolution branch multiplied by a sigmoid gate branch.

    Both branches are ``nn.Conv1d`` layers with the same kernel size and
    ``kernel_size // 2`` padding, applied along the sequence axis.
    """

    def __init__(self, input_dim, output_dim, kernel_size=3):
        super().__init__()
        half_kernel = kernel_size // 2
        self.conv = nn.Conv1d(input_dim, output_dim, kernel_size, padding=half_kernel)
        self.gate = nn.Conv1d(input_dim, output_dim, kernel_size, padding=half_kernel)

    def forward(self, x):
        """Apply the gated convolution.

        ``x`` is laid out as (batch_size, sequence_length, input_dim); the result is
        (batch_size, sequence_length, output_dim).
        """
        # Conv1d wants channels before the sequence axis, so swap the last two axes.
        channels_first = x.transpose(1, 2)

        candidate = self.conv(channels_first)
        gate_values = torch.sigmoid(self.gate(channels_first))
        gated = candidate * gate_values

        # Swap back so the sequence axis comes before the channel axis again.
        return gated.transpose(1, 2)

class MultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product attention with learned q/k/v and output projections."""

    def __init__(self, embed_dim, num_heads):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"

        # Input projections for queries, keys and values, then the output projection.
        self.wq = nn.Linear(embed_dim, embed_dim)
        self.wk = nn.Linear(embed_dim, embed_dim)
        self.wv = nn.Linear(embed_dim, embed_dim)
        self.dense = nn.Linear(embed_dim, embed_dim)

    def split_heads(self, x, batch_size):
        """Reshape (batch, seq_len, embed_dim) into (batch, num_heads, seq_len, head_dim)."""
        per_head = x.view(batch_size, -1, self.num_heads, self.head_dim)
        return per_head.permute(0, 2, 1, 3)

    def forward(self, q, k, v, mask=None):
        """Attend from ``q`` over ``k``/``v``.

        When ``mask`` is given, ``mask * -1e9`` is added to the attention logits
        before the softmax, so non-zero mask entries suppress those positions.

        Returns:
            ``(output, attention_weights)`` where ``output`` has the embedding size
            on its last axis and ``attention_weights`` is the per-head softmax.
        """
        batch_size = q.size(0)

        queries = self.split_heads(self.wq(q), batch_size)
        keys = self.split_heads(self.wk(k), batch_size)
        values = self.split_heads(self.wv(v), batch_size)

        # Dot products between queries and keys, scaled by sqrt(head_dim).
        scale = torch.sqrt(torch.tensor(self.head_dim, dtype=torch.float32))
        logits = torch.matmul(queries, keys.transpose(-2, -1)) / scale

        if mask is not None:
            logits += (mask * -1e9)

        attention_weights = F.softmax(logits, dim=-1)
        context = torch.matmul(attention_weights, values)

        # Bring the head axis next to head_dim and fold both back into embed_dim.
        merged = context.permute(0, 2, 1, 3).contiguous().view(batch_size, -1, self.embed_dim)

        return self.dense(merged), attention_weights

class FeedForwardNetwork(nn.Module):
    """Two linear layers with a ReLU in between: embed_dim -> dff -> embed_dim."""

    def __init__(self, embed_dim, dff):
        super().__init__()
        self.linear1 = nn.Linear(embed_dim, dff)
        self.linear2 = nn.Linear(dff, embed_dim)

    def forward(self, x):
        """Expand to ``dff`` features, apply ReLU, and project back to ``embed_dim``."""
        hidden = self.linear1(x)
        activated = F.relu(hidden)
        return self.linear2(activated)

class EncoderLayer(nn.Module):
    """One encoder block: self-attention and a feed-forward network.

    Each sub-layer is followed by dropout, a residual connection and LayerNorm.
    """

    def __init__(self, embed_dim, num_heads, dff, rate=0.1):
        super().__init__()
        # Sub-layers.
        self.mha = MultiHeadAttention(embed_dim, num_heads)
        self.ffn = FeedForwardNetwork(embed_dim, dff)

        # One normalisation per sub-layer.
        self.layernorm1 = nn.LayerNorm(embed_dim)
        self.layernorm2 = nn.LayerNorm(embed_dim)

        # One dropout per sub-layer, both using the same rate.
        self.dropout1 = nn.Dropout(rate)
        self.dropout2 = nn.Dropout(rate)

    def forward(self, x, mask=None):
        """Run the block on ``x``; ``mask`` is forwarded to the attention layer."""
        # Self-attention: x serves as query, key and value. The weights are discarded.
        attended, _ = self.mha(x, x, x, mask)
        after_attention = self.layernorm1(x + self.dropout1(attended))

        # Position-wise feed-forward sub-layer with its own residual + norm.
        transformed = self.dropout2(self.ffn(after_attention))
        return self.layernorm2(after_attention + transformed)

class TransformerRUL(nn.Module):
    """Transformer-encoder regressor that maps a feature window to one bounded value.

    Pipeline: gated convolution -> linear projection -> add sinusoidal positional
    encoding -> ``num_layers`` encoder layers -> mean over the sequence axis ->
    linear head -> sigmoid scaled by ``max_rul``. The output therefore lies in
    ``(0, max_rul)``.

    Args:
        input_dim: Number of features per time step.
        embed_dim: Width of the internal representation.
        num_layers: Number of stacked ``EncoderLayer`` blocks.
        num_heads: Attention heads per encoder layer.
        dff: Hidden width of each encoder layer's feed-forward network.
        rate: Dropout rate passed to every encoder layer.
        max_rul: Factor the sigmoid output is multiplied by.
        max_len: Longest sequence the positional-encoding table supports.
    """

    def __init__(self, input_dim, embed_dim, num_layers, num_heads, dff, rate=0.1, max_rul=125, max_len=1000):
        super().__init__()
        self.embed_dim = embed_dim
        self.max_rul = max_rul
        self.max_len = max_len

        # Local Feature Extraction Layer
        self.gcu = GatedConvUnit(input_dim, embed_dim)
        self.linear_gcu = nn.Linear(embed_dim, embed_dim)

        # Registered as a buffer so it follows model.to(device) instead of being copied
        # on every forward pass. persistent=False keeps it out of state_dict, so
        # checkpoints saved before this change still load.
        self.register_buffer(
            "pos_encoding", self.positional_encoding(max_len, embed_dim), persistent=False
        )

        # Encoder Layers
        self.encoder_layers = nn.ModuleList([
            EncoderLayer(embed_dim, num_heads, dff, rate)
            for _ in range(num_layers)
        ])

        # Regression Layer
        self.regression_linear = nn.Linear(embed_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def positional_encoding(self, position, d_model):
        """Build a ``(1, position, d_model)`` sinusoidal table.

        Even feature indices hold the sine of the angle, odd indices the cosine.
        """
        angle_rads = self.get_angles(torch.arange(position).unsqueeze(1),
                                     torch.arange(d_model).unsqueeze(0),
                                     d_model)

        angle_rads[:, 0::2] = torch.sin(angle_rads[:, 0::2])
        angle_rads[:, 1::2] = torch.cos(angle_rads[:, 1::2])

        # Leading axis of size 1 lets the table broadcast over the batch.
        return angle_rads.unsqueeze(0)

    def get_angles(self, pos, i, d_model):
        """Return ``pos / 10000 ** (2 * (i // 2) / d_model)`` (broadcast over pos and i)."""
        angle_rates = 1 / torch.pow(10000, (2 * (i // 2)) / torch.tensor(d_model, dtype=torch.float32))
        return pos * angle_rates

    def forward(self, x, mask=None):
        """Predict one value per sample.

        Args:
            x: Tensor laid out as (batch_size, sequence_length, input_dim).
            mask: Optional attention mask forwarded to every encoder layer.

        Returns:
            Tensor of shape (batch_size, 1).

        Raises:
            ValueError: If the sequence is longer than the positional-encoding table.
        """
        x = self.gcu(x)  # (batch_size, sequence_length, embed_dim)
        x = self.linear_gcu(x)

        seq_len = x.size(1)
        table_len = self.pos_encoding.size(1)
        if seq_len > table_len:
            raise ValueError(
                f"sequence length {seq_len} exceeds the positional-encoding table length {table_len}"
            )

        # Out-of-place add: avoids mutating an activation tensor in place.
        # .to() is a no-op once the buffer already matches x's device and dtype.
        x = x + self.pos_encoding[:, :seq_len, :].to(device=x.device, dtype=x.dtype)

        for encoder_layer in self.encoder_layers:
            x = encoder_layer(x, mask)

        # Average over the sequence axis -> (batch_size, embed_dim)
        x = torch.mean(x, dim=1)

        output = self.regression_linear(x)
        # Scale sigmoid output to max_rul
        return self.sigmoid(output) * self.max_rul




In [None]:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd


# Data settings: both are passed to preprocess_dataset by train_and_evaluate_dataset;
# MAX_RUL is also passed to the TransformerRUL constructor.
SEQUENCE_LENGTH = 50
MAX_RUL = 125
# Model settings, passed positionally to TransformerRUL
# (embed_dim, num_layers, num_heads, dff, rate).
embed_dim = 64
num_layers = 2
num_heads = 4
dff = 128
rate = 0.1

# Optimisation settings: upper bound on epochs, DataLoader batch size, Adam learning rate.
num_epochs = 100 
batch_size = 32
learning_rate = 0.001

# Early stopping: training stops after PATIENCE consecutive epochs in which the average
# training loss did not drop by more than min_delta below the best value so far.
PATIENCE = 10
min_delta = 0.001

def train_and_evaluate_dataset(dataset_id):
    """Train a TransformerRUL model on one dataset and score it on its test split.

    The model is trained with Adam on MSE loss. After every epoch the average
    training loss is compared with the best value so far; an improvement of more
    than ``min_delta`` saves a checkpoint, and ``PATIENCE`` epochs without one stop
    training early. Note that early stopping monitors the *training* loss — no
    validation split is used.

    The best checkpoint is then reloaded, the test windows are predicted in a single
    batch, and RMSE plus the asymmetric exponential score are printed. Predictions
    are written to ``rul_predictions_<dataset_id>.csv``.

    Args:
        dataset_id: Dataset suffix, e.g. ``"FD001"``.

    Returns:
        ``(rmse, cmapss_score)`` as Python floats.

    Raises:
        RuntimeError: If no epoch produced a checkpoint (for example because the
            loss was never finite), so there is no trained model to evaluate.
    """
    print(f"\n--- Processing Dataset: {dataset_id} ---")
    # preprocess_dataset returns the fitted scaler as a trailing fifth element.
    # Star-unpacking ignores it and keeps this call working with either arity.
    X_train, y_train, X_test, y_test_true, *_ = preprocess_dataset(dataset_id, SEQUENCE_LENGTH, MAX_RUL)

    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)

    # Create DataLoader
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Initialize model, loss function, and optimizer
    input_dim = X_train.shape[2]  # Number of features
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = TransformerRUL(input_dim, embed_dim, num_layers, num_heads, dff, rate, MAX_RUL).to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    checkpoint_path = f"transformer_rul_model_{dataset_id}.pth"
    best_loss = float("inf")
    epochs_no_improve = 0
    checkpoint_saved = False

    print("Starting model training...")
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_train_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {avg_train_loss:.4f}")

        if avg_train_loss + min_delta < best_loss:
            best_loss = avg_train_loss
            epochs_no_improve = 0
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= PATIENCE:
                print(f"Early stopping triggered after {epoch+1} epochs for {dataset_id}.")
                break

    print("Training complete.")

    # Without this guard a stale checkpoint from an earlier run could be loaded silently.
    if not checkpoint_saved:
        raise RuntimeError(
            f"No checkpoint was saved for {dataset_id}; the training loss never improved."
        )

    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model.eval()

    # Evaluation on test set
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)

    with torch.no_grad():
        predictions = model(X_test_tensor).cpu().numpy().flatten()

    actuals = np.asarray(y_test_true, dtype=np.float32).flatten()

    # Calculate RMSE (differences accumulated in float64)
    errors = predictions.astype(np.float64) - actuals.astype(np.float64)
    rmse = float(np.sqrt(np.mean(errors ** 2)))
    print(f"Root Mean Squared Error (RMSE) for {dataset_id}: {rmse:.4f}")

    def calculate_cmapss_score(y_true, y_pred):
        # Asymmetric penalty: under-predictions use divisor 13, over-predictions use 10.
        d = np.asarray(y_pred, dtype=np.float64) - np.asarray(y_true, dtype=np.float64)
        exponent = np.where(d < 0, -d / 13.0, d / 10.0)
        return float(np.expm1(exponent).sum())

    cmapss_score = calculate_cmapss_score(actuals, predictions)
    print(f"C-MAPSS Score for {dataset_id}: {cmapss_score:.4f}")

    pd.DataFrame({"Actual_RUL": actuals, "Predicted_RUL": predictions}).to_csv(f"rul_predictions_{dataset_id}.csv", index=False)
    print(f"Predictions saved to rul_predictions_{dataset_id}.csv")

    return rmse, cmapss_score

if __name__ == "__main__":
    # Train and evaluate each dataset in turn, collecting its two metrics.
    dataset_ids = ["FD001", "FD002", "FD003", "FD004"]
    results = {}

    for dataset_id in dataset_ids:
        rmse, cmapss_score = train_and_evaluate_dataset(dataset_id)
        metrics_for_dataset = {"RMSE": rmse, "C-MAPSS Score": cmapss_score}
        results[dataset_id] = metrics_for_dataset

    # Summary: one line per dataset, in the order they were processed.
    print("\n--- Overall Results ---")
    for dataset_id in results:
        metrics = results[dataset_id]
        print(f"Dataset {dataset_id}: RMSE = {metrics['RMSE']:.4f}, C-MAPSS Score = {metrics['C-MAPSS Score']:.4f}")





--- Processing Dataset: FD001 ---
Starting model training...
Epoch 1/100, Training Loss: 337.1564
Epoch 2/100, Training Loss: 140.9892
Epoch 3/100, Training Loss: 130.4279
Epoch 4/100, Training Loss: 127.9151
Epoch 5/100, Training Loss: 119.4798
Epoch 6/100, Training Loss: 116.0045
Epoch 7/100, Training Loss: 111.9500
Epoch 8/100, Training Loss: 108.0742
Epoch 9/100, Training Loss: 102.0355
Epoch 10/100, Training Loss: 90.1225
Epoch 11/100, Training Loss: 82.2942
Epoch 12/100, Training Loss: 70.2651
Epoch 13/100, Training Loss: 63.1576
Epoch 14/100, Training Loss: 56.1699
Epoch 15/100, Training Loss: 53.0923
Epoch 16/100, Training Loss: 46.7204
Epoch 17/100, Training Loss: 38.0857
Epoch 18/100, Training Loss: 35.4311
Epoch 19/100, Training Loss: 30.5769
Epoch 20/100, Training Loss: 29.0136
Epoch 21/100, Training Loss: 24.5454
Epoch 22/100, Training Loss: 21.4744
Epoch 23/100, Training Loss: 20.7225
Epoch 24/100, Training Loss: 18.8738
Epoch 25/100, Training Loss: 16.6623
Epoch 26/100,

In [None]:

import torch
import torch.nn as nn
import numpy as np
import pandas as pd 
from torch.utils.data import DataLoader, TensorDataset

from sklearn.metrics import mean_squared_error

# Architecture settings; they must match the ones the checkpoint was trained with.
embed_dim = 64
num_layers = 2
num_heads = 4
dff = 128
rate = 0.1 
MAX_RUL = 125

# Reload the test arrays from the working directory.
X_test = np.load("/kaggle/working/X_test_FD001.npy")
y_test_true = np.load("/kaggle/working/y_test_true_FD001.npy")
input_dim = X_test.shape[2]

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_true_tensor = torch.tensor(y_test_true, dtype=torch.float32).unsqueeze(1)

# Fixed order (shuffle=False) so predictions stay aligned with the labels.
test_dataset = TensorDataset(X_test_tensor, y_test_true_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Rebuild the network and restore its trained weights.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TransformerRUL(input_dim, embed_dim, num_layers, num_heads, dff, rate, MAX_RUL).to(device)
state_dict = torch.load("/kaggle/working/transformer_rul_model_FD001.pth", map_location=device)
model.load_state_dict(state_dict)
model.eval() 

print("Starting model evaluation...")

predictions = []
actuals = []

# Gradients are not needed for inference.
with torch.no_grad():
    for features, targets in test_loader:
        batch_output = model(features.to(device))
        predictions.extend(batch_output.cpu().numpy().flatten())
        actuals.extend(targets.cpu().numpy().flatten())

predictions = np.array(predictions)
actuals = np.array(actuals)

# Calculate RMSE
mse = mean_squared_error(actuals, predictions)
rmse = np.sqrt(mse)
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")

def calculate_cmapss_score(y_true, y_pred):
    """Asymmetric exponential score of predictions against targets (lower is better).

    With ``d = y_pred - y_true`` each sample contributes

    * ``exp(-d / 13) - 1`` when ``d < 0`` (prediction below the target), and
    * ``exp(d / 10) - 1`` otherwise (prediction at or above the target),

    and the contributions are summed.

    Args:
        y_true: Array-like of target values.
        y_pred: Array-like of predicted values, same length as ``y_true``.

    Returns:
        The total score as a Python float; ``0.0`` for empty input.
    """
    # Work in float64 so float32 inputs do not lose precision during accumulation.
    d = np.asarray(y_pred, dtype=np.float64) - np.asarray(y_true, dtype=np.float64)
    # Pick the per-sample exponent first so exp is evaluated once per element.
    exponent = np.where(d < 0, -d / 13.0, d / 10.0)
    return float(np.expm1(exponent).sum())

# Score the predictions and report the result.
score = calculate_cmapss_score(actuals, predictions)
print(f"C-MAPSS Score: {score:.4f}")

# Store targets and predictions side by side for later inspection.
prediction_table = pd.DataFrame({"Actual_RUL": actuals, "Predicted_RUL": predictions})
prediction_table.to_csv("rul_predictions.csv", index=False)
print("Predictions saved to rul_predictions.csv")




Starting model evaluation...
Root Mean Squared Error (RMSE): 15.0819
C-MAPSS Score: 562.0964
Predictions saved to rul_predictions.csv
