In [None]:
import pandas as pd
import numpy as np



In [None]:
data = pd.read_csv('/content/drive/MyDrive/Predicting engine health/data/engine_data.csv')

In [None]:
data.head()


Unnamed: 0,Engine rpm,Lub oil pressure,Fuel pressure,Coolant pressure,lub oil temp,Coolant temp,Engine Condition
0,700,2.493592,11.790927,3.178981,84.144163,81.632187,1
1,876,2.941606,16.193866,2.464504,77.640934,82.445724,0
2,520,2.961746,6.553147,1.064347,77.752266,79.645777,1
3,473,3.707835,19.510172,3.727455,74.129907,71.774629,1
4,619,5.672919,15.738871,2.052251,78.396989,87.000225,0


In [None]:

np.where(pd.isnull(data))

(array([], dtype=int64), array([], dtype=int64))

IN terms of engine conditions, going to assume the value with the greatest frequency is considered 'normal' ie. not failing.

Assuming 1 is 'normal'

In [None]:
data['Engine Condition'].value_counts()

1    12317
0     7218
Name: Engine Condition, dtype: int64

Designing a Hybrid LSTM-CNN Model to predict engine failure.

In [1]:


import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split

# Define a custom dataset class for loading the CSV data
class CustomDataset(Dataset):
    def __init__(self, csv_file):
        self.data = pd.read_csv(csv_file)
        self.features = self.data.drop('Engine Condition', axis=1).values
        self.targets = self.data['Engine Condition'].values

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        x = self.features[index]
        y = self.targets[index]

        # Convert the data to appropriate tensors
        x = torch.from_numpy(x).float()
        y = torch.tensor(y).long()

        return x, y

# Define the file path for your data
csv_file = '/content/drive/MyDrive/Predicting engine health/data/engine_data.csv'

# Create an instance of the custom dataset
dataset = CustomDataset(csv_file)

# Define the hyperparameters and model architecture
input_size = len(dataset.features[0])
hidden_size = 64
num_classes = 2

# Set the random seed for reproducibility (optional)
torch.manual_seed(17)

# Split the dataset into train, validation, and test sets

# Split the data into train and remaining data
train_data, remaining_data = train_test_split(dataset, test_size=0.2, random_state=17)

# Split the remaining data into validation and test sets
val_data, test_data = train_test_split(remaining_data, test_size=0.5, random_state=17)

# Create data loaders for training, validation, and testing sets
batch_size =128
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# Define the hybrid LSTM-CNN model

class HybridModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(HybridModel, self).__init__()
        self.lstm = nn.LSTM(1, hidden_size, batch_first=True)
        self.cnn = nn.Conv1d(hidden_size, hidden_size, kernel_size=3)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        lstm_output, _ = self.lstm(x.unsqueeze(-1))
        lstm_output = lstm_output.transpose(1, 2)
        cnn_output = self.cnn(lstm_output)
        cnn_output = torch.mean(cnn_output, dim=2)
        output = self.fc(cnn_output)
        return output

# Define the loss function
criterion = nn.CrossEntropyLoss()

# Define the number of folds for cross-validation
num_folds = 25

# Check if a GPU is available and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")



#testing class

class Tester:
    def __init__(self, model, test_loader, criterion, device):
        self.model = model
        self.test_loader = test_loader
        self.criterion = criterion
        self.device = device

    def test(self):
        self.model.eval()
        test_loss = 0.0
        test_predictions = []
        test_targets = []

        with torch.no_grad():
            for inputs, targets in self.test_loader:
                inputs = inputs.to(self.device)
                targets = targets.to(self.device)
                outputs = self.model(inputs)
                loss = self.criterion(outputs, targets)
                test_loss += loss.item() * inputs.size(0)
                _, predictions = torch.max(outputs, 1)
                test_predictions.extend(predictions.tolist())
                test_targets.extend(targets.tolist())

        test_loss /= len(self.test_loader.dataset)
        test_accuracy = accuracy_score(test_targets, test_predictions)
        test_precision = precision_score(test_targets, test_predictions, average='weighted')
        test_recall = recall_score(test_targets, test_predictions, average='weighted')
        test_f1 = f1_score(test_targets, test_predictions, average='weighted')
        test_auc = roc_auc_score(test_targets, test_predictions)
        test_cm = confusion_matrix(test_targets, test_predictions)

        self.test_loss = test_loss
        self.test_accuracy = test_accuracy
        self.test_precision = test_precision
        self.test_recall = test_recall
        self.test_f1 = test_f1
        self.test_auc = test_auc
        self.test_cm = test_cm

        print('Test Metrics:')
        print(f'Loss: {test_loss:.4f}')
        print(f'Accuracy: {test_accuracy:.4f}')
        print(f'Precision: {test_precision:.4f}')
        print(f'Recall: {test_recall:.4f}')
        print(f'F1 Score: {test_f1:.4f}')
        print(f'AUC: {test_auc:.4f}')
        print('Confusion Matrix:')
        print(test_cm)



# Define the number of folds for cross-validation
num_folds = 10

# Initialize lists to store the evaluation metrics for each fold
val_losses = []
val_accuracies = []
val_precisions = []
val_recalls = []
val_f1_scores = []
val_aucs = []
val_cms = []

# Cross-validation loop
for fold in range(num_folds):
    print(f"Fold [{fold + 1}/{num_folds}]")

    # Split the data into train and validation sets for the current fold
    train_data, val_data = train_test_split(dataset, test_size=0.2, random_state=fold)

    # Create data loaders for the current fold
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

    # Initialize the model for the current fold
    model = HybridModel(input_size, hidden_size, num_classes).to(device)

    # Set the number of epochs and early stopping criteria for the current fold
    num_epochs = 13
    early_stopping_counter = 0
    early_stopping_threshold = 3
    best_eval_loss = float('inf')
    best_model = None

    # Define the optimizer for the current fold
    optimizer = optim.Adam(model.parameters())

    # Training loop for the current fold
    for epoch in range(num_epochs):
        train_loss = 0.0

        # Set the model in training mode
        model.train()

        for inputs, targets in train_loader:
            # Move inputs and targets to the device (e.g., GPU)
            inputs = inputs.to(device)
            targets = targets.to(device)

            # Clear the gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)

            # Calculate the loss
            loss = criterion(outputs, targets)

            # Backward pass
            loss.backward()

            # Update the weights
            optimizer.step()

            # Accumulate the loss
            train_loss += loss.item() * inputs.size(0)

        # Calculate the average training loss for the epoch
        train_loss /= len(train_data)

        # Evaluation loop for the current fold
        eval_loss = 0.0
        eval_predictions = []
        eval_targets = []

        # Set the model in evaluation mode
        model.eval()

        with torch.no_grad():
            for inputs, targets in val_loader:
                # Move inputs and targets to the device (e.g., GPU)
                inputs = inputs.to(device)
                targets = targets.to(device)

                # Forward pass
                outputs = model(inputs)

                # Calculate the loss
                loss = criterion(outputs, targets)

                # Accumulate the loss
                eval_loss += loss.item() * inputs.size(0)

                # Get the predicted labels
                _, predictions = torch.max(outputs, 1)

                # Collect the predictions and targets for evaluation metrics
                eval_predictions.extend(predictions.tolist())
                eval_targets.extend(targets.tolist())

        # Calculate the average evaluation loss for the epoch
        eval_loss /= len(val_data)

        # Check if the current evaluation loss is the best so far
        if eval_loss < best_eval_loss:
            best_eval_loss = eval_loss
            best_model = model.state_dict()
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1

        # Print the training and evaluation loss for the epoch
        print(f'Epoch [{epoch + 1}/{num_epochs}] - Training Loss: {train_loss:.4f} - Evaluation Loss: {eval_loss:.4f}')

        # Check if early stopping criteria are met
        if early_stopping_counter >= early_stopping_threshold:
            print("Early stopping triggered! No improvement seen for 3 epochs.")
            break

    # Load the best model for the current fold
    model.load_state_dict(best_model)

    # Create a new data loader for the test set
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

    # Create a tester instance for the current fold
    tester = Tester(model, test_loader, criterion, device)

    # Perform testing on the test set
    tester.test()

    # Store the evaluation metrics for the current fold
    val_losses.append(eval_loss)
    val_accuracies.append(tester.test_accuracy)
    val_precisions.append(tester.test_precision)
    val_recalls.append(tester.test_recall)
    val_f1_scores.append(tester.test_f1)
    val_aucs.append(tester.test_auc)
    val_cms.append(tester.test_cm)

# Calculate the average evaluation metrics across all folds
avg_val_loss = np.mean(val_losses)
avg_val_accuracy = np.mean(val_accuracies)
avg_val_precision = np.mean(val_precisions)
avg_val_recall = np.mean(val_recalls)
avg_val_f1_score = np.mean(val_f1_scores)
avg_val_auc = np.mean(val_aucs)
avg_val_cm = np.mean(val_cms, axis=0)

# Print the average evaluation metrics
print('Average Validation Metrics:')
print(f'Loss: {avg_val_loss:.4f}')
print(f'Accuracy: {avg_val_accuracy:.4f}')
print(f'Precision: {avg_val_precision:.4f}')
print(f'Recall: {avg_val_recall:.4f}')
print(f'F1 Score: {avg_val_f1_score:.4f}')
print(f'AUC: {avg_val_auc:.4f}')
print('Confusion Matrix:')
print(avg_val_cm)

# Now, perform testing on the test set using the best model
# Create a new data loader for the test set
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# Create a tester instance for the final test
final_tester = Tester(model, test_loader, criterion, device)

# Perform testing on the test set
final_tester.test()

# Print the evaluation metrics on the test set
print('Test Metrics:')
print(f'Loss: {final_tester.test_loss:.4f}')
print(f'Accuracy: {final_tester.test_accuracy:.4f}')
print(f'Precision: {final_tester.test_precision:.4f}')
print(f'Recall: {final_tester.test_recall:.4f}')
print(f'F1 Score: {final_tester.test_f1:.4f}')
print(f'AUC: {final_tester.test_auc:.4f}')
print('Confusion Matrix:')
print(final_tester.test_cm)

Fold [1/10]
Epoch [1/13] - Training Loss: 0.6571 - Evaluation Loss: 0.6478
Epoch [2/13] - Training Loss: 0.6458 - Evaluation Loss: 0.6270
Epoch [3/13] - Training Loss: 0.6300 - Evaluation Loss: 0.6065
Epoch [4/13] - Training Loss: 0.6145 - Evaluation Loss: 0.6668
Epoch [5/13] - Training Loss: 0.6190 - Evaluation Loss: 0.6092
Epoch [6/13] - Training Loss: 0.6114 - Evaluation Loss: 0.6057
Epoch [7/13] - Training Loss: 0.6111 - Evaluation Loss: 0.6040
Epoch [8/13] - Training Loss: 0.6123 - Evaluation Loss: 0.6055
Epoch [9/13] - Training Loss: 0.6113 - Evaluation Loss: 0.6120
Epoch [10/13] - Training Loss: 0.6122 - Evaluation Loss: 0.6005
Epoch [11/13] - Training Loss: 0.6070 - Evaluation Loss: 0.6000
Epoch [12/13] - Training Loss: 0.6109 - Evaluation Loss: 0.6013
Epoch [13/13] - Training Loss: 0.6082 - Evaluation Loss: 0.5996
Test Metrics:
Loss: 0.5991
Accuracy: 0.6781
Precision: 0.6635
Recall: 0.6781
F1 Score: 0.6566
AUC: 0.6134
Confusion Matrix:
[[ 266  449]
 [ 180 1059]]
Fold [2/10]
Ep