A hybrid model approach to predicting engine failure

In [9]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer

# Define a custom dataset class for loading the CSV data

class CustomDataset(Dataset):
    def __init__(self, csv_file):
        self.data = pd.read_csv(csv_file)
        self.features = self.data.drop('Engine Condition', axis=1).values
        self.targets = pd.get_dummies(self.data['Engine Condition']).values

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        x = self.features[index]
        y = self.targets[index]

        # Convert the data to appropriate tensors
        x = torch.from_numpy(x).float()
        y = torch.from_numpy(y).float()

        return x, y

# Define the file path for your data
csv_file = '/content/drive/MyDrive/Predicting engine health/data/engine_data.csv'

# Create an instance of the custom dataset
dataset = CustomDataset(csv_file)

# Define the hyperparameters and model architecture
input_size = len(dataset.features[0])
hidden_size = 64
num_classes = 2

# Set the random seed for reproducibility (optional)
torch.manual_seed(17)

# Split the dataset into train, validation, and test sets
train_data, remaining_data = train_test_split(dataset, test_size=0.2, random_state=17)
val_data, test_data = train_test_split(remaining_data, test_size=0.5, random_state=17)

# Create data loaders for training, validation, and testing sets
batch_size = 128
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the dimensions and hyperparameters
input_size = 6
hidden_size = 64
num_classes = 2
learning_rate = 0.001
batch_size = 128
num_epochs = 11

# Create dummy input and target tensors and move them to the device
inputs = torch.randn(batch_size, input_size).to(device)
targets = torch.empty(batch_size, dtype=torch.long).random_(num_classes).to(device)

# Create the hybrid model and move it to the device
class HybridModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(HybridModel, self).__init__()

        self.conv = nn.Conv1d(input_size, hidden_size, kernel_size=3, padding=1)
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=hidden_size,
                nhead=4,
                dim_feedforward=hidden_size,
                dropout=0.1
            ),
            num_layers=2
        )
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = x.unsqueeze(2)
        conv_output = self.conv(x)
        conv_output = conv_output.permute(0, 2, 1)
        transformer_output = self.transformer_encoder(conv_output)
        output = self.fc(transformer_output)
        return output.squeeze(1)  # Squeeze the output to have shape [batch_size]

# Create an instance of the hybrid model and move it to the device
model = HybridModel(input_size, hidden_size, num_classes).to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.argmax(dim=1).to(device)  # Transform targets to have shape [batch_size]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Print loss
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")

# Testing
model.eval()  # Set the model to evaluation mode
test_loss = 0.0
test_predictions = []
test_targets = []

with torch.no_grad():
    for inputs, targets in test_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        test_loss += loss.item() * inputs.size(0)

        _, predictions = torch.max(outputs, 1)
        test_predictions.extend(predictions.tolist())
        test_targets.extend(targets.argmax(dim=1).tolist())

test_loss /= len(test_data)
accuracy = accuracy_score(test_targets, test_predictions)
precision = precision_score(test_targets, test_predictions)
recall = recall_score(test_targets, test_predictions)
f1 = f1_score(test_targets, test_predictions)
auc = roc_auc_score(test_targets, test_predictions)
cm = confusion_matrix(test_targets, test_predictions)

print('Test Results')
print('-------------------------')
print(f'Loss: {test_loss:.4f}')
print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print(f'AUC: {auc:.4f}')
print('Confusion Matrix:')
print(cm)
print('-------------------------')



Epoch 1/11, Loss: 0.7633559703826904
Epoch 2/11, Loss: 0.5143526792526245
Epoch 3/11, Loss: 0.5647080540657043
Epoch 4/11, Loss: 0.6137592792510986
Epoch 5/11, Loss: 0.6550619602203369
Epoch 6/11, Loss: 0.6794040203094482
Epoch 7/11, Loss: 0.7236048579216003
Epoch 8/11, Loss: 0.8848530650138855
Epoch 9/11, Loss: 0.6312944889068604
Epoch 10/11, Loss: 0.5669759511947632
Epoch 11/11, Loss: 0.561872661113739
Test Results
-------------------------
Loss: 0.6175
Accuracy: 0.6546
Precision: 0.6628
Recall: 0.9266
F1 Score: 0.7728
AUC: 0.5549
Confusion Matrix:
[[ 131  584]
 [  91 1148]]
-------------------------
