In [30]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv

In [31]:
# Step 1: Read the preprocessed sensor CSV into a DataFrame and preview it.
# NOTE(review): the path is absolute and machine-specific, so this cell only
# runs on a machine where exactly this file exists.
df = pd.read_csv(filepath_or_buffer=r'C:\Users\SUJAN\preprocessed_data.csv')
print(df.head(n=5))


   sensor_00  sensor_01  sensor_02  sensor_03  sensor_04  sensor_05  \
0   2.465394   47.09201    53.2118  46.310760   634.3750   76.45975   
1   2.465394   47.09201    53.2118  46.310760   634.3750   76.45975   
2   2.444734   47.35243    53.2118  46.397570   638.8889   73.54598   
3   2.460474   47.09201    53.1684  46.397568   628.1250   76.98898   
4   2.445718   47.13541    53.2118  46.397568   636.4583   76.58897   

   sensor_06  sensor_07  sensor_08  sensor_09  ...  sensor_44  sensor_45  \
0   13.41146   16.13136   15.56713   15.05353  ...  39.641200   65.68287   
1   13.41146   16.13136   15.56713   15.05353  ...  39.641200   65.68287   
2   13.32465   16.03733   15.61777   15.01013  ...  39.351852   65.39352   
3   13.31742   16.24711   15.69734   15.08247  ...  39.062500   64.81481   
4   13.35359   16.21094   15.69734   15.08247  ...  38.773150   65.10416   

   sensor_46  sensor_47  sensor_48  sensor_49  sensor_50  sensor_51  \
0   50.92593  38.194440   157.9861   67.70834

In [32]:
# Step 2: Build a graph representation
# Every column except 'machine_status' and 'date' counts as one sensor node.
num_sensors = len(df.columns) - 2
# All-ones square matrix: each sensor is linked to every sensor, itself included.
adjacency_matrix = np.ones(shape=(num_sensors, num_sensors))

In [33]:
# Print the adjacency matrix (NumPy abbreviates the middle of large arrays with "...")
print(adjacency_matrix)

[[1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 ...
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]]


In [34]:
# Define the Graph Convolutional Network (GCN) model
class GCN(nn.Module):
    """Two-layer graph convolutional network.

    The first ``GCNConv`` maps ``input_dim`` features to ``hidden_dim`` and is
    followed by a ReLU; the second maps ``hidden_dim`` to ``output_dim`` and its
    output is returned without any activation.

    Args:
        input_dim: number of input features per node.
        hidden_dim: width of the hidden representation.
        output_dim: number of output values per node.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, x, edge_index):
        """Apply both convolutions to ``x`` using the edges in ``edge_index``.

        Returns:
            The output of the second convolution (no activation applied).
        """
        # torch.relu is used rather than F.relu because torch.nn.functional
        # is never imported as ``F`` in this notebook.
        hidden = torch.relu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)

In [35]:

# Step 6: Set up the device, model, optimiser and loss used for training
# Prefer the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# NOTE(review): AnomalyDetectionModel is not defined in any visible cell, and a
# later cell rebinds `model` to a GCN instance - confirm whether this is stale.
model = AnomalyDetectionModel(hidden_dim=64, input_dim=4).to(device)
# The optimiser is bound to the parameters of the model created on the line above.
optimizer = optim.Adam(params=model.parameters(), lr=0.01)
# Binary cross-entropy computed on raw logits.
criterion = nn.BCEWithLogitsLoss()

In [36]:
# Convert the adjacency matrix to PyTorch tensors
# edge_index stacks the row and column indices of every non-zero adjacency entry.
edge_index = torch.tensor(np.vstack(np.nonzero(adjacency_matrix)), dtype=torch.long).to(device)
# The feature tensor is built straight from `df`: the array `X` is only assigned
# in a later cell, so reading it here fails on a fresh Restart & Run All.
# NOTE(review): edge_index only references node ids below num_sensors, whereas
# x has one row per DataFrame row - confirm which of the two is the node set.
x = torch.tensor(
    df.drop(columns=['machine_status', 'date']).values, dtype=torch.float
).to(device)

In [37]:
# Split the data into features and labels
exclude_columns = ['machine_status', 'date']
# Everything except the excluded columns becomes the feature array.
X = df.drop(exclude_columns, axis=1).to_numpy()
# Binary target: NORMAL -> 0, BROKEN and RECOVERING -> 1, stored as float on `device`.
y = torch.tensor(
    df['machine_status'].map({'NORMAL': 0, 'BROKEN': 1, 'RECOVERING': 1}).to_numpy(),
    dtype=torch.float,
    device=device,
)

In [38]:
# Initialize the GCN model
input_dim = X.shape[1]  # one input feature per column of X
hidden_dim = 64
output_dim = 1  # Output dimension for anomaly detection (binary classification)
# Place the model on the same device as the x / y / edge_index tensors.
model = GCN(input_dim, hidden_dim, output_dim).to(device)
# Rebuild the optimiser for this model: the one created in the training-setup
# cell holds the parameters of the previous `model` object, so it would never
# update the GCN's weights.
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [39]:
# Training loop
def train(model, loader, optimizer, criterion):
    """Run one optimisation pass over ``loader`` and report the mean batch loss.

    Args:
        model: module called as ``model(batch.x, batch.edge_index)``.
        loader: iterable of batches exposing ``x``, ``edge_index`` and ``y``.
        optimizer: optimiser whose ``zero_grad``/``step`` run once per batch.
        criterion: loss called as ``criterion(output, target)``.

    Returns:
        float: average of the per-batch loss values, or ``nan`` when ``loader``
        yields no batches.
    """
    model.train()
    running_loss = 0.0
    num_batches = 0

    for data in loader:
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        # Targets become a column vector in the output's dtype, so integer
        # labels are accepted by float-only losses such as BCEWithLogitsLoss.
        target = data.y.view(-1, 1).to(out.dtype)
        loss = criterion(out, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    return running_loss / num_batches if num_batches else float("nan")

In [44]:
def evaluate(model, loader):
    """Score ``model`` on every batch of ``loader`` with binary metrics.

    Each output is passed through a sigmoid and thresholded at 0.5 to obtain
    the predicted label; predictions and ``batch.y`` are collected over all
    batches before the metrics are computed.

    Args:
        model: module called as ``model(batch.x, batch.edge_index)``.
        loader: iterable of batches exposing ``x``, ``edge_index`` and ``y``.

    Returns:
        tuple: ``(accuracy, precision, recall, f1, confusion_matrix)`` as
        returned by the corresponding scikit-learn functions.
    """
    model.eval()
    predicted_labels = []
    true_labels = []

    with torch.no_grad():
        for data in loader:
            out = model(data.x, data.edge_index)
            predicted = torch.sigmoid(out) > 0.5
            # reshape(-1) always gives a 1-D tensor; squeeze() collapses a
            # single-node batch to 0-d, whose tolist() is a bare scalar that
            # list.extend() rejects.
            predicted_labels.extend(predicted.reshape(-1).tolist())
            true_labels.extend(data.y.reshape(-1).tolist())

    # scikit-learn works on array-likes, so hand it NumPy arrays directly.
    predicted_labels = np.asarray(predicted_labels, dtype=np.float32)
    true_labels = np.asarray(true_labels, dtype=np.float32)

    accuracy = accuracy_score(true_labels, predicted_labels)
    precision = precision_score(true_labels, predicted_labels)
    recall = recall_score(true_labels, predicted_labels)
    f1 = f1_score(true_labels, predicted_labels)
    cm = confusion_matrix(true_labels, predicted_labels)

    return accuracy, precision, recall, f1, cm


In [48]:
# Copy the predictions to host memory and convert them to a NumPy array.
# NOTE(review): `predicted_labels` is not assigned in any visible cell before
# this one (execution counts jump from 44 to 48) - confirm which cell makes it.
predicted_labels = predicted_labels.cpu().numpy()


In [49]:
# Bring the ground truth and the predictions into NumPy form for scikit-learn
# NOTE(review): `dataset` is not defined in any visible cell - confirm its origin.
true_labels = dataset.y.cpu().numpy()
predicted_labels = predicted_labels.squeeze()

# Calculate evaluation metrics
confusion_mat = confusion_matrix(y_true=true_labels, y_pred=predicted_labels)
precision = precision_score(y_true=true_labels, y_pred=predicted_labels)
recall = recall_score(y_true=true_labels, y_pred=predicted_labels)
f1 = f1_score(y_true=true_labels, y_pred=predicted_labels)

# Report the results
print("Confusion Matrix:\n", confusion_mat)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Confusion Matrix:
 [[205836      0]
 [     0  14484]]
Precision: 1.0
Recall: 1.0
F1 Score: 1.0


In [57]:
# Plot the results
# NOTE(review): `plt`, `test_data`, `test_output` and `anomaly_mask` are not
# defined or imported in any visible cell; the recorded output of this cell is
# a NameError for `anomaly_mask`. Confirm where these are meant to come from.
plt.figure(figsize=(12, 8))
# One stacked subplot per DataFrame column; column i of each tensor is drawn in row i.
for i in range(len(df.columns)):
    plt.subplot(len(df.columns), 1, i+1)
    plt.plot(test_data[:, i], label='Original Data', color='blue')
    plt.plot(test_output[:, i], label='Predicted Data', color='green')
    # x = positions where the mask is non-zero, y = predicted values at those positions.
    # NOTE(review): presumably squeeze() yields a bare scalar rather than a list
    # when exactly one position is flagged - verify.
    plt.scatter(torch.nonzero(anomaly_mask[:, i]).squeeze().tolist(), test_output[:, i][anomaly_mask[:, i]].squeeze().tolist(),
                color='red', label='Anomalies')
    plt.xlabel('Time')
    plt.ylabel('Value')
    plt.legend()
plt.suptitle('GNN Anomaly Detection')
plt.tight_layout()
plt.show()

# Print the anomaly detection results
# If any mask entry is set, list the flagged positions for each column by name.
if torch.any(anomaly_mask):
    print("Anomalies detected!")
    print("Anomaly indices:")
    for i in range(len(df.columns)):
        print(f"Variable {df.columns[i]}:", torch.nonzero(anomaly_mask[:, i]).squeeze().tolist())
else:
    print("No anomalies detected.")

NameError: name 'anomaly_mask' is not defined