In [50]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from torch import nn

In [None]:
# Read the raw sensor export from the working directory and preview its first rows.
dataset = pd.read_csv(filepath_or_buffer='sensor.csv')
dataset.head(n=5)

In [None]:
# Class balance of the label column, followed by the per-column dtype/null summary.
status_counts = dataset['machine_status'].value_counts()
print(status_counts)
dataset.info()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Label column as a Series; the later cells derive the boolean anomaly mask from it.
targets = dataset['machine_status']

# Learn the status vocabulary first, then map every row to its integer code.
le = LabelEncoder().fit(targets)
targets_enc = le.transform(targets)
targets_enc, le.classes_

In [None]:
# Parse the timestamp column in place so the `.dt` accessor is available.
dataset['timestamp'] = pd.to_datetime(dataset['timestamp'])

# Feature frame: drop the label, the CSV index column and the raw timestamp,
# then append calendar/time-of-day features derived from the timestamp.
dataset_d = (
    dataset
    .drop(columns=['machine_status', 'Unnamed: 0', 'timestamp'])
    .assign(
        month=dataset['timestamp'].dt.month,
        day=dataset['timestamp'].dt.day,
        hour=dataset['timestamp'].dt.hour,
        minute=dataset['timestamp'].dt.minute,
    )
)
dataset_d.head()

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Preprocessing: fill missing values with the column median, then standardise.
pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('std_scaler', StandardScaler()),
])

# Use the GPU when one is visible to torch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Fit the pipeline on the feature frame and keep the result as a float32
# tensor on the selected device.
dataset_prepared = torch.tensor(
    pipeline.fit_transform(dataset_d),
    dtype=torch.float32,
    device=device,
)
dataset_prepared.shape

In [None]:
# Boolean label per row: True for every status other than 'NORMAL'.
y = targets.values != 'NORMAL'

# Row positions of each class (flat integer index arrays).
id_anomaly = np.flatnonzero(y)
id_normal = np.flatnonzero(~y)
len(id_normal), len(id_anomaly)

In [75]:
# TensorDataset is imported in this cell because this is the first place it is
# used; without it a fresh Restart & Run All stops here with a NameError.
from torch.utils.data import TensorDataset

# Split the scaled feature tensor by class using the row-index arrays.
data_normal = dataset_prepared[id_normal]
data_anomaly = dataset_prepared[id_anomaly]

# Pair every feature row with its boolean label
# (False = 'NORMAL', True = any other machine status).
dataset_normal = TensorDataset(data_normal, torch.tensor(y[id_normal]))
dataset_anomaly = TensorDataset(data_anomaly, torch.tensor(y[id_anomaly]))

In [78]:
from torch.utils.data import ConcatDataset, DataLoader, random_split

# Fixed seed so the train/validation split and the training batch order are
# identical on every run; without it the metric curves change run to run.
SPLIT_SEED = 42
split_rng = torch.Generator().manual_seed(SPLIT_SEED)

# Number of NORMAL rows held out for validation.
# NOTE(review): 14484 presumably mirrors the number of anomalous rows so the
# validation set is balanced — confirm against len(dataset_anomaly).
len_valid = 14484
dataset_train, dataset_valid_normal = random_split(
    dataset_normal,
    [len(dataset_normal) - len_valid, len_valid],
    generator=split_rng,
)

# Validation set = held-out NORMAL rows followed by every anomalous row.
# This is what `dataset_valid += dataset_anomaly` produced implicitly; spelling
# out the ConcatDataset makes the resulting type explicit.
dataset_valid = ConcatDataset([dataset_valid_normal, dataset_anomaly])

# Training batches are reshuffled each epoch (seeded); validation order is fixed.
dataloader_train = DataLoader(dataset_train, batch_size=32, shuffle=True, generator=split_rng)
dataloader_valid = DataLoader(dataset_valid, batch_size=32, shuffle=False)

In [79]:
class Autoencoder(nn.Module):
    """Symmetric fully-connected autoencoder.

    The encoder maps ``input_dim`` features through 64 and 32 units down to a
    16-unit code (ReLU after every layer); the decoder mirrors it back to
    ``input_dim`` with a linear (unactivated) output layer.
    """

    def __init__(self, input_dim):
        super().__init__()
        hidden_sizes = (64, 32, 16)
        # Encoder is created before the decoder so parameter order (and thus
        # state_dict layout and random initialisation order) is unchanged.
        self.encoder = self._dense_stack(
            (input_dim, *hidden_sizes), activate_last=True
        )
        self.decoder = self._dense_stack(
            (*reversed(hidden_sizes), input_dim), activate_last=False
        )

    @staticmethod
    def _dense_stack(sizes, activate_last):
        """Chain Linear+ReLU pairs over consecutive ``sizes``.

        When ``activate_last`` is False the trailing ReLU is omitted, leaving
        a plain linear output layer.
        """
        layers = []
        for fan_in, fan_out in zip(sizes[:-1], sizes[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        if not activate_last:
            layers.pop()
        return nn.Sequential(*layers)

    def forward(self, x):
        """Encode ``x`` to the 16-unit code and decode it back to input size."""
        return self.decoder(self.encoder(x))

In [None]:
from torch.utils.data import DataLoader, TensorDataset

# Train the autoencoder to reconstruct its own input (MSE between output and
# input) on the batches served by dataloader_train.
model = Autoencoder(dataset_prepared.shape[1]).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.NAdam(model.parameters(), lr=0.001)

for epoch in range(1):
    # The label half of each batch is not needed for reconstruction training.
    # It is bound to `_labels` rather than `y` so the loop does not overwrite
    # the notebook-level `y` label array that other cells index into.
    for inputs, _labels in dataloader_train:
        outputs = model(inputs)
        loss = criterion(outputs, inputs)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if epoch % 2 == 0:
        # Note: this is the loss of the last batch of the epoch, not an epoch mean.
        print(f"{epoch}: loss: {loss.item():.4f}")

In [121]:
# detecting anomalies
def detect_anomalies(data, threshold, net=None):
    """Flag rows whose reconstruction error exceeds ``threshold``.

    Args:
        data: Float tensor with samples along dim 0 and features along dim 1
            (expected shape ``(n_samples, n_features)``), on the same device
            as the network.
        threshold: Scalar cut-off applied to the per-row mean squared
            reconstruction error.
        net: Optional module used for reconstruction. Defaults to the
            notebook-level ``model`` so existing two-argument calls keep
            working.

    Returns:
        Boolean tensor with one entry per row of ``data``; True marks a row
        whose error is strictly greater than ``threshold``.
    """
    net = model if net is None else net
    # Inference only: skip autograd bookkeeping so scoring a large tensor does
    # not retain an unused computation graph.
    with torch.no_grad():
        outputs = net(data)
        losses = nn.functional.mse_loss(outputs, data, reduction='none').mean(dim=1)
    return losses > threshold

# Materialise the validation dataset as two tensors (features, labels).
# Each sample is fetched once and reused for both tensors instead of being
# looked up twice through the dataset wrappers.
_valid_samples = [dataset_valid[i] for i in range(len(dataset_valid))]
dataset_valid_tensor = torch.stack([features for features, _label in _valid_samples], dim=0)
y_valid_tensor = torch.stack([label for _features, label in _valid_samples], dim=0)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Sweep candidate reconstruction-error thresholds and record how accuracy,
# precision and recall on the validation set respond to each one.
thresholds = np.arange(0.01, 0.5, 0.01)
score_acc = []
score_prec = []
score_rec = []

for threshold in thresholds:
    preds = detect_anomalies(dataset_valid_tensor, threshold).cpu()
    score_acc.append(accuracy_score(y_valid_tensor, preds))
    # zero_division=0 yields the same 0.0 sklearn falls back to when a
    # threshold flags no rows, without emitting a warning per iteration.
    score_prec.append(precision_score(y_valid_tensor, preds, zero_division=0))
    score_rec.append(recall_score(y_valid_tensor, preds))

# Labelled figure so the plot can be read on its own.
fig, ax = plt.subplots()
ax.plot(thresholds, score_acc, label='accuracy_score')
ax.plot(thresholds, score_prec, label='precision_score')
ax.plot(thresholds, score_rec, label='recall_score')
ax.set_xlabel('reconstruction-error threshold')
ax.set_ylabel('score')
ax.set_title('Validation metrics vs. anomaly threshold')
ax.legend()
plt.show()

In [None]:
# Score the validation set at a single operating point.
# NOTE(review): 0.3 is presumably read off the threshold-sweep plot — confirm.
threshold = 0.3
preds = detect_anomalies(dataset_valid_tensor, threshold).cpu()

# Number of rows flagged vs. number of rows actually labelled anomalous.
# Tensor.sum() is vectorised; the built-in sum() walks the tensor element by
# element in Python and prints the same values.
print(preds.sum(), y_valid_tensor.sum())

In [None]:
# Report the three validation metrics for the predictions at the chosen threshold.
metric_report = (
    ("accuracy: ", accuracy_score),
    ("precision: ", precision_score),
    ("recall: ", recall_score),
)
for caption, metric_fn in metric_report:
    print(caption, metric_fn(y_valid_tensor, preds))