In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

In [2]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
plt.rcParams['figure.figsize'] = (15.0, 8.0)

In [4]:
from operator import index


sensor_data = pd.read_csv('sensor.csv', parse_dates=['timestamp'], index_col='timestamp')
display(sensor_data.info())
display(sensor_data.head())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 220320 entries, 2018-04-01 00:00:00 to 2018-08-31 23:59:00
Data columns (total 54 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   Unnamed: 0      220320 non-null  int64  
 1   sensor_00       210112 non-null  float64
 2   sensor_01       219951 non-null  float64
 3   sensor_02       220301 non-null  float64
 4   sensor_03       220301 non-null  float64
 5   sensor_04       220301 non-null  float64
 6   sensor_05       220301 non-null  float64
 7   sensor_06       215522 non-null  float64
 8   sensor_07       214869 non-null  float64
 9   sensor_08       215213 non-null  float64
 10  sensor_09       215725 non-null  float64
 11  sensor_10       220301 non-null  float64
 12  sensor_11       220301 non-null  float64
 13  sensor_12       220301 non-null  float64
 14  sensor_13       220301 non-null  float64
 15  sensor_14       220299 non-null  float64
 16  sensor_15       0 non-

None

Unnamed: 0_level_0,Unnamed: 0,sensor_00,sensor_01,sensor_02,sensor_03,sensor_04,sensor_05,sensor_06,sensor_07,sensor_08,...,sensor_43,sensor_44,sensor_45,sensor_46,sensor_47,sensor_48,sensor_49,sensor_50,sensor_51,machine_status
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-04-01 00:00:00,0,2.465394,47.09201,53.2118,46.31076,634.375,76.45975,13.41146,16.13136,15.56713,...,41.92708,39.6412,65.68287,50.92593,38.19444,157.9861,67.70834,243.0556,201.3889,NORMAL
2018-04-01 00:01:00,1,2.465394,47.09201,53.2118,46.31076,634.375,76.45975,13.41146,16.13136,15.56713,...,41.92708,39.6412,65.68287,50.92593,38.19444,157.9861,67.70834,243.0556,201.3889,NORMAL
2018-04-01 00:02:00,2,2.444734,47.35243,53.2118,46.39757,638.8889,73.54598,13.32465,16.03733,15.61777,...,41.66666,39.351852,65.39352,51.21528,38.194443,155.9606,67.12963,241.3194,203.7037,NORMAL
2018-04-01 00:03:00,3,2.460474,47.09201,53.1684,46.397568,628.125,76.98898,13.31742,16.24711,15.69734,...,40.88541,39.0625,64.81481,51.21528,38.19444,155.9606,66.84028,240.4514,203.125,NORMAL
2018-04-01 00:04:00,4,2.445718,47.13541,53.2118,46.397568,636.4583,76.58897,13.35359,16.21094,15.69734,...,41.40625,38.77315,65.10416,51.79398,38.77315,158.2755,66.55093,242.1875,201.3889,NORMAL


In [5]:
sensor_data.drop(['Unnamed: 0'], axis=1, inplace=True)
sensor_data.dropna(axis=1, thresh=15000, inplace=True)
sensor_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 220320 entries, 2018-04-01 00:00:00 to 2018-08-31 23:59:00
Data columns (total 52 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   sensor_00       210112 non-null  float64
 1   sensor_01       219951 non-null  float64
 2   sensor_02       220301 non-null  float64
 3   sensor_03       220301 non-null  float64
 4   sensor_04       220301 non-null  float64
 5   sensor_05       220301 non-null  float64
 6   sensor_06       215522 non-null  float64
 7   sensor_07       214869 non-null  float64
 8   sensor_08       215213 non-null  float64
 9   sensor_09       215725 non-null  float64
 10  sensor_10       220301 non-null  float64
 11  sensor_11       220301 non-null  float64
 12  sensor_12       220301 non-null  float64
 13  sensor_13       220301 non-null  float64
 14  sensor_14       220299 non-null  float64
 15  sensor_16       220289 non-null  float64
 16  sensor_17       220274

In [6]:
conditions = [(sensor_data['machine_status'] =='NORMAL'), (sensor_data['machine_status'] =='BROKEN'), (sensor_data['machine_status'] =='RECOVERING')]
choices = [1, 0, 0.5]
sensor_data['Operation'] = np.select(conditions, choices, default=0)
df0 = pd.DataFrame(sensor_data, columns=['Operation','sensor_04', 'sensor_06', 'sensor_07', 'sensor_08', 'sensor_09']).dropna()
df0.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 214251 entries, 2018-04-01 00:00:00 to 2018-08-31 23:59:00
Data columns (total 6 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   Operation  214251 non-null  float64
 1   sensor_04  214251 non-null  float64
 2   sensor_06  214251 non-null  float64
 3   sensor_07  214251 non-null  float64
 4   sensor_08  214251 non-null  float64
 5   sensor_09  214251 non-null  float64
dtypes: float64(6)
memory usage: 11.4 MB


In [23]:
df0.values[:, 1:].astype(np.float32).transpose().shape

(5, 214251)

In [74]:
class SensorDataset(torch.utils.data.Dataset):
    def __init__(self, values: np.array, window_length: int) -> None:
        super().__init__()
        self.window_length = window_length
        self.n_samples = len(values) - window_length + 1
        self.x_data = torch.from_numpy(values[:, 1:].astype(np.float32))
        self.y_data = torch.from_numpy(values[:, 0].astype(np.float32)).view(-1, 1)
    
    def __getitem__(self, index: int) -> torch.Tensor:
        x_val = self.x_data[index: self.window_length + index]
        if x_val.shape[0] < self.window_length:
            print(x_val.shape)
            raise IndexError
        else:
            return x_val, self.y_data[index + self.window_length - 1]
    
    def __len__(self) -> int:
        return self.n_samples


train_data, test_data = df0.values[:107125], df0.values[107125:]
train_dataset = SensorDataset(train_data, window_length=800)
test_dataset = SensorDataset(test_data, window_length=800)

train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=16,
    shuffle=True,
)

test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=16,
    shuffle=False
)

dataloaders = {
    'train': train_loader,
    'eval': test_loader
}

dataset_sizes = {
    'train': len(train_dataset),
    'eval': len(test_dataset)
}

In [58]:
x, y = train_dataset[0]
x, y

(tensor([[634.3750, 634.3750, 638.8889,  ..., 630.4398, 631.3657, 639.3518],
         [ 13.4115,  13.4115,  13.3246,  ...,  13.3463,  13.3246,  13.2885],
         [ 16.1314,  16.1314,  16.0373,  ...,  16.2037,  16.1603,  16.2471],
         [ 15.5671,  15.5671,  15.6178,  ...,  15.6539,  15.7697,  15.6973],
         [ 15.0535,  15.0535,  15.0101,  ...,  15.0897,  15.0825,  15.0825]]),
 tensor([1.], dtype=torch.float16))

In [97]:
class ConvNet(nn.Module):
    def __init__(self) -> None:
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv1d(5, 16, 100)  # L_out = 1075 + 2 * 1 - 1 * (100 - 1) = 978
        self.pool1 = nn.MaxPool1d(3)  # L_out = 978 + 2 - 99 = 881
        self.conv2 = nn.Conv1d(16, 32, 100)  # L_out = 881  + 2 - 99 = 784
        self.pool2 = nn.MaxPool1d(3)  # L_out = 784 + 3 - 100 = 687
        self.fc1 = nn.Linear(32*44, 128)
        self.fc2 = nn.Linear(128, 32)
        self.fc3 = nn.Linear(32, 1)
    
    def forward(self, x):
        out = self.conv1(x)
        out = F.relu(out)
        # print("Conv1 shape output", out.shape)
        out = self.pool1(out)
        # print('Pool1 shape output:', out.shape)
        
        out = self.conv2(out)
        out = F.relu(out)
        # print("Conv2 shape output", out.shape)
        out = self.pool2(out)
        # print('Pool2 shape output:', out.shape)

        out = out.view(-1, 32*44)

        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = torch.sigmoid(self.fc3(out))

        return out


class LstmNet(nn.Module):
    def __init__(self, n_features: int, seq_length: int) -> None:
        super(LstmNet, self).__init__()
        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=32,
            batch_first=True,
        )
        self.pool = nn.MaxPool1d(10)
        self.fc = nn.Linear(32*80, 1)
    
    def forward(self, x):
        # print(f'{x.shape=}')
        out, _ = self.lstm(x)
        # print(f'{out.shape=}')
        out = self.pool(out.reshape(-1, out.shape[2], out.shape[1]))
        # print(f'{out.shape=}')
        out = out.view(-1, 32*80)
        # print(f'{out.shape=}')
        out = self.fc(out)
        out = torch.sigmoid(out)
        return out

In [98]:
num_epochs = 3

model = LstmNet(5, 800).to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)

    for phase in ['train', 'eval']:
        if phase == 'train':
            model.train()
        else:
            model.eval()
        
        running_loss = 0.0
        running_corrects = 0

        # Iterate over data.
        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # forward
            # track history if only in train
            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

            # statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        if phase == 'train':
            scheduler.step()

        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects.double() / dataset_sizes[phase]

        print('{} Loss: {:.4f} Acc: {:.4f}'.format(
            phase, epoch_loss, epoch_acc))

Epoch 0/2
----------
train Loss: 0.0451 Acc: 0.0006
eval Loss: 0.0759 Acc: 0.0005
Epoch 1/2
----------
train Loss: 0.0389 Acc: 0.0006
eval Loss: 0.0624 Acc: 0.0005
Epoch 2/2
----------
train Loss: 0.0395 Acc: 0.0006
eval Loss: 0.0587 Acc: 0.0005


In [118]:
df0.reset_index().query('Operation == 0')

Unnamed: 0,timestamp,Operation,sensor_04,sensor_06,sensor_07,sensor_08,sensor_09
17155,2018-04-12 21:55:00,0.0,202.526031,3.219039,16.89091,16.86921,15.08247
24466,2018-04-18 00:30:00,0.0,206.038757,12.30469,15.1548,14.18547,13.86719
68563,2018-05-19 03:18:00,0.0,200.115738,13.5923,15.91435,15.14757,14.79311
76466,2018-05-25 00:30:00,0.0,612.1528,14.0625,16.6088,15.94329,15.59606
126716,2018-06-28 22:00:00,0.0,201.368622,11.33536,15.27054,15.18374,15.11863
135068,2018-07-08 00:11:00,0.0,500.0,0.028935,0.036169,0.036169,0.007234
160374,2018-07-25 14:00:00,0.0,420.503448,14.18547,16.24711,15.69734,15.05353


In [122]:
example_output = None
for i in range(test_dataset.n_samples):
    example_output = test_dataset[i][1]
    if 0 in example_output:
        break

In [124]:
example_output, i

(tensor([0.]), 18792)

In [143]:
example_inputs = test_dataset[18793][0].view(1, 800, 5)

In [144]:
model(example_inputs.to(device))

tensor([[0.8194]], device='cuda:0', grad_fn=<SigmoidBackward0>)

In [11]:
values = df0.values
display(values.shape)
windowed_data = []
for i in range(len(values)):
    if len(values[i: 800+i]) == 800:
        windowed_data.append(values[i: 800+i])
    else:
        break
display(len(windowed_data))

(214251, 6)

213452

(213452, 800, 6)