In [1]:
import torch

import copy
import numpy as np
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from torch import nn, optim

import torch.nn.functional as F
from tqdm import tqdm

# Seed PyTorch's global random number generator so that randomly initialised
# weights are repeatable from one run of the notebook to the next.
torch.manual_seed(42)

<torch._C.Generator at 0x1746f333250>

In [2]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [3]:
# Load the pump sensor CSV and preview its first rows.
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# run elsewhere unless the file is placed at the same location.
temp_df = pd.read_csv(r"D:\Uptime AI\Uptime AI\baldwin_pump_data.csv")
temp_df.head()

Unnamed: 0,Time Step,BFP_SEAL_WATER_FLOW,BFP_SUCT_HDR_PRESS________,AUX_CDSR_2B_PRESS_________,BFP_SUCTION_HDR_TEMP______,BFPT_B_VALVE_DEMAND_HP_STO,BFPT_B_VALVE_DEMAND_LP_STO,BFPT_B_CALC_VALVE_DEMAND__,2B_TDBFP_RECIRC_VLV_DMD___,BFP_2B_DISCHARGE_FLOW_____,...,BFPT_2B_RAD_SHAFT_VIB_IB_B,BFP_2B_AXIAL_SHAFT_VIB____,BFPT_2B_THRUST_BRG_WEAR___,BFP_2B_RAD_SHAFT_VIB_OB_BR,BFP_2B_RAD_SHAFT_VIB_IB_BR,BFPT_2B_AXIAL_SHAFT_VIB___,MAIN_STEAM_FLOW_TO_BFPS___,EXT_STM_FLOW_TO_BFP_2B____,CW_FRM_AUX_CDSR_2B________,CW_TO_AUX_CDS_2B
0,5/1/98 0:00,14.35,136.18,29.2,88.57,0.01,0.0,0.0,99.98,0.0,...,,,,,,,0.0,,68.11,65.72
1,5/1/98 0:05,14.35,136.18,29.2,88.57,0.01,0.0,0.0,99.98,0.0,...,,,,,,,0.0,,68.11,65.72
2,5/1/98 0:10,14.35,136.18,29.2,88.57,0.01,0.0,0.0,99.98,0.0,...,,,,,,,0.0,,68.11,65.72
3,5/1/98 0:15,14.35,136.18,29.2,88.57,0.01,0.0,0.0,99.98,0.0,...,,,,,,,0.0,,68.11,65.72
4,5/1/98 0:20,14.34,136.18,29.2,88.57,0.01,0.0,0.0,99.98,0.0,...,,,,,,,0.0,,68.11,65.72


In [4]:
# Work on a copy so that temp_df keeps the data exactly as it was loaded.
df  = temp_df.copy()

In [5]:
# Number of model inputs: every column of df except one.
# NOTE(review): the excluded column is presumably 'Time Step', which is dropped
# before the data is turned into a tensor further down — confirm.
n_features = df.shape[1]-1
# Passed to the model constructors below.
# NOTE(review): the DataLoader created later does not use this value.
batch_size = 16

In [6]:
#Encoder Decoder for AutoEncoder model
class Encoder(nn.Module):
    """LSTM encoder that maps rows of features to fixed-size embeddings.

    Two recurrent stages are stacked: a 3-layer LSTM (dropout 0.3 between its
    layers) mapping ``n_features -> 2 * embedding_dim``, followed by a
    single-layer LSTM mapping ``2 * embedding_dim -> embedding_dim``.

    Args:
        batch_size: Stored on the instance; the forward pass does not read it.
        n_features: Size of the last dimension of the input.
        embedding_dim: Size of the embedding produced for each input row.
    """

    def __init__(self,batch_size,n_features,embedding_dim):
        super().__init__()

        self.batch_size = batch_size
        self.n_features = n_features
        self.embedding_dim = embedding_dim
        self.hidden_dim = embedding_dim*2

        self.rnn1 = nn.LSTM(
            input_size = n_features,
            hidden_size = self.hidden_dim,
            num_layers = 3, dropout = 0.3
        )
        self.rnn2 = nn.LSTM(
            input_size = self.hidden_dim,
            hidden_size = self.embedding_dim,
            num_layers = 1
        )
        # No initial (h0, c0) tensors are built here: forward() never passes any,
        # and nn.LSTM starts from zero states when none are supplied.

    def forward(self,x):
        """Encode ``x`` and return the final hidden state of the second LSTM.

        A leading dimension of size 1 is added to ``x`` before it is fed to the
        LSTMs, so a 2-D input of shape ``(rows, n_features)`` is processed as a
        length-1 sequence whose batch dimension is ``rows``.

        Returns:
            Tensor of shape ``(1, -1, embedding_dim)`` holding the final hidden
            state of ``rnn2``.
        """
        x = x.unsqueeze(0)
        x, _ = self.rnn1(x)
        # Only the final hidden state of the second stage is used as the embedding.
        _, (hidden_n, _) = self.rnn2(x)

        return hidden_n.reshape((1,-1,self.embedding_dim))

class Decoder(nn.Module):
    """LSTM decoder that expands embeddings back to feature rows.

    A 3-layer LSTM (dropout 0.3 between its layers) keeps the width at
    ``input_dim``, a single-layer LSTM widens it to ``2 * input_dim``, and a
    linear layer projects the result to ``output_dim`` values.

    Args:
        batch_size: Stored on the instance; the forward pass does not read it.
        input_dim: Size of the last dimension of the input (the embedding size).
        output_dim: Number of values produced per row. The default is the value
            the module-level ``n_features`` had when this class was defined.
    """

    def __init__(self,batch_size,input_dim,output_dim=n_features): #input_dim is same as the embedding dimension
        super().__init__()

        self.batch_size = batch_size
        self.input_dim = input_dim
        self.hidden_dim = 2 * input_dim
        self.output_dim = output_dim

        self.rnn1 = nn.LSTM(input_size=input_dim, hidden_size=input_dim,
                            num_layers=3, dropout=0.3)
        self.rnn2 = nn.LSTM(input_size=input_dim, hidden_size=self.hidden_dim,
                            num_layers=1)
        self.dense_layers = nn.Linear(self.hidden_dim, output_dim)

    def forward(self,x):
        """Decode ``x`` and return a 2-D tensor with ``output_dim`` columns."""
        sequence_out, _ = self.rnn1(x)
        sequence_out, _ = self.rnn2(sequence_out)
        # Collapse every leading dimension so the linear layer sees one row per step.
        flat = sequence_out.reshape((-1, self.hidden_dim))

        return self.dense_layers(flat)

class RAE(nn.Module):
    """Recurrent autoencoder: an ``Encoder`` followed by a ``Decoder``.

    Args:
        batch_size: Forwarded to the encoder and the decoder.
        n_features: Width of the input rows, and also of the reconstructed rows.
        embedding_dim: Size of the embedding passed from encoder to decoder.
    """

    def __init__(self,batch_size,n_features,embedding_dim):
        super().__init__()

        self.batch_size, self.n_features = batch_size, n_features
        self.embedding_dim = embedding_dim

        self.encoder = Encoder(batch_size,n_features,embedding_dim).to(device)
        # Pass n_features explicitly: otherwise the decoder's output width comes
        # from its default argument (a notebook global captured at definition
        # time) and can disagree with the n_features given to this constructor.
        self.decoder = Decoder(batch_size,embedding_dim,output_dim=n_features).to(device)

    def forward(self,x):
        """Return the decoder's reconstruction of ``x``."""
        return self.decoder(self.encoder(x))

In [7]:
# Build an RAE with a 96-dimensional embedding and place it on the selected device.
model = RAE(batch_size, n_features, embedding_dim=96).to(device)

In [8]:
# Replace the model built above with the object stored in Batched.pt.
# map_location remaps the stored tensors onto `device`, so a checkpoint written on
# a GPU machine can still be loaded when this notebook falls back to the CPU.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints from a trusted source.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which rejects a fully pickled module; pass weights_only=False there if needed.
model = torch.load(r"D:\Uptime AI\Uptime AI\Batched.pt", map_location=device)

In [9]:
# Display the module tree of the loaded model.
model

RAE(
  (encoder): Encoder(
    (rnn1): LSTM(47, 192, num_layers=3)
    (rnn2): LSTM(192, 96)
  )
  (decoder): Decoder(
    (rnn1): LSTM(96, 96, num_layers=3)
    (rnn2): LSTM(96, 192)
    (dense_layers): Linear(in_features=192, out_features=47, bias=True)
  )
)

In [10]:
# Remove the shutdown conditions: rows whose LP speed input reads exactly 0.0.
is_running = df["BFPT_B_LP_SPEED_INPUT__1__"] != 0.0
df = df.loc[is_running].copy()
# print(len(df)) #length of dataframe after removing the shutdown conditions

# Fill gaps in the numeric columns only. Any non-numeric column that is still
# present (e.g. 'Time Step') cannot be interpolated or averaged, and
# DataFrame.mean() raises on string columns in pandas >= 2.0.
numeric_cols = df.select_dtypes(include="number").columns
df[numeric_cols] = df[numeric_cols].interpolate(
    method='linear', limit_direction='backward', axis=0
)
# Whatever interpolation could not reach is replaced by the column mean.
df = df.fillna(df.mean(numeric_only=True))

In [11]:
# Drop the timestamp column so that only numeric columns remain.
# errors='ignore' keeps this cell re-runnable: an in-place drop raises KeyError
# the second time because the column is already gone.
df = df.drop(columns=['Time Step'], errors='ignore')
# Explicit float32 tensor (the dtype the legacy torch.Tensor constructor produced).
dataset = torch.tensor(df.to_numpy(), dtype=torch.float32)
# DataLoader defaults (batch_size=1, no shuffling): one row per batch, in order.
loader = torch.utils.data.DataLoader(dataset)

In [12]:
def predict(model,loader):
    """Run ``model`` over every batch in ``loader`` and score each reconstruction.

    The model is switched to evaluation mode (and left in it) and gradients are
    disabled. Each batch is moved to the device that holds the model's parameters
    before the forward pass, so the function does not rely on a notebook-level
    ``device`` variable. A model without parameters receives the batches as-is.

    Args:
        model: Module whose output is compared element-wise with its input batch.
        loader: Iterable yielding tensor batches.

    Returns:
        A ``(predictions, losses)`` tuple of lists with one entry per batch: the
        flattened prediction as a NumPy array, and the mean absolute error
        between the prediction and the input batch as a float.
    """
    predictions,losses = [],[]
    criterion = nn.L1Loss(reduction='mean')

    # Follow the model's own device instead of a global one.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    model = model.eval()
    with torch.no_grad():
        for seq_true in loader:
            if target_device is not None:
                seq_true = seq_true.to(target_device)
            seq_pred = model(seq_true)

            predictions.append(seq_pred.cpu().numpy().flatten())
            losses.append(criterion(seq_pred,seq_true).item())
    return predictions, losses

# Run the model over the entire dataset: the loader yields one row per batch, so
# this produces one reconstruction and one L1 loss per row.
predictions, losses = predict(model,loader)

In [13]:
# Sanity check: there should be exactly one prediction per dataframe row.
len(predictions),len(df)

(6424, 6424)

In [14]:
# Wrap the per-row reconstructions in a dataframe that shares df's columns and
# index, so the two frames can be compared element by element.
preds_df = pd.DataFrame(predictions,columns=df.columns,index=df.index)

In [36]:
def anomalous_columns(df,preds_df,num_anomalous_columns:int):
    """Return the columns with the largest average absolute reconstruction error.

    Args:
        df: Observed values.
        preds_df: Predicted values, aligned with ``df`` on index and columns.
        num_anomalous_columns: How many column labels to return.

    Returns:
        Index of column labels ordered from highest to lowest error, truncated
        to ``num_anomalous_columns`` entries.
    """
    abs_error = df.sub(preds_df).abs()
    # Per-column total error divided by the number of rows.
    per_column_error = abs_error.sum(axis=0) / len(abs_error)
    ranked_labels = per_column_error.sort_values(ascending=False).index

    return ranked_labels[:num_anomalous_columns]

In [37]:
# Change the last argument to choose how many of the highest-error columns to report.
columns = anomalous_columns(df,preds_df,5)
columns

Index(['BFPT_B_LP_SPEED_INPUT__2__', 'BFPT_B_LP_SPEED_INPUT__1__',
       '2B_TDBFP_DISCHARGE_FLOW___', 'BFP_2B_DISCHARGE_FLOW_____',
       'BFP_DSCH_HDR_PRESS________'],
      dtype='object')