In [16]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from torch.cuda.amp import autocast, GradScaler
from torch.cuda.amp import autocast
from joblib import load

In [27]:
class LSTM_Autoencoder(nn.Module):
    def __init__(self, input_size, hidden_size, seq_len, num_layers=2):
        super(LSTM_Autoencoder, self).__init__()
        self.encoder = nn.LSTM(input_size, hidden_size, batch_first=True, num_layers=num_layers)
        self.decoder = nn.LSTM(hidden_size, hidden_size, batch_first=True, num_layers=num_layers)
        self.output_layer = nn.Linear(hidden_size, input_size)
        self.hidden_size = hidden_size
        self.seq_len = seq_len

    def forward(self, x):
        h0 = torch.zeros(self.encoder.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.encoder.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        encoded, (hn, cn) = self.encoder(x, (h0, c0))
        
        decoded, _ = self.decoder(encoded, (hn, cn))
        
        decoded = self.output_layer(decoded)
        
        return decoded

In [30]:
def pipeline(data_rec,comb_dataset, house, House):
    #check if missing values 
    missing_values = data_rec.isnull().sum()
    if(len(missing_values[missing_values >0])):
        print(missing_values[missing_values >0])
        data_rec.fillna(0)
    else:
        flag=0
    if flag==0:
        print('No missing values in the record')
        
    for i in range(1,21):
        if data_rec[i] not in [0,1]:
            print("invalid value assigned to 0")
            data_rec[i]=0
    for i in range(21,23):
        if (data_rec[i]<1) or (data_rec[i]>27):
            print("invalid value assigned to 0")
            data_rec[i]=1
    #drop columns not needed for lstm
    data = data_rec.drop(['Unnamed: 0']) # Remove non-sensor data
    # Initialize the model (ensure the parameters match the saved model)
    input_size = 25  # Adjust based on your data
    hidden_size = 256  # Adjust based on your saved model
    seq_len = 1      # Adjust based on your sequence length
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Initialize the model and move it to GPU
    model = LSTM_Autoencoder(input_size=input_size, hidden_size=hidden_size, seq_len=seq_len).to(device)
    
    
   # Load the state_dict
    state_dict = torch.load(f"anomaly detection/House_{House}_Model_Files/lstm_autoencoder_{house}.pth", map_location=device)

    # Remove the "module." prefix from the keys
    new_state_dict = {}
    for key, value in state_dict.items():
        new_key = key.replace("module.", "")  # Remove the "module." prefix
        new_state_dict[new_key] = value

    # Load the updated state_dict into the model
    model.load_state_dict(new_state_dict)

    # Set the model to evaluation mode
    model.eval()


    reconstruction_errors = np.load(f"anomaly detection/House_{House}_Model_Files/reconstruction_errors_{house}.npy")

    threshold = np.percentile(reconstruction_errors, 95)
    anomalies = reconstruction_errors > threshold


    # Load the scaler from the file
    sc = load(f'anomaly detection/House_{House}_Model_Files/standardscaler_{house}.joblib')

    # Transform new data using the StandardScaler
    new_data = sc.transform(np.array([data]))

    # Convert to tensor and reshape for LSTM
    new_data_tensor = torch.tensor(new_data, dtype=torch.float32).unsqueeze(0).to(device)  
    # Shape: (batch_size=1, sequence_length=1, input_size=22)

    # Forward pass through the model
    model.eval()
    with torch.no_grad():
        new_data_reconstructed = model(new_data_tensor)
        reconstruction_error = torch.mean((new_data_tensor - new_data_reconstructed) ** 2).item()

    # Compare the error with the threshold
    is_anomaly = reconstruction_error > threshold


    if(is_anomaly==True):
        print("Current Record is an anomaly")
    else:
        print("Current Record is not an anomaly")

In [31]:

houseA = pd.read_csv("Aras/house_a_combined_dataset.csv")
data_rec= houseA.iloc[0,:]
house= 'ha'
House= 'A'
pipeline(data_rec,houseA, house, House)

No missing values in the record
Current Record is not an anomaly


  if data_rec[i] not in [0,1]:
  if (data_rec[i]<1) or (data_rec[i]>27):
  state_dict = torch.load(f"anomaly detection/House_{House}_Model_Files/lstm_autoencoder_{house}.pth", map_location=device)
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
