In [378]:
import pandas as pd

# Load the dataset from disk. Only the three feature columns listed in `cols`
# and the `label` column are extracted in this cell.
df = pd.read_csv("dat.csv")

cols = ["day_of_week", "hour_of_day", "motion_duration"]
# .to_numpy() is the accessor pandas recommends over .values for obtaining the
# underlying NumPy array.
X = df[cols].to_numpy()
y = df["label"].to_numpy()

# To preview the frame, put `df.head()` on the last line of its own cell; in the
# middle of a cell its result is discarded and nothing is displayed.

# Split the rows by class to report how imbalanced the labels are
# (rows with label == 1 are counted as intrusions, label == 0 as non-intrusions).
intrusions = df[df["label"] == 1]
non_intrusions = df[df["label"] == 0]

print(len(intrusions), len(non_intrusions))

169 479


In [368]:
# To limit overfitting, stop training early once the validation loss has failed to improve by more than `delta` for `patience` consecutive checks.

class EarlyStopping:
    """Track a validation loss and flag when it has stopped improving.

    Call the instance once per evaluation with the current loss and the model.
    A loss counts as an improvement only when it is lower than the best loss
    seen so far by more than ``delta`` (the very first call always counts).
    After ``patience`` consecutive calls without an improvement,
    ``early_stop`` is set to True; the caller is responsible for checking it.

    Args:
        patience: Number of consecutive non-improving calls tolerated before
            ``early_stop`` is raised.
        delta: Minimum decrease in loss required to count as an improvement.

    Attributes:
        counter: Consecutive non-improving calls since the last improvement.
        best_loss: Lowest loss accepted so far, or None before the first call.
        early_stop: True once ``counter`` has reached ``patience``.
        best_state: Independent snapshot of ``model.state_dict()`` taken at the
            call that produced ``best_loss``, or None before the first call.
    """

    def __init__(self, patience=4, delta=0.0):
        self.patience = patience
        self.delta = delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
        self.best_state = None

    def __call__(self, val_loss, model):
        """Record one evaluation result.

        Args:
            val_loss: Loss value for the current evaluation.
            model: Object exposing ``state_dict()``; its state is snapshotted
                whenever ``val_loss`` is a new best.
        """
        import copy  # local import keeps this cell self-contained

        improved = self.best_loss is None or val_loss < self.best_loss - self.delta
        if improved:
            self.best_loss = val_loss
            # state_dict() returns references to the live parameter tensors,
            # which the optimizer keeps updating in place. Deep-copy so the
            # snapshot really holds the weights from the best evaluation.
            self.best_state = copy.deepcopy(model.state_dict())
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

In [369]:
from sklearn.preprocessing import MinMaxScaler

# Chronological 80/20 hold-out: the first 80% of the rows train the model and
# the remaining 20% evaluate it. Plain slicing keeps the rows in their original
# order; sklearn's train_test_split shuffles by default, which would scramble
# the temporal ordering the sequence model relies on.
split_idx = int(0.8 * len(X))

X_train, X_test = X[:split_idx], X[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]

# Learn the per-feature min/max from the training rows only, then apply that
# same mapping to both partitions.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [370]:
# Sliding window for the LSTM: each input is SEQ_LEN consecutive rows, and the target is the label of the row immediately after that window.

import numpy as np

SEQ_LEN = 10  # number of consecutive rows in each input window


def create_sequences(X, y, seq_len):
    """Build sliding windows over ``X`` paired with the label that follows each.

    Window ``i`` is ``X[i : i + seq_len]`` and its target is ``y[i + seq_len]``,
    i.e. the label of the row immediately *after* the window. The first
    ``seq_len`` labels therefore never appear as targets.

    Args:
        X: Array-like of shape ``(n_rows, ...)`` holding the per-row features.
        y: Array-like of per-row labels, indexable up to ``len(X) - 1``.
        seq_len: Window length in rows; must be at least 1.

    Returns:
        ``(X_seq, y_seq)`` where ``X_seq`` has shape
        ``(n_windows, seq_len, ...)`` and ``y_seq`` has shape ``(n_windows,)``,
        with ``n_windows = max(len(X) - seq_len, 0)``. When there are not enough
        rows for a single window, correctly shaped empty arrays are returned.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
        IndexError: If ``y`` is too short to supply a target for every window.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")

    X = np.asarray(X)
    y = np.asarray(y)
    n_windows = max(len(X) - seq_len, 0)

    if n_windows == 0:
        # Keep the trailing dimensions so downstream code sees a well-formed
        # (0, seq_len, n_features) batch instead of a shapeless (0,) array.
        empty_X = np.empty((0, seq_len) + X.shape[1:], dtype=X.dtype)
        empty_y = np.empty((0,), dtype=y.dtype)
        return empty_X, empty_y

    X_seq = np.stack([X[start:start + seq_len] for start in range(n_windows)])
    # Index-array lookup returns a copy and raises IndexError if y is too short.
    y_seq = y[np.arange(seq_len, seq_len + n_windows)]
    return X_seq, y_seq

# Window each partition separately so that no sequence mixes training rows with
# test rows. Each call yields len(partition) - SEQ_LEN samples, because the
# first SEQ_LEN rows of a partition only ever serve as context.
X_train_seq, y_train_seq = create_sequences(X_train_scaled, y_train, SEQ_LEN)
X_test_seq, y_test_seq = create_sequences(X_test_scaled, y_test, SEQ_LEN)


In [371]:
import torch
from torch.utils.data import TensorDataset, DataLoader, WeightedRandomSampler

# Wrap the windowed arrays as float32 tensors (labels too, so they can be
# compared with the model's float output by the loss).
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train_seq, y_train_seq, X_test_seq, y_test_seq)
)

# Inverse-frequency weighting: every training window gets a draw weight of
# 1 / (size of its class), so the rarer class is sampled about as often as the
# common one. np.bincount and the lookup below need non-negative integer labels.
class_counts = np.bincount(y_train_seq)
print(class_counts)
class_weights = 1.0 / class_counts
print(class_weights)
sample_weights = class_weights[y_train_seq]

# One "epoch" draws as many windows as the training set holds, with
# replacement, so some windows repeat and others are skipped in a given epoch.
sampler = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True,
)

train_loader = DataLoader(
    TensorDataset(X_train_tensor, y_train_tensor),
    batch_size=16,
    sampler=sampler,
)
# No sampler for the held-out windows: they are iterated once each, in order.
test_loader = DataLoader(
    TensorDataset(X_test_tensor, y_test_tensor),
    batch_size=16,
)

[379 129]
[0.00263852 0.00775194]


In [372]:
import torch.nn as nn

class Detector(nn.Module):
    """LSTM binary classifier over fixed-length feature windows.

    The LSTM output at the last time step is passed through a single linear
    unit and a sigmoid, giving one probability per window.

    Args:
        input_size: Number of features per time step.
        hidden_size: LSTM hidden width per direction.
        num_layers: Number of stacked LSTM layers.
        bidirectional: Whether to run the LSTM in both directions; when True
            the linear layer receives ``2 * hidden_size`` inputs.
    """

    def __init__(self, input_size=3, hidden_size=32, num_layers=1, bidirectional=True):
        super().__init__()
        # Bidirectional so the network can also relate a motion to the ones
        # that come after it inside the window.
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=bidirectional)
        # Both directions are concatenated on the feature axis when bidirectional.
        lstm_output_size = hidden_size * 2 if bidirectional else hidden_size
        self.fc = nn.Linear(lstm_output_size, 1)
        print(f"creating Detector with hidden_size={hidden_size}, and num_layers={num_layers}")

    def forward(self, x):
        """Return one probability per window.

        Args:
            x: Float tensor of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch,)`` with values in ``[0, 1]``.
        """
        out, _ = self.lstm(x)
        # NOTE(review): with bidirectional=True, the reverse direction's output
        # at the last time step has only processed that single step; consider
        # using the final hidden states (h_n) if the backward pass is meant to
        # summarise the whole window.
        last_step = out[:, -1, :]
        # Squeeze only the trailing unit dimension. A bare .squeeze() would also
        # drop the batch dimension for a batch of one, producing a 0-d tensor
        # that no longer matches a (1,)-shaped target.
        return torch.sigmoid(self.fc(last_step)).squeeze(-1)

In [373]:
# Fresh model with the default architecture (hidden_size=32, one bidirectional layer).
model = Detector()
# BCELoss expects probabilities in [0, 1]; the model's forward pass ends in a sigmoid.
criterion = nn.BCELoss()
# Adam over all model parameters with a fixed learning rate of 1e-3.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

creating Detector with hidden_size=32, and num_layers=1


In [374]:
early_stopper = EarlyStopping() # trying with the default params (patience=4, delta=0.0)

In [375]:
from sklearn.metrics import f1_score

# Train for at most 400 epochs, evaluating after every epoch and stopping as
# soon as the early stopper reports that the evaluation loss has plateaued.
# NOTE(review): `test_loader` doubles as the validation set here, so the
# stopping decision has seen the hold-out data; carving a separate validation
# slice out of the training rows would keep the final test score unbiased.
for epoch in range(400):
    # ---- training pass ----
    model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    avg_train_loss = total_loss / len(train_loader)

    # ---- evaluation pass (no gradients, no weight updates) ----
    model.eval()
    val_loss = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            output = model(X_batch)
            loss = criterion(output, y_batch)
            val_loss += loss.item()
            # Threshold the probabilities at 0.5 to get hard 0/1 predictions.
            all_preds.extend(output.round().numpy())
            all_labels.extend(y_batch.numpy())

    avg_val_loss = val_loss / len(test_loader)

    acc = np.mean(np.array(all_preds) == np.array(all_labels))
    f1 = f1_score(all_labels, all_preds)

    # Report before the stopping check so the final epoch's metrics are shown too.
    print(f"epoch {epoch + 1} | train loss: {avg_train_loss:.4f} | val loss: {avg_val_loss:.4f} | Val Acc: {acc*100:.2f}% | Val F1: {f1:.4f}")

    # Feed the per-batch mean (the same number that is printed) rather than the
    # running sum, so `delta` is expressed on the reported loss scale.
    early_stopper(avg_val_loss, model)

    if early_stopper.early_stop:
        print("early stop triggered!!")
        break

# Load the state the early stopper recorded at its best evaluation loss, so the
# model that is used afterwards is not the one from the last (worse) epochs.
# NOTE(review): this is a true rollback only if `best_state` is an independent
# copy of the weights rather than a live reference — confirm in EarlyStopping.
if early_stopper.best_state is not None:
    model.load_state_dict(early_stopper.best_state)

epoch 1 | train loss: 0.6945 | val loss: 0.6904 | Val Acc: 68.33% | Val F1: 0.2692
epoch 2 | train loss: 0.6909 | val loss: 0.7073 | Val Acc: 28.33% | Val F1: 0.4416
epoch 3 | train loss: 0.6870 | val loss: 0.6997 | Val Acc: 28.33% | Val F1: 0.4416
epoch 4 | train loss: 0.6820 | val loss: 0.6662 | Val Acc: 75.83% | Val F1: 0.4314
epoch 5 | train loss: 0.6656 | val loss: 0.6338 | Val Acc: 82.50% | Val F1: 0.6957
epoch 6 | train loss: 0.5600 | val loss: 0.5403 | Val Acc: 70.00% | Val F1: 0.5500
epoch 7 | train loss: 0.4353 | val loss: 0.4749 | Val Acc: 77.50% | Val F1: 0.6747
epoch 8 | train loss: 0.4048 | val loss: 0.4375 | Val Acc: 80.00% | Val F1: 0.7000
epoch 9 | train loss: 0.3936 | val loss: 0.4644 | Val Acc: 78.33% | Val F1: 0.6286
epoch 10 | train loss: 0.2998 | val loss: 0.4005 | Val Acc: 78.33% | Val F1: 0.6905
epoch 11 | train loss: 0.2841 | val loss: 0.3299 | Val Acc: 82.50% | Val F1: 0.7200
epoch 12 | train loss: 0.2768 | val loss: 0.4002 | Val Acc: 81.67% | Val F1: 0.7027
e