In [173]:
import pandas as pd
from collections import Counter

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from imblearn.over_sampling import RandomOverSampler

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

data_file = "dat.csv"

df = pd.read_csv(data_file)

cols = ["day_of_week", "hour_of_day", "motion_duration"]
X = df[cols].values
y = df["label"].values

In [174]:
class EarlyStopping:
    def __init__(self, patience=4, delta=0.0):
        self.patience = patience
        self.delta = delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
        self.best_state = None

    def __call__(self, val_loss, model):
        if self.best_loss is None or val_loss < self.best_loss - self.delta:
            self.best_loss = val_loss
            self.best_state = model.state_dict()
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True

In [None]:
scaler = MinMaxScaler()

df[cols] = scaler.fit_transform(df[cols]) # to normalize values since some are orders of magnitude greater than others, e.g. motion_duration
NEED THIS TO BE DONE ON TRAIN AND TEST SEPARATELY AS NOT TO LEAK GLOBAL MEAN

print(df)

SEQ_LEN = 10

X_seq, Y_seq = [], []

# sliding window to make preds on each timestep
for i in range(len(X) - SEQ_LEN):
    X_seq.append(X[i:i+SEQ_LEN])
    Y_seq.append(y[i+SEQ_LEN])



X_train, X_test, Y_train, Y_test = train_test_split(X_seq, Y_seq, test_size=0.2, random_state=32)

X_train = np.array(X_train)
X_test = np.array(X_test)
Y_train = np.array(Y_train)
Y_test = np.array(Y_test)

X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
Y_train = torch.tensor(Y_train, dtype=torch.int64)
Y_test = torch.tensor(Y_test, dtype=torch.int64)
class_counts = np.bincount(Y_train)
class_weights = 1.0 / class_counts
sample_weights = class_weights[Y_train]

print(f"X_train shape: {X_train.shape}")
print(f"Y_train shape: {Y_train.shape}")

from torch.utils.data import WeightedRandomSampler

class_counts = np.bincount(Y_train)
class_weights = 1.0 / class_counts
sample_weights = class_weights[Y_train]

print("sample weights", sample_weights)

print(Counter(all_labels))

sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

train_loader = DataLoader(TensorDataset(X_train,
                                        Y_train),
                          batch_size=16, sampler=sampler)
test_loader = DataLoader(TensorDataset(X_test, Y_test), batch_size=16)


from collections import Counter

all_labels = []

for _, labels in train_loader:
    all_labels.extend(labels.numpy().astype(int))

print(Counter(all_labels))

               timestamp  day_of_week  hour_of_day  motion_duration  label
0    2025-05-20 02:15:00     0.000000     0.086957         0.749948      1
1    2025-05-20 07:15:00     0.000000     0.304348         0.017295      0
2    2025-05-20 12:30:00     0.000000     0.521739         0.033132      0
3    2025-05-20 14:00:00     0.000000     0.608696         0.001875      0
4    2025-05-20 18:45:00     0.000000     0.782609         0.062305      0
..                   ...          ...          ...              ...    ...
643  2025-09-04 03:55:00     0.333333     0.130435         0.193582      0
644  2025-09-04 07:15:00     0.333333     0.304348         0.616587      1
645  2025-09-04 11:40:00     0.333333     0.478261         0.071473      1
646  2025-09-04 18:00:00     0.333333     0.782609         0.012294      0
647  2025-09-04 22:35:00     0.333333     0.956522         0.176912      0

[648 rows x 5 columns]
X_train shape: torch.Size([510, 10, 3])
Y_train shape: torch.Size([510])
sam

In [176]:
class Detector(nn.Module):
    def __init__(self, input_size=3, hidden_size=32, num_layers=2, bidirectional=True):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, 
                           batch_first=True, bidirectional=bidirectional) # perhaps make bidirectional to learn patterns relating to other motions
        lstm_output_size = hidden_size * 2 if bidirectional else hidden_size
        self.fc = nn.Linear(lstm_output_size, 1)
        print(f"creating Detector with hidden_size={hidden_size}, and num_layers={num_layers}")

    def forward(self, x):
        out, _ = self.lstm(x)
        return torch.sigmoid(self.fc(out[:, -1, :])).squeeze()


In [210]:
model = Detector()
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

creating Detector with hidden_size=32, and num_layers=2


In [211]:
early_stopper = EarlyStopping(10, 1e-4)

In [212]:
from sklearn.metrics import f1_score

In [213]:
Y_test.float()
for epoch in range(100):
    model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)  # shape: [batch_size]
        loss = criterion(outputs, y_batch.float())
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    avg_loss = total_loss / len(train_loader)
    
    print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}")

    model.eval()
    with torch.no_grad():

        for X_batch, y_batch in test_loader:
            test_outputs = model(X_batch)
            test_loss = criterion(test_outputs, y_batch.float())

            test_outputs = test_outputs.round().numpy()
            print(y_batch, "huh", test_outputs)
            f1 = f1_score(y_batch.numpy(), test_outputs)
            acc = (test_outputs == y_batch).float().mean().item()

        # compute F1 (need to convert to numpy)
        
            print(f"Epoch {epoch+1}, "
            f"Val Loss: {test_loss.item():.4f}, "
            f"Val Acc: {acc*100:.2f}%, "
            f"Val F1: {f1:.4f}")
        
    early_stopper(test_loss.item(), model)

    if early_stopper.early_stop:
        print("stopping due to minimal improvements to prevent overfitting")
        break

Epoch 1, Loss: 0.6780
tensor([1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0]) huh [0. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0.]
Epoch 1, Val Loss: 0.6829, Val Acc: 50.00%, Val F1: 0.4286
tensor([0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0]) huh [0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 1. 1.]
Epoch 1, Val Loss: 0.6336, Val Acc: 81.25%, Val F1: 0.6667
tensor([1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0]) huh [0. 0. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 0.]
Epoch 1, Val Loss: 0.6764, Val Acc: 50.00%, Val F1: 0.5000
tensor([0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0]) huh [1. 0. 1. 0. 1. 0. 1. 1. 0. 0. 1. 0. 1. 1. 1. 0.]
Epoch 1, Val Loss: 0.6728, Val Acc: 56.25%, Val F1: 0.4615
tensor([0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]) huh [0. 0. 1. 1. 0. 0. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1.]
Epoch 1, Val Loss: 0.6271, Val Acc: 87.50%, Val F1: 0.8750
tensor([0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0]) huh [0. 1. 1. 0. 0. 1. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0.]
Epoch 1, Val Loss

In [214]:
sanity_data_file = "sanity_check.csv"

df = pd.read_csv(data_file)

cols = ["day_of_week", "hour_of_day", "motion_duration"]
X = df[cols].values
y = df["label"].values
scaler = MinMaxScaler()

df[cols] = scaler.fit_transform(df[cols]) # to normalize values since some are orders of magnitude greater than others, e.g. motion_duration

df.head()

finalX = df[cols].values
finalY = df["label"].values

X_seq, Y_seq = [], []

# sliding window to make preds on each timestep
for i in range(len(finalX) - SEQ_LEN):
    X_seq.append(finalX[i:i+SEQ_LEN])
    Y_seq.append(finalY[i+SEQ_LEN])

finalX = torch.tensor(X_seq, dtype=torch.float32)
finalY = torch.tensor(Y_seq, dtype=torch.float32)
final_loader = DataLoader(TensorDataset(finalX, finalY),
                          batch_size=32)

In [215]:
with torch.no_grad():

        for X_batch, y_batch in final_loader:
            test_outputs = model(X_batch)
            test_loss = criterion(test_outputs, y_batch.float())

            test_outputs = test_outputs.round().numpy()
            print(y_batch, "huh", test_outputs)
            f1 = f1_score(y_batch.numpy(), test_outputs)
            acc = (test_outputs == y_batch).float().mean().item()

        # compute F1 (need to convert to numpy)
        
            print(f"Val Loss: {test_loss.item():.4f}, "
            f"Val Acc: {acc*100:.2f}%, "
            f"Val F1: {f1:.4f}")
        

tensor([0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1.,
        1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0.]) huh [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0.]
Val Loss: 0.5770, Val Acc: 71.88%, Val F1: 0.0000
tensor([0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 1., 1.,
        0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0.]) huh [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.
 1. 0. 0. 0. 0. 0. 0. 0.]
Val Loss: 0.5200, Val Acc: 78.12%, Val F1: 0.2222
tensor([0., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0.]) huh [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0.]
Val Loss: 0.5903, Val Acc: 75.00%, Val F1: 0.0000
tensor([0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 0.,
        0., 0., 1., 1., 0.

In [216]:
model.load_state_dict(early_stopper.best_state)

<All keys matched successfully>