In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import numpy as np
import pandas as pd
import glob

In [2]:
# Load Normal and Anomalous Data
normal_features_path = r'F:\VideoAnomalyDetection\Data\extracted-features\Normal\*.csv'
anomalous_features_path = r'F:\VideoAnomalyDetection\Data\extracted-features\Anomaly\*.csv'

In [3]:
# Load normal features
normal_files = glob.glob(normal_features_path)
normal_dfs = [pd.read_csv(file) for file in normal_files]
normal_data = pd.concat(normal_dfs, ignore_index=True)

# Load anomalous features
anomalous_files = glob.glob(anomalous_features_path)
anomalous_dfs = [pd.read_csv(file) for file in anomalous_files]
anomalous_data = pd.concat(anomalous_dfs, ignore_index=True)

# Combine data and create labels
normal_data['label'] = 0
anomalous_data['label'] = 1
df = pd.concat([normal_data, anomalous_data], ignore_index=True)


In [4]:
y_sequences

NameError: name 'y_sequences' is not defined

In [5]:
df

Unnamed: 0,Frame Number,Number of Objects,Spatial Density,Class Distribution,Mean Velocity,Max Velocity,Variance in Velocity,Mean Acceleration,Max Acceleration,Variance in Acceleration,Mean Direction,Direction Variance,Mean Displacement,Angle Variance,Interaction Count,Mean IoU,Spatial Density.1,label
0,0,1,0.000741,"1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.0,0.004395,0
1,1,1,0.000740,"1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0,0.0,0.004398,0
2,2,1,0.000741,"1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",0.015503,0.015503,0.000000,0.000000,0.000000,0.000000,1.688193,0.000000,0.015503,0.000000,0,0.0,0.004394,0
3,3,1,0.000740,"1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",0.014068,0.014068,0.000000,-0.001435,-0.001435,0.000000,-1.710086,0.000000,0.014068,0.000000,0,0.0,0.004399,0
4,4,1,0.000740,"1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",0.007034,0.007034,0.000000,-0.007033,-0.007033,0.000000,-1.745217,0.000000,0.007034,0.000000,0,0.0,0.004401,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34277,1700,3,0.000167,"0.6666666666666666,0.0,0.3333333333333333,0.0,...",0.244290,0.509510,0.040760,-0.503042,-0.018128,0.177473,-0.167855,0.081126,0.244290,0.081126,0,0.0,0.058492,1
34278,1701,3,0.000166,"0.6666666666666666,0.0,0.3333333333333333,0.0,...",0.582949,0.917791,0.155337,0.338659,0.714549,0.083939,-2.423426,0.298894,0.582949,0.298894,0,0.0,0.058689,1
34279,1702,3,0.000168,"0.6666666666666666,0.0,0.3333333333333333,0.0,...",0.563766,0.798124,0.057178,-0.019183,0.768493,0.358568,-0.211579,5.762207,0.563766,5.762207,0,0.0,0.058019,1
34280,1703,3,0.000170,"0.6666666666666666,0.0,0.3333333333333333,0.0,...",0.227757,0.309818,0.005468,-0.336009,-0.104879,0.027610,2.645068,0.148300,0.227757,0.148300,0,0.0,0.057589,1


In [6]:
# Select features (excluding label)
numerical_cols = df.select_dtypes(include=[np.number]).drop(columns=['label']).columns
X = df[numerical_cols].values
y = df['label'].values


KeyError: 'number_of_objects'

In [7]:
# 2. Prepare Data for LSTM (Split and Reshape into Sequences)
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape the input data to have the shape [samples, time steps, features]
# Here we'll assume a sliding window approach with a window size of 10 frames per sequence
sequence_length = 10

In [8]:
def create_sequences(X, y, seq_length):
    sequences = []
    labels = []
    for i in range(len(X) - seq_length):
        sequences.append(X[i:i + seq_length])
        labels.append(y[i + seq_length])  # The label corresponds to the last frame in the sequence
    return np.array(sequences), np.array(labels)

In [9]:
X_train_seq, y_train_seq = create_sequences(X_train, y_train, sequence_length)
X_test_seq, y_test_seq = create_sequences(X_test, y_test, sequence_length)

# 3. Create Dataset and DataLoader
class VideoDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float32), torch.tensor(self.y[idx], dtype=torch.long)


In [10]:
train_dataset = VideoDataset(X_train_seq, y_train_seq)
test_dataset = VideoDataset(X_test_seq, y_test_seq)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [11]:
#  4. Define the LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.hidden_size = hidden_size

    def forward(self, x):
        # Initialize hidden state with zeros
        h0 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
        
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))
        
        # Pass the output of the last time step to the fully connected layer
        out = self.fc(out[:, -1, :])
        return out

In [12]:
# Hyperparameters
input_size = X_train_seq.shape[2]  # Number of features
hidden_size = 64  # Number of LSTM units
num_layers = 1  # Single LSTM layer
output_size = 2  # Binary classification

# 5. Model, Loss Function, and Optimizer
model = LSTMModel(input_size, hidden_size, num_layers, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

  from .autonotebook import tqdm as notebook_tqdm


In [13]:
# 6. Training the Model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        
        # Backward and optimize
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')


Epoch [1/10], Loss: 0.6898
Epoch [2/10], Loss: 0.6900
Epoch [3/10], Loss: 0.6897
Epoch [4/10], Loss: 0.6895
Epoch [5/10], Loss: 0.6894
Epoch [6/10], Loss: 0.6892
Epoch [7/10], Loss: 0.6895
Epoch [8/10], Loss: 0.6892
Epoch [9/10], Loss: 0.6893
Epoch [10/10], Loss: 0.6890


In [14]:
# 7. Evaluate the Model
model.eval()
y_true = []
y_pred = []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        outputs = model(X_batch)
        _, predicted = torch.max(outputs, 1)
        y_true.extend(y_batch.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())


In [15]:
# 8. Print Accuracy and Classification Report
print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}")
print(classification_report(y_true, y_pred, target_names=['Normal', 'Anomalous']))

Accuracy: 0.5357
              precision    recall  f1-score   support

      Normal       0.34      0.01      0.01      3163
   Anomalous       0.54      0.99      0.70      3684

    accuracy                           0.54      6847
   macro avg       0.44      0.50      0.35      6847
weighted avg       0.45      0.54      0.38      6847

