# LSTM Model for Human Detection and Tracking

In [34]:
import os
import cv2
import numpy as np
import json
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split


## Load Dataset

In [None]:
# Set base directory and paths
base_dir = os.path.abspath('dataset/personpath22')
video_path = os.path.join(base_dir, 'raw_data')
annotation_dir = os.path.join(base_dir, 'annotation')

# Load video files.
# os.listdir() returns entries in arbitrary order, so sort them: otherwise
# video_files[0] (used as the example video) and the order in which videos
# are processed could differ between machines and runs.
video_files = sorted(f for f in os.listdir(video_path) if f.endswith('.mp4'))
print('Video files:', video_files)


## Load Annotations

In [36]:
# Map each video id (file name up to the first '.') to its parsed annotation.
annotations = {}
for video in video_files:
    uid = video.split('.')[0]

    # Candidate annotation files in priority order: amodal first, then visible
    annotation_paths = [
        os.path.join(annotation_dir, 'anno_amodal_2022', uid + '.mp4.json'),
        os.path.join(annotation_dir, 'anno_visible_2022', uid + '.mp4.json')
    ]

    # Use the first candidate that exists and parses
    loaded = False
    decode_failed = False
    for annotation_path in annotation_paths:
        if not os.path.exists(annotation_path):
            continue
        try:
            # JSON is read as UTF-8 rather than the platform's locale encoding
            with open(annotation_path, 'r', encoding='utf-8') as f:
                annotations[uid] = json.load(f)
        except json.JSONDecodeError:
            # Remember the failure so it is not reported as a missing file
            # below; the next candidate path is still tried.
            decode_failed = True
            print(f'Error decoding JSON for {uid}. Skipping.')
        else:
            loaded = True
            break

    # Only claim "not found" when no candidate file existed at all
    if not loaded and not decode_failed:
        print(f'Annotation file not found for {uid}. Skipping.')


## Preprocess Data

In [None]:
def preprocess_data(video_file, annotation):
    """Decode a video into 64x64 frames with one label per decoded frame.

    Args:
        video_file: Path of the video, passed to ``cv2.VideoCapture``.
        annotation: Mapping with an ``'entities'`` list. Every entity must
            provide ``entity['blob']['frame_idx']`` and a ``entity['labels']``
            mapping; its ``'person'`` value (default 0) becomes the label.

    Returns:
        tuple: ``(frames, labels)`` as NumPy arrays, one entry per frame that
        was read. Frames without an annotated entity get label 0. Both arrays
        are empty when no frame could be read.

    Raises:
        KeyError: If the annotation lacks one of the keys listed above.
    """
    # Build the frame -> label lookup before opening the video so that a
    # malformed annotation cannot leave an open capture behind.
    # NOTE(review): when several entities share a frame index the last one
    # wins, so a frame's label depends on entity order -- confirm intended.
    frame_labels = {entity['blob']['frame_idx']: entity['labels'].get('person', 0)
                    for entity in annotation['entities']}

    frames = []
    labels = []

    cap = cv2.VideoCapture(video_file)
    try:
        frame_idx = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Resize frame to 64x64 for consistency
            frames.append(cv2.resize(frame, (64, 64)))

            # Frames with no annotated entity default to label 0
            labels.append(frame_labels.get(frame_idx, 0))

            frame_idx += 1
    finally:
        # Release the capture even if reading or resizing raised
        cap.release()

    return np.array(frames), np.array(labels)

# Preprocess the first listed video; its id (file name up to the first '.')
# is the key under which its annotation was stored.
video_id = video_files[0].split('.')[0]
example_video = os.path.join(video_path, video_files[0])
frames, labels = preprocess_data(example_video, annotations[video_id])

# Report what was extracted
print(f'Frames shape: {frames.shape}')
print(f'Labels shape: {labels.shape}')
print(f'Labels: {labels}')


In [38]:
def load_annotation_data(file_path):
    """Load and parse a JSON annotation file.

    Args:
        file_path: Path of the annotation file.

    Returns:
        The parsed JSON document.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Annotation file not found: {file_path}")

    # Read as UTF-8 explicitly: the default text encoding is locale-dependent
    # and can mis-decode non-ASCII annotation content on some platforms.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)


In [39]:
def extract_features_from_annotation(annotation_data):
    """Return the ``'features'`` entry of an annotation as a NumPy array.

    This is placeholder logic: it only reads a top-level ``'features'`` key
    and must be adapted to the real structure of the annotation data.

    Args:
        annotation_data: Parsed annotation, expected to be a mapping.

    Returns:
        numpy.ndarray: ``np.array`` of the ``'features'`` value, or an empty
        array when the data is empty, is not a mapping, or has no features.
    """
    print("Extracting features from annotation data:", annotation_data)  # Debug line
    if not annotation_data:
        print("No annotation data available.")
        return np.array([])  # Return an empty array if data is not available

    # Valid JSON may have a list (or scalar) at the top level; such data has
    # no 'features' key, so report that instead of failing on .get().
    if not hasattr(annotation_data, 'get'):
        print("No features found in annotation data.")
        return np.array([])

    # Example placeholder logic – modify based on the actual structure of your annotation data
    features = annotation_data.get('features', [])
    if features is None:
        # An explicit JSON null is treated the same as a missing key
        features = []
    if len(features) == 0:
        print("No features found in annotation data.")
    return np.array(features)


In [None]:
def _stack_as_objects(sequences):
    """Pack per-video arrays into one object-dtype array."""
    try:
        return np.array(sequences, dtype=object)
    except ValueError:
        # NumPy refuses to build the array when the sequences agree on their
        # leading dimension but differ in a later one; store each sequence in
        # its own slot instead.
        packed = np.empty(len(sequences), dtype=object)
        for slot, sequence in enumerate(sequences):
            packed[slot] = sequence
        return packed


def load_dataset():
    """Build next-timestep sequences from the annotations of every video.

    For each name in the module-level ``video_files`` this loads the amodal
    and visible annotation files under ``annotation_dir``, extracts features
    from both, and concatenates them column-wise. All rows but the last become
    the input sequence and all rows but the first become the target. A video
    is skipped (with a printed message) when an annotation file is missing or
    is not valid JSON, or when the two feature arrays cannot be concatenated.

    Returns:
        tuple: ``(X, y)`` object-dtype arrays with one entry per kept video.
    """
    X, y = [], []

    print("Loading annotations...")

    # Iterate through video files
    for video_file in video_files:
        video_name = os.path.splitext(video_file)[0]

        # Define paths to the annotation directories
        amodal_path = os.path.join(annotation_dir, 'anno_amodal_2022', video_name + '.mp4.json')
        visible_path = os.path.join(annotation_dir, 'anno_visible_2022', video_name + '.mp4.json')

        print(f"Trying to load annotations for: {video_name}")
        print(f"Amodal Path: {amodal_path}")
        print(f"Visible Path: {visible_path}")

        # Both annotation files are required; skip the video otherwise
        try:
            amodal_data = load_annotation_data(amodal_path)
            visible_data = load_annotation_data(visible_path)
        except FileNotFoundError as e:
            print(f"Skipping {video_file}: {e}")
            continue
        except json.JSONDecodeError:
            print(f"Error decoding JSON for {video_name}. Skipping.")
            continue

        amodal_features = extract_features_from_annotation(amodal_data)
        visible_features = extract_features_from_annotation(visible_data)

        # Print the shapes of the extracted features
        print(f"Amodal Features Shape for {video_name}: {amodal_features.shape}")
        print(f"Visible Features Shape for {video_name}: {visible_features.shape}")

        # 1D features become a single column so they can be joined along axis 1
        if amodal_features.ndim == 1:
            amodal_features = amodal_features.reshape(-1, 1)
        if visible_features.ndim == 1:
            visible_features = visible_features.reshape(-1, 1)

        try:
            combined_features = np.concatenate([amodal_features, visible_features], axis=1)
        except ValueError as e:
            print(f"Feature shape mismatch for {video_name}: {e}")
            continue

        X.append(combined_features[:-1])  # Input sequence
        y.append(combined_features[1:])   # Predict next timestep

    return _stack_as_objects(X), _stack_as_objects(y)

# Build the annotation-based dataset and report its shape
X, y = load_dataset()
print("Dataset loaded successfully")
print("X shape: {}, y shape: {}".format(X.shape, y.shape))


## Prepare Training Data

In [None]:
# Prepare the data for LSTM
def create_dataset(frames, labels, time_step=10):
    """Slice frames into fixed-length windows labelled by the following frame.

    Args:
        frames: Array of frames, indexed by frame number along axis 0.
        labels: Per-frame labels, indexed like ``frames``.
        time_step: Number of consecutive frames in each window.

    Returns:
        tuple: ``(X, y)`` where each ``X[k]`` holds ``time_step`` flattened
        frames starting at frame ``k`` and ``y[k]`` is the label of the frame
        right after that window. Both are empty arrays when there are not
        more than ``time_step`` frames.
    """
    window_starts = range(len(frames) - time_step)

    # Each frame is flattened so a window has shape (time_step, features)
    windows = [
        frames[start:start + time_step].reshape(time_step, -1)
        for start in window_starts
    ]
    # The target of a window is the label of the first frame after it
    targets = [labels[start + time_step] for start in window_starts]

    return np.array(windows), np.array(targets)

# Window the frames and labels produced by preprocess_data.
# Note: this rebinds X and y, replacing the values returned by load_dataset().
X, y = create_dataset(frames, labels)

# Check the new shapes
print(f'X shape: {X.shape}')  # Should be (num_samples, time_step, features)
print(f'y shape: {y.shape}')  # Adjust as needed

## Build LSTM Model

In [None]:
# One LSTM layer over (time_step, features) windows, dropout for
# regularisation, and a single sigmoid unit for binary classification.
model = Sequential([
    LSTM(50, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()


In [None]:
# Sanity check before training: report shapes and whether any value is NaN
print('X shape: {}, y shape: {}'.format(X.shape, y.shape))
print('Any NaN in X:', np.any(np.isnan(X)))
print('Any NaN in y:', np.any(np.isnan(y)))


## Train the Model

In [44]:
# Make y a column vector of shape (num_samples, 1), one value per sample for
# the model's single output unit.
y = y.reshape((-1, 1))


In [None]:
# Fit the model
history = model.fit(X, y, batch_size=32, epochs=10, validation_split=0.2)

# Plot training history
import matplotlib.pyplot as plt

plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='upper left')
plt.title('Model Accuracy')
plt.show()


## Evaluate the Model

In [None]:
import numpy as np

# Assuming X and y are your full dataset features and labels
# For example:
# X = np.random.rand(500, 10, 64, 64, 3)  # Full dataset with 500 samples
# y = np.random.randint(0, 2, size=(500,))  # Corresponding labels

# Set the random seed for reproducibility
np.random.seed(42)

# Number of test samples you want to randomly select
num_test_samples = 84  # Adjust as needed

# Randomly select indices for test data
test_indices = np.random.choice(X.shape[0], size=num_test_samples, replace=False)

# Create the test datasets
X_test = X[test_indices]
y_test = y[test_indices]

# Optionally, remove the test data from the training set
X_train = np.delete(X, test_indices, axis=0)
y_train = np.delete(y, test_indices, axis=0)

# Print the shapes of the new datasets
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")

loss, accuracy = model.evaluate(X_test, y_test)
print('Test Loss:', loss)
print('Test Accuracy:', accuracy)

In [None]:
# Persist the whole model (architecture + weights + optimizer configuration)
model_path = 'human_detection_model.keras'
model.save(model_path)
print(f"Model saved to {model_path}")

model.summary()