# Digital Witness - LSTM Model Training

This notebook trains the LSTM behavior classifier for the Digital Witness system.



## Step 1: Mount Google Drive & Extract Project

In [None]:
import os

from google.colab import drive

# Mount Google Drive so the project archive is reachable under /content/drive
drive.mount('/content/drive')

# Location of the project archive in the Drive root
zip_path = "/content/drive/MyDrive/DigitalWitness.zip"

if not os.path.exists(zip_path):
    # Fail fast: every later step needs the extracted project, so continuing
    # (e.g. under "Run all") would only fail later with a less obvious error.
    raise FileNotFoundError(
        f"DigitalWitness.zip not found in Google Drive root (looked for {zip_path}). "
        "Please upload the zip file to your Google Drive and re-run this cell."
    )

print("Found DigitalWitness.zip in Google Drive!")

In [None]:
# Extract project files
!unzip -q "/content/drive/MyDrive/DigitalWitness.zip" -d /content/

# List extracted contents
!ls /content/Project_DigitalWitness/

## Step 2: Install Dependencies

In [None]:
!pip install -q torch torchvision ultralytics opencv-python-headless mediapipe numpy
print("Dependencies installed!")

## Step 3: Verify GPU & Check Training Data

In [None]:
import torch

# Report whether a CUDA device is visible to PyTorch before starting training
banner = "=" * 50
print(banner)
print("GPU STATUS")
print(banner)

has_cuda = torch.cuda.is_available()
print(f"CUDA available: {has_cuda}")

if not has_cuda:
    print("WARNING: No GPU detected! Training will be slow.")
    print("Go to Runtime → Change runtime type → T4 GPU")
else:
    # Device index 0 is the only device queried
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU Device: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {gpu_props.total_memory / 1e9:.1f} GB")

In [None]:
from pathlib import Path

# Per-class training folders inside the extracted project
project_root = Path("/content/Project_DigitalWitness")
training_root = project_root / "data" / "training"
normal_dir = training_root / "normal"
shoplifting_dir = training_root / "shoplifting"

banner = "=" * 50
print(banner)
print("TRAINING DATA")
print(banner)

# Collect the .mp4 files of each class; a missing folder counts as zero videos
normal_videos = []
if normal_dir.exists():
    normal_videos = list(normal_dir.glob("*.mp4"))

shoplifting_videos = []
if shoplifting_dir.exists():
    shoplifting_videos = list(shoplifting_dir.glob("*.mp4"))

print(f"Normal videos: {len(normal_videos)}")
print(f"Shoplifting videos: {len(shoplifting_videos)}")

# Both classes need at least one video for the data to be reported as ready
if normal_videos and shoplifting_videos:
    print("\nTraining data ready!")
else:
    print("\nERROR: Training videos not found!")
    print(f"Expected in: {normal_dir}")
    print(f"Expected in: {shoplifting_dir}")

## Step 4: Train the LSTM Model

This will:
1. Initialize CNN (ResNet18) for feature extraction
2. Process all training videos
3. Train LSTM classifier
4. Save model to `models/lstm_classifier.pt`

In [None]:
import os
import sys

# Put the extracted project first on the import path so its `src` package resolves
sys.path.insert(0, '/content/Project_DigitalWitness')

# Make sure the models directory exists before training starts
os.makedirs('/content/Project_DigitalWitness/models', exist_ok=True)

from src.models.train_deep_model import train_lstm_classifier

# Training settings - adjust as needed:
# - epochs: more epochs = better accuracy but longer training
# - max_videos_per_class: set to 10-20 for quick test, None for full training
training_options = {
    "epochs": 50,
    "batch_size": 32,
    "learning_rate": 0.001,
    "val_split": 0.2,
    "max_videos_per_class": None,  # Set to 10 for quick test
}
results = train_lstm_classifier(**training_options)

# Summarize the outcome using the keys this cell reads from the result:
# 'success', 'model_path', 'info' (with the final accuracies) and 'error'
divider = "=" * 50
print("\n" + divider)
print("TRAINING RESULTS")
print(divider)
if not results.get('success'):
    print(f"Training failed: {results.get('error')}")
else:
    print(f"Model saved: {results['model_path']}")
    metrics = results['info']
    print(f"Training accuracy: {metrics['final_train_acc']:.1%}")
    print(f"Validation accuracy: {metrics['final_val_acc']:.1%}")

## Step 5: Save Trained Model to Google Drive

Copy the trained model back to your Google Drive so you can download it.

In [None]:
import os
import shutil

# Source paths (written by the training step)
model_path = "/content/Project_DigitalWitness/models/lstm_classifier.pt"
info_path = "/content/Project_DigitalWitness/models/lstm_classifier_info.json"

# Destination (Google Drive)
drive_dest = "/content/drive/MyDrive/DigitalWitness_Models/"
os.makedirs(drive_dest, exist_ok=True)

# Copy the model; remember whether it succeeded so the final message is truthful
model_copied = False
if os.path.exists(model_path):
    shutil.copy(model_path, drive_dest)
    print("Copied: lstm_classifier.pt")
    model_copied = True
else:
    print("ERROR: Model file not found!")

# The info JSON is optional: copy it when present, stay silent otherwise
if os.path.exists(info_path):
    shutil.copy(info_path, drive_dest)
    print("Copied: lstm_classifier_info.json")

# Only announce success (and download instructions) when the model was copied
if model_copied:
    print(f"\nFiles saved to: {drive_dest}")
    print("\nYou can now download from Google Drive:")
    print("  drive.google.com → DigitalWitness_Models folder")
else:
    print("\nNo model was saved to Google Drive. Check the training step for errors and re-run it.")

## Step 6: (Optional) Quick Test - Run Inference

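Load the trained model and confirm it is ready for inference. This cell only checks that the saved weights load and prints the class list; it does not run a video through the model.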

In [None]:
# Optional sanity check: rebuild the classifier and load the saved model file
from src.models.lstm_classifier import LSTMIntentClassifier
from src.models.cnn_feature_extractor import CNNFeatureExtractor
import cv2
import numpy as np
# NOTE(review): CNNFeatureExtractor, cv2 and np are imported but not used in this cell

# Path of the model file saved by the training step
trained_model_file = "/content/Project_DigitalWitness/models/lstm_classifier.pt"

# Construct the classifier, then load the saved model into it
lstm = LSTMIntentClassifier(input_dim=512, num_classes=2)
lstm.load_model(trained_model_file)

print("Model loaded successfully!")
print(f"Classes: {lstm.classes}")
print("\nModel is ready for inference.")