# EBD - AI Model Training & Development

This notebook is the central hub for training, evaluating, and exporting the AI models used in the Employee Behavior Detection (EBD) system.

**Models to be handled:**
1.  **Object Detection (YOLOv8):** For detecting people and desks.
2.  **Face Recognition (MTCNN & FaceNet/ArcFace):** For identifying employees.
3.  **Person Tracking (DeepSORT):** For tracking individuals across frames.
4.  **Activity Classification (CNN/LSTM or Transformer-based):** For classifying employee behaviors.

**Workflow:**
1.  **Setup:** Mount Google Drive, install dependencies.
2.  **Data Loading:** Load the Edinburgh Office Dataset from Google Drive.
3.  **Model Training/Fine-tuning:** Train or fine-tune each model.
4.  **Evaluation:** Evaluate model performance.
5.  **Export:** Save the trained models to Google Drive for the backend to use.

## 1. Setup Environment

In [None]:
try:
    from google.colab import drive
    import sys
    COLAB_ENV = True
    print("✅ Detected Google Colab environment. Mounting Google Drive...")
    drive.mount('/content/drive')
    # Add project path to sys.path for module imports
    # sys.path.append('/content/drive/MyDrive/EBD/ai_processor') 
except ImportError:
    COLAB_ENV = False
    print("❌ Not in Google Colab. Ensure your local environment is set up correctly.")

# Install necessary packages
!pip install ultralytics -q
!pip install facenet-pytorch -q
!pip install deep-sort-realtime -q
!pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu118/torch2.0.0/index.html
!pip install mmdet
!pip install mmpose

print("✅ Dependencies installed.")

## 2. Configuration and Data Loading

In [None]:
import os
from pathlib import Path
import torch

# Storage layout: Google Drive when running in Colab, the working directory
# otherwise (adjust the local locations for your own development setup if needed).
GDRIVE_PATH = Path('/content/drive/MyDrive/' if COLAB_ENV else './')
DATASET_PATH = GDRIVE_PATH / ('Datasets/EdinburghOffice' if COLAB_ENV else 'edinburgh_office_dataset')
MODEL_SAVE_PATH = GDRIVE_PATH / ('EBD/models' if COLAB_ENV else 'models')

# Make sure the export directory exists before any model is written to it.
MODEL_SAVE_PATH.mkdir(parents=True, exist_ok=True)

print(f"Dataset path: {DATASET_PATH}")
print(f"Model save path: {MODEL_SAVE_PATH}")

# Report whether the dataset directory is present. This only prints a status
# line; it does not stop the notebook when the dataset is missing.
if DATASET_PATH.exists():
    print("✅ Dataset found.")
else:
    print("❌ Dataset not found! Please run the `download_dataset.ipynb` notebook first.")

# Prefer the GPU whenever PyTorch reports one as available.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
print(f"Using device: {DEVICE}")

## 3. Person and Desk Detection (YOLOv8)

In [None]:
import shutil

from ultralytics import YOLO

# Load a pretrained YOLOv8 model
detection_model = YOLO('yolov8n.pt')  # yolov8n is small and fast

# It's recommended to fine-tune the model on a custom dataset of office environments
# For now, we will use the pre-trained model for demonstration

# Example of running inference on an image from the dataset
# Note: You'll need to find an actual image file in your dataset

# Defined up front so that later cells (the face-recognition example reads both
# names) never hit a NameError, even if the file search below raises.
image_files = []
sample_image_path = None
try:
    image_files = list(DATASET_PATH.rglob('*.jpg')) + list(DATASET_PATH.rglob('*.png'))
    if image_files:
        sample_image_path = image_files[0]
        print(f"Running detection on sample image: {sample_image_path}")
        results = detection_model(sample_image_path)
        results[0].show()
    else:
        print("No sample images found to test detection.")
except Exception as e:
    # Best-effort demo: report the problem and carry on with the export.
    print(f"Error during detection test: {e}")

# Export the model for later use in the backend
exported_model_path = detection_model.export(format='onnx')  # Export to ONNX for backend flexibility
print(f"Detection model exported to: {exported_model_path}")

# Move to GDrive
if COLAB_ENV:
    final_path = MODEL_SAVE_PATH / 'yolov8n_detection.onnx'
    # The export lands on the Colab VM's local disk while MODEL_SAVE_PATH is on
    # the mounted Drive. os.rename cannot cross filesystems; shutil.move falls
    # back to copy-and-delete when a plain rename is not possible.
    shutil.move(str(exported_model_path), str(final_path))
    print(f"Moved to {final_path}")

## 4. Face Recognition (MTCNN + FaceNet)

In [None]:
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image

# Face detector (MTCNN) and embedding network (InceptionResnetV1 with the
# 'vggface2' pretrained weights), both placed on the selected device.
mtcnn = MTCNN(device=DEVICE, keep_all=True)
resnet = InceptionResnetV1(pretrained='vggface2')
# Inference only: switch the embedding network to eval mode.
resnet = resnet.to(DEVICE).eval()

def generate_face_embeddings(image_path):
    """Detect the faces in an image and return one embedding per detected face.

    Args:
        image_path: Location of the image file, as a ``pathlib.Path`` or a
            plain string.

    Returns:
        The tensor returned by ``resnet`` for the face crops produced by
        ``mtcnn``, or ``None`` when no face is detected or the image cannot
        be processed (the reason is printed in both cases).
    """
    # Works for Path objects and plain strings alike, so building the log
    # message can never raise by itself.
    image_name = getattr(image_path, 'name', image_path)
    try:
        # Close the file handle promptly and normalise to 3-channel RGB so
        # grayscale / RGBA / palette images are handled the same way.
        with Image.open(image_path) as raw_img:
            img = raw_img.convert('RGB')

        # A single forward pass both detects and crops the faces; it yields
        # None when nothing is found, so no separate detect() call is needed.
        faces = mtcnn(img)
        if faces is None:
            print(f"No faces detected in {image_name}")
            return None

        # Inference only: skip autograd bookkeeping to save memory.
        with torch.no_grad():
            embeddings = resnet(faces.to(DEVICE))
        return embeddings
    except Exception as e:
        # Best-effort helper: report the failure and let the caller continue.
        print(f"Error processing {image_name}: {e}")
        return None

# Building a reference database needs a curated set of employee images, which
# is not available yet, so this cell only demonstrates the flow and persists
# the network weights.

# Per the original note, facenet-pytorch obtains its pretrained weights itself
# (via torch.hub), so the backend only needs the library installed. Saving the
# state_dict becomes useful once the network has been fine-tuned.
torch.save(resnet.state_dict(), MODEL_SAVE_PATH.joinpath('facenet_resnet_vggface2.pt'))
print(f"FaceNet model state_dict saved to {MODEL_SAVE_PATH / 'facenet_resnet_vggface2.pt'}")

# Example usage on the sample image picked in the detection section.
if image_files:
    print(f"\nGenerating embeddings for sample image: {sample_image_path.name}")
    if (embeddings := generate_face_embeddings(sample_image_path)) is not None:
        print(f"Generated {len(embeddings)} embeddings with shape {embeddings.shape}")

## 5. Person Tracking (DeepSORT)

In [None]:
from deep_sort_realtime.deepsort_tracker import DeepSort

# DeepSORT has no model file of its own to export: it is an algorithm that the
# backend imports from the library and feeds with bounding boxes from a
# detector (such as YOLOv8) inside a real-time video pipeline. The
# YOLOv8 + DeepSORT integration itself lives in that backend pipeline.

tracker = DeepSort(
    max_age=30,  # number of frames a track is kept alive without a detection
)

print("DeepSORT tracker initialized.")

## 6. Activity Classification

In [None]:
# Activity classification is the most involved stage and needs a custom-labeled
# dataset. The intended process:
#   1. For every tracked person, collect a short frame sequence (e.g. 16 frames).
#   2. Extract pose information with a model such as MMPose.
#   3. Feed the pose sequence or image crops to a classifier
#      (e.g. a 3D-CNN, an LSTM, or a video transformer like TimeSformer).
#   4. Train that classifier on labeled data (e.g. 'typing', 'on_phone', 'talking').
#
# No pre-labeled public dataset covers these specific activities, so for now
# the steps are only outlined and a placeholder file stands in for the model.

print("Activity Classification: Model training pipeline needs to be built.")
print("This requires a custom dataset with labeled activities.")

# Stand-in for the future trained model.
placeholder_content = "This is a placeholder for the activity classification model."
(MODEL_SAVE_PATH / 'activity_classifier_placeholder.txt').write_text(placeholder_content)

print(f"Placeholder created at: {MODEL_SAVE_PATH / 'activity_classifier_placeholder.txt'}")

## 7. Full Pipeline Simulation (Conceptual)

In [None]:
import cv2
import numpy as np

def process_frame(frame):
    """Run detection and tracking on one frame and draw the confirmed tracks.

    Args:
        frame: Image passed to ``detection_model``, ``tracker.update_tracks``
            and the OpenCV drawing calls (presumably a BGR ``numpy`` array as
            read by OpenCV — confirm against the caller).

    Returns:
        The same ``frame`` object, annotated in place with a bounding box and
        an ``ID: <track_id>`` label for every confirmed track.
    """
    person_class_id = 0  # Class ID for 'person' in COCO is 0

    # 1. Detect persons and desks using YOLOv8
    detections = detection_model(frame)[0].boxes.data.cpu().numpy()
    # Detections are in format [x1, y1, x2, y2, conf, class_id]

    person_detections = []
    for det in detections:
        if int(det[5]) != person_class_id:
            continue
        x1, y1, x2, y2 = (float(v) for v in det[:4])
        # DeepSort expects each box as [left, top, width, height], not as
        # corner coordinates, so convert before handing it to the tracker.
        ltwh = [x1, y1, x2 - x1, y2 - y1]
        person_detections.append((ltwh, float(det[4]), 'person'))

    # 2. Update tracker with person detections
    tracks = tracker.update_tracks(person_detections, frame=frame)

    # 3. Process each track
    for track in tracks:
        if not track.is_confirmed():
            continue

        track_id = track.track_id
        left, top, right, bottom = (int(v) for v in track.to_ltrb()[:4])

        # Draw bounding box and the track label just above it
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
        cv2.putText(frame, f"ID: {track_id}", (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # 4. Face Recognition (on confirmed tracks, periodically)
        # This would involve cropping the face, getting the embedding, and matching it to a database.

        # 5. Activity Classification (on confirmed tracks)
        # This would involve feeding a sequence of frames/poses for this track_id to the classifier.

    return frame

# Status lines: the function above is only a sketch of the backend logic.
print(
    "Conceptual pipeline function `process_frame` is defined.",
    "This logic will be implemented in the backend service.",
    sep="\n",
)