# Driver Distraction Detection — Video Analysis Pipeline

This notebook loads a trained custom CNN model and applies it frame-by-frame on input driving videos to detect distracted behaviors.

The main goals:
- Load a `.mp4` video
- Sample frames at a specified FPS
- Predict the driver’s behavior using the CNN model
- Track and annotate behaviors over time
- Output a dictionary summary of detected offences


### 1. Load Model and Define Analysis Function

We load the trained Keras model and define `analyze_video_for_offences`, a function that:
- Loads the video
- Extracts frames at intervals
- Classifies each frame
- Tracks consistent predictions (e.g., the same class sustained for 10+ seconds)
- Returns a dictionary of detected offences with timestamps

### 2. Flask Integration, end to end

We integrate the pipeline into a small Flask application so that passengers can upload videos and employees can analyze them and make decisions in a browser.
- We accept video uploads, store basic metadata, and trigger analysis.
- We run the analysis function, collect offence segments, and save evidence snapshots in a static location.
- We render a results page that shows a gallery of snapshots and a clear flag decision.
- We record flag decisions for follow up and list uploaded items and flagged cases in simple views.
- We display a loading overlay during analysis to communicate progress and keep the experience smooth.

In [None]:
# importing required libraries
import os
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# loading trained CNN model from the relative path "custom_cnn_model";
# this runs once, when this cell/module is executed
model = tf.keras.models.load_model("custom_cnn_model")
# class labels, indexed by the argmax of the model output inside
# analyze_video_for_offences
# NOTE(review): the order presumably has to match the label order used during
# training — confirm against the training code
target_classes = ['safe_driving', 'using_phone', 'drinking']

def analyze_video_for_offences(video_path, min_offence_duration=10, fps_extract=5, conf_threshold=0.9, num_end_frames=3):
    """Classify sampled frames of a video and report sustained offences.

    The video is read once, roughly ``fps_extract`` frames per second are
    classified with the module-level ``model``, consecutive samples with the
    same label are merged into segments, and every 'using_phone' / 'drinking'
    segment lasting at least ``min_offence_duration`` seconds is reported. For
    each reported offence a combined snapshot image (best frame plus the last
    frames of the segment) is written under ``static/combined_snapshots``.

    Args:
        video_path: Path of the video file to analyse.
        min_offence_duration: Minimum segment length in seconds (time between
            the first and last sampled frame of the segment) to count as an
            offence.
        fps_extract: Target number of frames per second to classify.
        conf_threshold: Minimum top-class probability; frames below it are
            labelled "UNKNOWN".
        num_end_frames: Number of trailing frames of each offence segment to
            include in the snapshot; values <= 0 include none.

    Returns:
        ``{}`` when the video cannot be opened or reports no usable frame
        rate. Otherwise ``{"offences": [...]}`` where each item has the keys
        ``class``, ``start``, ``end``, ``duration``, ``best_confidence``,
        ``combined_snapshot`` (``None`` if no frame could be re-read for the
        snapshot) and ``frame_count``.
    """
    # opening video file
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Could not open video!")
        return {}

    try:
        # reading video properties
        real_fps = cap.get(cv2.CAP_PROP_FPS)
        # `not x > 0` also rejects NaN, which `x <= 0` would let through
        if not real_fps > 0:
            print("Could not determine video frame rate!")
            return {}
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        video_duration = total_frames / real_fps
        frame_interval = int(real_fps / fps_extract)
        if frame_interval == 0:
            # source FPS is below fps_extract: classify every frame instead
            # of failing on a modulo by zero
            frame_interval = 1

        # performing pass 1: predicting frames
        timeline = _predict_sampled_frames(cap, real_fps, frame_interval, conf_threshold)
    finally:
        # releasing the capture even when decoding or prediction raises
        cap.release()
    print(f"Processed {len(timeline)} sampled frames over {video_duration:.1f}s")

    # filtering and collecting offences
    offence_classes = ('using_phone', 'drinking')
    offences = []
    for run in _group_consecutive_runs(timeline):
        start, cls = run[0][0], run[0][1]
        end = run[-1][0]
        if cls in offence_classes and end - start >= min_offence_duration:
            # max() keeps the earliest entry on ties
            best_ts, _, best_conf = max(run, key=lambda entry: entry[2])
            # a bare run[-0:] would return the whole run, so guard explicitly
            tail = run[-num_end_frames:] if num_end_frames > 0 else []
            end_frame_times = [ts for ts, _, _ in tail]
            offences.append((start, end, cls, best_ts, best_conf, end_frame_times))

    # extracting snapshots for detected offences
    results = {"offences": []}
    if offences:
        os.makedirs("static/combined_snapshots", exist_ok=True)
        cap = cv2.VideoCapture(video_path)
        try:
            for (start, end, cls, best_ts, best_conf, end_frame_times) in offences:
                frame_images = []
                frame_labels = []

                # capturing best frame
                best_frame = _read_frame_rgb(cap, best_ts)
                if best_frame is not None:
                    frame_images.append(best_frame)
                    frame_labels.append(f"BEST ({best_conf:.2f})")

                # capturing end frames
                for ts in end_frame_times:
                    end_frame = _read_frame_rgb(cap, ts)
                    if end_frame is not None:
                        frame_images.append(end_frame)
                        frame_labels.append(f"End @ {int(ts)}s")

                # creating combined snapshot figure; skipped when no frame
                # could be re-read, because a zero-column subplot grid raises
                n_frames = len(frame_images)
                combined_name = None
                if n_frames:
                    combined_name = f"static/combined_snapshots/offence_{cls}_{int(start)}s_to_{int(end)}s_combined.jpg"
                    _save_combined_snapshot(
                        frame_images,
                        frame_labels,
                        f"{cls.upper()} from {start:.1f}s to {end:.1f}s (duration {end-start:.1f}s)",
                        combined_name,
                    )

                # storing offence details in results
                results["offences"].append({
                    "class": cls,
                    "start": start,
                    "end": end,
                    "duration": end - start,
                    "best_confidence": best_conf,
                    "combined_snapshot": combined_name,
                    "frame_count": n_frames
                })
        finally:
            cap.release()

    # printing final offence report
    print("\nOffence Report:")
    for offence in results["offences"]:
        print(f" - {offence['class'].upper()} [{offence['best_confidence']*100:.1f}%] "
              f"from {offence['start']:.1f}s to {offence['end']:.1f}s "
              f"→ Combined snapshot: {offence['combined_snapshot']}")

    return results


def _predict_sampled_frames(cap, real_fps, frame_interval, conf_threshold):
    """Classify every ``frame_interval``-th frame; return (timestamp_sec, label, confidence) tuples."""
    timeline = []
    frame_idx = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if frame_idx % frame_interval == 0:
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            img = cv2.resize(frame_rgb, (224, 224)).astype("float32") / 255.0
            preds = model.predict(np.expand_dims(img, axis=0), verbose=0)[0]
            top_idx = int(np.argmax(preds))
            pred_class = target_classes[top_idx] if preds[top_idx] >= conf_threshold else "UNKNOWN"
            # storing a plain Python float so the result stays JSON-serialisable
            timeline.append((frame_idx / real_fps, pred_class, float(preds[top_idx])))
        frame_idx += 1
    return timeline


def _group_consecutive_runs(timeline):
    """Split the timeline into lists of consecutive entries that share the same label."""
    runs = []
    for entry in timeline:
        if runs and runs[-1][0][1] == entry[1]:
            runs[-1].append(entry)
        else:
            runs.append([entry])
    return runs


def _read_frame_rgb(cap, timestamp_sec):
    """Seek to ``timestamp_sec`` and return that frame converted to RGB, or None if the read fails."""
    cap.set(cv2.CAP_PROP_POS_MSEC, timestamp_sec * 1000)
    ret, frame = cap.read()
    if not ret:
        return None
    return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)


def _save_combined_snapshot(frame_images, frame_labels, title, out_path):
    """Draw the frames side by side with their labels and save the figure to ``out_path``."""
    n_frames = len(frame_images)
    # squeeze=False always yields a 2-D axes array, so a single frame needs
    # no special case
    fig, axes = plt.subplots(1, n_frames, figsize=(4*n_frames, 4), squeeze=False)
    try:
        for ax, image, label in zip(axes[0], frame_images, frame_labels):
            ax.imshow(image)
            ax.set_title(label)
            ax.axis("off")
        fig.suptitle(title)
        fig.tight_layout()
        fig.savefig(out_path)
    finally:
        # closing this specific figure so it is freed even if saving fails
        plt.close(fig)

# executing analysis with Flask integration parameters
# NOTE(review): `video_path` is not defined anywhere in the visible code; it
# must already be bound (presumably by the Flask upload handler) before this
# statement runs — confirm.
# min_offence_duration=4 overrides the function default of 10; the remaining
# keyword arguments repeat the function defaults.
results = analyze_video_for_offences(
    video_path=video_path,
    min_offence_duration=4,
    fps_extract=5,
    conf_threshold=0.9,
    num_end_frames=3
)