In [1]:
!pip install ultralytics tensorflow opencv-python fastapi uvicorn aiohttp yt-dlp pafy



# Load tensorflow model

In [2]:
# Load YOLO model
from ultralytics import YOLO

MODEL_PATH = "runs/detect/train/weights/best.pt"
model = YOLO(MODEL_PATH)

print(f"✅ YOLO model loaded: {MODEL_PATH}")

✅ YOLO model loaded: runs/detect/train/weights/best.pt


# Real time video live container damage detection
- Ingest video from youtube
- Ensure the video is reliable with YOLOV8 model are trained
- YOLOV8 Custom Model (the trained weights)
- Live detection overlay
- Risk Engine + RAG Trigger (optional)

# Extract direct video stream

In [3]:
import subprocess
import json

# Extract direct video stream
def get_stream_url(youtube_url):
    cmd = [
        "yt-dlp",
        "-f",
        "best[ext=mp4]",
        "-g",
        youtube_url
    ]
    stream_url = subprocess.check_output(cmd).decode().strip()
    return stream_url

# OpenCV Video Stream Loader

# Download YouTube Video (Recommended)

In [4]:
import cv2

# Use local video file
VIDEO_PATH = "dataset/videos/container_video.mov"

try:
    cap = cv2.VideoCapture(VIDEO_PATH)
    
    if not cap.isOpened():
        raise Exception(f"Cannot open video file: {VIDEO_PATH}")
    
    # Get video info
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    
    print(f"✅ Video loaded: {VIDEO_PATH}")
    print(f"   - Resolution: {width}x{height}")
    print(f"   - FPS: {fps}")
    print(f"   - Total frames: {total_frames}")
        
except Exception as e:
    print(f"❌ Failed to open video: {e}")

✅ Video loaded: dataset/videos/container_video.mov
   - Resolution: 2940x1912
   - FPS: 49
   - Total frames: 3845


# Real-Time YOLOv8 Detection Loop

In [None]:
import numpy as np

frame_count = 0
detection_count = 0
frame_skip = 4  # Skip frames untuk video lebih cepat (ubah ke 3 atau 4 untuk lebih cepat lagi)

# Create named window with close button
cv2.namedWindow("PORT CONTAINER INSPECTION REALTIME DETECTION", cv2.WINDOW_NORMAL)

while True:
    ret, frame = cap.read()
    if not ret or frame is None:
        print("⚠️ End of video")
        break

    # Ensure frame is valid numpy array
    if not isinstance(frame, np.ndarray):
        continue

    frame_count += 1
    
    # Skip frames untuk speed up video
    if frame_count % frame_skip != 0:
        continue

    # OPTIMIZED FOR DENSE CONTAINER DETECTION (Port/Terminal scenes)
    results = model.predict(
        source=frame,
        imgsz=1280,          # Larger size untuk container kecil di kejauhan
        conf=0.08,           # Lower threshold for container with occlusion
        iou=0.25,            # Lower IOU agar detect container yang overlap
        max_det=600,         # Increase maximum detections untuk scene padat
        stream=False,
        verbose=False,       # Set False agar tidak spam console
        agnostic_nms=False,
        classes=None         # Detect all classes
    )

    annotated_frame = results[0].plot(
        line_width=2,        # Bounding box line width
        font_size=10,        # Label font size
        labels=True,         # Show labels
        conf=True            # Show confidence
    )
    
    # Count detections
    boxes = results[0].boxes
    num_detections = len(boxes)
    detection_count += num_detections
    
    # Add comprehensive detection info on frame
    info_text = f"Frame: {frame_count} | Containers: {num_detections} | Total: {detection_count} | SPEED x{frame_skip}"
    cv2.rectangle(annotated_frame, (5, 5), (700, 45), (0, 0, 0), -1)  # Background
    cv2.putText(annotated_frame, info_text,
                (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8, (0, 255, 0), 2)
    
    # Tambah instruksi di bawah
    instruction_text = "ESC=Exit | SPACE=Pause | X (window)=Close"
    cv2.rectangle(annotated_frame, (5, annotated_frame.shape[0]-35), (500, annotated_frame.shape[0]-5), (0, 0, 0), -1)
    cv2.putText(annotated_frame, instruction_text,
                (10, annotated_frame.shape[0]-15),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5, (255, 255, 255), 1)
    
    # Show the frame with detections
    cv2.imshow("PORT CONTAINER INSPECTION REALTIME DETECTION", annotated_frame)

    # Check if window was closed (X button)
    if cv2.getWindowProperty("PORT CONTAINER INSPECTION REALTIME DETECTION", cv2.WND_PROP_VISIBLE) < 1:
        print("⚠️ Window closed by user")
        break

    # Press ESC to exit, SPACE to pause
    key = cv2.waitKey(1) & 0xFF
    if key == 27:  # ESC
        print("⚠️ ESC pressed - Exiting...")
        break
    elif key == 32:  # SPACE
        print("⏸️  PAUSED - Press any key to continue...")
        cv2.waitKey(0)
    elif key == ord('q') or key == ord('Q'):  # Q juga bisa exit
        print("⚠️ Q pressed - Exiting...")
        break

# Close the stream
cap.release()
cv2.destroyAllWindows()
print(f"✅ Processing complete!")
print(f"   - Total frames: {frame_count}")
print(f"   - Frames processed: {frame_count // frame_skip}")
print(f"   - Total detections: {detection_count}")
print(f"   - Avg containers/frame: {detection_count/(frame_count//frame_skip):.2f}")
print(f"   - Speed multiplier: x{frame_skip}")

⚠️ End of video
✅ Processing complete!
   - Total frames: 3824
   - Frames processed: 1274
   - Total detections: 4485
   - Avg containers/frame: 3.52
   - Speed multiplier: x3


# Real-Time performance optimation

In [8]:
# PERFORMANCE OPTIMIZATION VERSION
import numpy as np
import cv2

# BUKA ULANG VIDEO (karena video sudah habis di cell sebelumnya)
VIDEO_PATH = "dataset/videos/container_video.mov"
cap = cv2.VideoCapture(VIDEO_PATH)

if not cap.isOpened():
    print(f"❌ Cannot open video: {VIDEO_PATH}")
else:
    print(f"✅ Video reopened for fast processing")

frame_count = 0
detection_count = 0
frame_skip = 4  # Skip every 4th frame untuk performa lebih cepat

# Create named window with close button
cv2.namedWindow("FAST CONTAINER DETECTION", cv2.WINDOW_NORMAL)

while True:
    ret, frame = cap.read()
    if not ret or frame is None:
        print("⚠️ End of video")
        break

    frame_count += 1
    
    # Skip frames untuk speed up
    if frame_count % frame_skip != 0:
        continue

    # OPTIMIZED DETECTION
    results = model.predict(
        source=frame,
        imgsz=1280,           # Larger size untuk container kecil di kejauhan
        conf=0.1,
        iou=0.25,
        max_det=600,
        stream=False,
        verbose=False,
        half=True,           # GPU half precision (FP16) untuk speed
        device=0             # Use GPU 0 (ubah ke 'cpu' jika tidak ada GPU)
    )

    annotated_frame = results[0].plot()
    
    boxes = results[0].boxes
    num_detections = len(boxes)
    detection_count += num_detections
    
    # Info overlay (top)
    info_text = f"Frame: {frame_count} | Det: {num_detections} | Total: {detection_count} | SPEED x{frame_skip}"
    cv2.rectangle(annotated_frame, (5, 5), (750, 45), (0, 0, 0), -1)
    cv2.putText(annotated_frame, info_text, (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
    
    # Instruksi kontrol (bottom)
    instruction_text = "ESC/Q=Exit | SPACE=Pause | X (window)=Close"
    cv2.rectangle(annotated_frame, (5, annotated_frame.shape[0]-35), (500, annotated_frame.shape[0]-5), (0, 0, 0), -1)
    cv2.putText(annotated_frame, instruction_text,
                (10, annotated_frame.shape[0]-15),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5, (255, 255, 255), 1)
    
    cv2.imshow("FAST CONTAINER DETECTION", annotated_frame)

    # Check if window was closed (X button)
    if cv2.getWindowProperty("FAST CONTAINER DETECTION", cv2.WND_PROP_VISIBLE) < 1:
        print("⚠️ Window closed by user (X button)")
        break

    # Keyboard controls
    key = cv2.waitKey(1) & 0xFF
    if key == 27:  # ESC to exit
        print("⚠️ ESC pressed - Exiting...")
        break
    elif key == ord('q') or key == ord('Q'):  # Q to exit
        print("⚠️ Q pressed - Exiting...")
        break
    elif key == 32:  # SPACE to pause
        print("⏸️  PAUSED - Press any key to continue...")
        cv2.waitKey(0)

cap.release()
cv2.destroyAllWindows()
print(f"✅ Fast processing complete!")
print(f"   - Total frames processed: {frame_count}")
print(f"   - Frames analyzed: {frame_count // frame_skip}")
print(f"   - Total detections: {detection_count}")
if frame_count > 0:
    print(f"   - Avg containers/frame: {detection_count/(frame_count//frame_skip):.2f}")
print(f"   - Speed multiplier: x{frame_skip}")

✅ Video reopened for fast processing
⚠️ End of video
✅ Fast processing complete!
   - Total frames processed: 3824
   - Frames analyzed: 956
   - Total detections: 2456
   - Avg containers/frame: 2.57
   - Speed multiplier: x4


# DAMAGE COUNTER Overlay

In [9]:
CLASS_MAP = {
    0: "dent",
    1: "rust",
    2: "broken_door",
    3: "leak"
}

def extract_counts(result):
    counts = {"dent":0, "rust":0, "broken_door":0, "leak":0}
    boxes = result.boxes

    for cls in boxes.cls:
        name = CLASS_MAP[int(cls)]
        counts[name] += 1

    return counts

# Add loop after inference
result = results[0]
counts = extract_counts(result)
text = f"Dent:{counts['dent']} Rust:{counts['rust']} Broken Door:{counts['broken_door']} Leak:{counts['leak']}"

cv2.putText(annotated_frame,text,
            (20,40),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,(0,255,0), 2)

array([[[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        ...,
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0]],

       [[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        ...,
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0]],

       [[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        ...,
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0]],

       ...,

       [[16, 12, 15],
        [16, 12, 15],
        [16, 12, 15],
        ...,
        [23, 22, 24],
        [23, 22, 24],
        [23, 22, 24]],

       [[16, 12, 15],
        [16, 12, 15],
        [16, 12, 15],
        ...,
        [23, 22, 24],
        [23, 22, 24],
        [23, 22, 24]],

       [[16, 12, 15],
        [16, 12, 15],
        [16, 12, 15],
        ...,
        [23, 22, 24],
        [23, 22, 24],
        [23, 22, 24]]], dtype=uint8)

# REAL BUSINESS MODE — Risk Trigger

In [10]:
severity = (
    counts['dent']*1 +
    counts['rust']*2 +
    counts['broken_door']*3 +
    counts['leak']*4
)

if severity >= 4:
    print("⚠️ High risk container detected! Triggering alert...")
    # Here you can add code to send an alert (e.g., email, SMS, webhook)

⚠️ High risk container detected! Triggering alert...


# Production Version (no GUI)

In [None]:
# Production Version - No GUI, streaming mode
VIDEO_PATH = "dataset/videos/container_video.mov"

for result in model.predict(
    source=VIDEO_PATH,
    stream=True,
    verbose=True,
    conf=0.25,
    imgsz=640
):
    boxes = result.boxes

    if len(boxes) > 0:
        print(f"Damage detected: {len(boxes)} containers")


video 1/1 (frame 1/3845) /Users/miftahhadiyannoor/Documents/logistics-rag/notebooks/dataset/videos/container_video.mov: 416x640 (no detections), 29.9ms
video 1/1 (frame 2/3845) /Users/miftahhadiyannoor/Documents/logistics-rag/notebooks/dataset/videos/container_video.mov: 416x640 (no detections), 28.7ms
video 1/1 (frame 3/3845) /Users/miftahhadiyannoor/Documents/logistics-rag/notebooks/dataset/videos/container_video.mov: 416x640 (no detections), 30.1ms
video 1/1 (frame 4/3845) /Users/miftahhadiyannoor/Documents/logistics-rag/notebooks/dataset/videos/container_video.mov: 416x640 1 rust, 29.1ms
Damage detected: 1 containers
video 1/1 (frame 5/3845) /Users/miftahhadiyannoor/Documents/logistics-rag/notebooks/dataset/videos/container_video.mov: 416x640 1 rust, 29.9ms
Damage detected: 1 containers
video 1/1 (frame 6/3845) /Users/miftahhadiyannoor/Documents/logistics-rag/notebooks/dataset/videos/container_video.mov: 416x640 (no detections), 27.7ms
video 1/1 (frame 7/3845) /Users/miftahhadiyan