In [1]:
!pip install ultralytics tensorflow opencv-python fastapi uvicorn aiohttp yt-dlp pafy



# Load tensorflow model

In [2]:
# Load YOLO model
from ultralytics import YOLO

MODEL_PATH = "runs/detect/train/weights/best.pt"
model = YOLO(MODEL_PATH)

print(f"✅ YOLO model loaded: {MODEL_PATH}")

✅ YOLO model loaded: runs/detect/train/weights/best.pt


# Real time video live container damage detection
- Ingest video from youtube
- Ensure the video is reliable with YOLOV8 model are trained
- YOLOV8 Custom Model (the trained weights)
- Live detection overlay
- Risk Engine + RAG Trigger (optional)

# Extract direct video stream

In [3]:
import subprocess
import json

# Extract direct video stream
def get_stream_url(youtube_url):
    cmd = [
        "yt-dlp",
        "-f",
        "best[ext=mp4]",
        "-g",
        youtube_url
    ]
    stream_url = subprocess.check_output(cmd).decode().strip()
    return stream_url

# OpenCV Video Stream Loader

# Download YouTube Video (Recommended)

In [4]:
import cv2

# Use local video file
VIDEO_PATH = "dataset/videos/container_video.mov"

try:
    cap = cv2.VideoCapture(VIDEO_PATH)
    
    if not cap.isOpened():
        raise Exception(f"Cannot open video file: {VIDEO_PATH}")
    
    # Get video info
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    
    print(f"✅ Video loaded: {VIDEO_PATH}")
    print(f"   - Resolution: {width}x{height}")
    print(f"   - FPS: {fps}")
    print(f"   - Total frames: {total_frames}")
        
except Exception as e:
    print(f"❌ Failed to open video: {e}")

✅ Video loaded: dataset/videos/container_video.mov
   - Resolution: 2940x1912
   - FPS: 47
   - Total frames: 9784


# Real-Time YOLOv8 Detection Loop

In [None]:
import numpy as np

frame_count = 0
detection_count = 0
frame_skip = 4  # Skip frames untuk video lebih cepat (ubah ke 3 atau 4 untuk lebih cepat lagi)

# Create named window with close button
cv2.namedWindow("PORT CONTAINER INSPECTION REALTIME DETECTION", cv2.WINDOW_NORMAL)

while True:
    ret, frame = cap.read()
    if not ret or frame is None:
        print("⚠️ End of video")
        break

    # Ensure frame is valid numpy array
    if not isinstance(frame, np.ndarray):
        continue

    frame_count += 1
    
    # Skip frames untuk speed up video
    if frame_count % frame_skip != 0:
        continue

    # OPTIMIZED FOR DENSE CONTAINER DETECTION (Port/Terminal scenes)
    results = model.predict(
        source=frame,
        imgsz=1280,          # Larger size untuk container kecil di kejauhan
        conf=0.39,           # Lower threshold for container with occlusion
        iou=0.25,            # Lower IOU agar detect container yang overlap
        max_det=150,         # Increase maximum detections untuk scene padat
        stream=False,
        verbose=False,       # Set False agar tidak spam console
        agnostic_nms=False,
        classes=None         # Detect all classes
    )

    annotated_frame = results[0].plot(
        line_width=2,        # Bounding box line width
        font_size=10,        # Label font size
        labels=True,         # Show labels
        conf=True            # Show confidence
    )
    
    # Count detections
    boxes = results[0].boxes
    num_detections = len(boxes)
    detection_count += num_detections
    
    # Add comprehensive detection info on frame
    info_text = f"Frame: {frame_count} | Containers: {num_detections} | Total: {detection_count} | SPEED x{frame_skip}"
    cv2.rectangle(annotated_frame, (5, 5), (700, 45), (0, 0, 0), -1)  # Background
    cv2.putText(annotated_frame, info_text,
                (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8, (0, 255, 0), 2)
    
    # Tambah instruksi di bawah
    instruction_text = "ESC=Exit | SPACE=Pause | X (window)=Close"
    cv2.rectangle(annotated_frame, (5, annotated_frame.shape[0]-35), (500, annotated_frame.shape[0]-5), (0, 0, 0), -1)
    cv2.putText(annotated_frame, instruction_text,
                (10, annotated_frame.shape[0]-15),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5, (255, 255, 255), 1)
    
    # Show the frame with detections
    cv2.imshow("PORT CONTAINER INSPECTION REALTIME DETECTION", annotated_frame)

    # Check if window was closed (X button)
    if cv2.getWindowProperty("PORT CONTAINER INSPECTION REALTIME DETECTION", cv2.WND_PROP_VISIBLE) < 1:
        print("⚠️ Window closed by user")
        break

    # Press ESC to exit, SPACE to pause
    key = cv2.waitKey(1) & 0xFF
    if key == 27:  # ESC
        print("⚠️ ESC pressed - Exiting...")
        break
    elif key == 32:  # SPACE
        print("⏸️  PAUSED - Press any key to continue...")
        cv2.waitKey(0)
    elif key == ord('q') or key == ord('Q'):  # Q juga bisa exit
        print("⚠️ Q pressed - Exiting...")
        break

# Close the stream
cap.release()
cv2.destroyAllWindows()
print(f"✅ Processing complete!")
print(f"   - Total frames: {frame_count}")
print(f"   - Frames processed: {frame_count // frame_skip}")
print(f"   - Total detections: {detection_count}")
print(f"   - Avg containers/frame: {detection_count/(frame_count//frame_skip):.2f}")
print(f"   - Speed multiplier: x{frame_skip}")

KeyboardInterrupt: 

# Real-Time performance optimation

In [None]:
# PERFORMANCE OPTIMIZATION VERSION
import numpy as np
import cv2

# BUKA ULANG VIDEO (karena video sudah habis di cell sebelumnya)
VIDEO_PATH = "dataset/videos/container_video.mov"
cap = cv2.VideoCapture(VIDEO_PATH)

if not cap.isOpened():
    print(f"❌ Cannot open video: {VIDEO_PATH}")
else:
    print(f"✅ Video reopened for fast processing")
    
frame_count = 0
detection_count = 0
frame_skip = 4  # Skip every 4th frame untuk performa lebih cepat

# Create named window with close button
cv2.namedWindow("FAST CONTAINER DETECTION", cv2.WINDOW_NORMAL)

while True:
    ret, frame = cap.read()
    if not ret or frame is None:
        print("⚠️ End of video")
        break

    frame_count += 1
    
    # Skip frames untuk speed up
    if frame_count % frame_skip != 0:
        continue

    # OPTIMIZED DETECTION
    results = model.predict(
        source=frame,
        imgsz=1280,           # Larger size untuk container kecil di kejauhan
        conf=0.39,
        iou=0.25,
        max_det=150,
        stream=False,
        verbose=True,
        half=True,           # GPU half precision (FP16) untuk speed
        device=0             # Use GPU 0 (ubah ke 'cpu' jika tidak ada GPU)
    )

    annotated_frame = results[0].plot()
    
    boxes = results[0].boxes
    num_detections = len(boxes)
    detection_count += num_detections
    
    # Info overlay (top)
    info_text = f"Frame: {frame_count} | Det: {num_detections} | Total: {detection_count} | SPEED x{frame_skip}"
    cv2.rectangle(annotated_frame, (5, 5), (750, 45), (0, 0, 0), -1)
    cv2.putText(annotated_frame, info_text, (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
    
    # Instruksi kontrol (bottom)
    instruction_text = "ESC/Q=Exit | SPACE=Pause | X (window)=Close"
    cv2.rectangle(annotated_frame, (5, annotated_frame.shape[0]-35), (500, annotated_frame.shape[0]-5), (0, 0, 0), -1)
    cv2.putText(annotated_frame, instruction_text,
                (10, annotated_frame.shape[0]-15),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5, (255, 255, 255), 1)
    
    cv2.imshow("FAST CONTAINER DETECTION", annotated_frame)

    # Check if window was closed (X button)
    if cv2.getWindowProperty("FAST CONTAINER DETECTION", cv2.WND_PROP_VISIBLE) < 1:
        print("⚠️ Window closed by user (X button)")
        break

    # Keyboard controls
    key = cv2.waitKey(1) & 0xFF
    if key == 27:  # ESC to exit
        print("⚠️ ESC pressed - Exiting...")
        break
    elif key == ord('q') or key == ord('Q'):  # Q to exit
        print("⚠️ Q pressed - Exiting...")
        break
    elif key == 32:  # SPACE to pause
        print("⏸️  PAUSED - Press any key to continue...")
        cv2.waitKey(0)

cap.release()
cv2.destroyAllWindows()
print(f"✅ Fast processing complete!")
print(f"   - Total frames processed: {frame_count}")
print(f"   - Frames analyzed: {frame_count // frame_skip}")
print(f"   - Total detections: {detection_count}")
if frame_count > 0:
    print(f"   - Avg containers/frame: {detection_count/(frame_count//frame_skip):.2f}")
print(f"   - Speed multiplier: x{frame_skip}")

✅ Video reopened for fast processing

0: 832x1280 1 dent, 5 rusts, 115.5ms
Speed: 5.9ms preprocess, 115.5ms inference, 0.8ms postprocess per image at shape (1, 3, 832, 1280)

0: 832x1280 1 dent, 3 rusts, 109.3ms
Speed: 4.9ms preprocess, 109.3ms inference, 0.7ms postprocess per image at shape (1, 3, 832, 1280)

0: 832x1280 1 dent, 5 rusts, 98.0ms
Speed: 5.0ms preprocess, 98.0ms inference, 0.9ms postprocess per image at shape (1, 3, 832, 1280)

0: 832x1280 1 dent, 4 rusts, 97.0ms
Speed: 3.9ms preprocess, 97.0ms inference, 0.6ms postprocess per image at shape (1, 3, 832, 1280)

0: 832x1280 1 dent, 5 rusts, 110.4ms
Speed: 4.5ms preprocess, 110.4ms inference, 0.6ms postprocess per image at shape (1, 3, 832, 1280)

0: 832x1280 2 dents, 4 rusts, 106.3ms
Speed: 4.6ms preprocess, 106.3ms inference, 0.8ms postprocess per image at shape (1, 3, 832, 1280)

0: 832x1280 1 dent, 6 rusts, 108.4ms
Speed: 4.1ms preprocess, 108.4ms inference, 0.8ms postprocess per image at shape (1, 3, 832, 1280)

0: 832

KeyboardInterrupt: 

# DAMAGE COUNTER Overlay

In [None]:
CLASS_MAP = {
    0: "dent",
    1: "rust",
    2: "broken_door",
    3: "leak"
}

def extract_counts(result):
    counts = {"dent":0, "rust":0, "broken_door":0, "leak":0}
    boxes = result.boxes

    for cls in boxes.cls:
        name = CLASS_MAP[int(cls)]
        counts[name] += 1

    return counts

# Add loop after inference
result = results[0]
counts = extract_counts(result)
text = f"Dent:{counts['dent']} Rust:{counts['rust']} Broken Door:{counts['broken_door']} Leak:{counts['leak']}"

cv2.putText(annotated_frame,text,
            (20,40),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,(0,255,0), 2)

array([[[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        ...,
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0]],

       [[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        ...,
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0]],

       [[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        ...,
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0]],

       ...,

       [[24, 24, 24],
        [24, 24, 24],
        [24, 24, 24],
        ...,
        [19, 18, 20],
        [19, 18, 20],
        [19, 18, 20]],

       [[23, 23, 23],
        [23, 23, 23],
        [23, 23, 23],
        ...,
        [19, 18, 20],
        [19, 18, 20],
        [19, 18, 20]],

       [[23, 23, 23],
        [23, 23, 23],
        [23, 23, 23],
        ...,
        [19, 18, 20],
        [19, 18, 20],
        [19, 18, 20]]], dtype=uint8)

# REAL BUSINESS MODE — Risk Trigger

In [None]:
severity = (
    counts['dent']*1 +
    counts['rust']*2 +
    counts['broken_door']*3 +
    counts['leak']*4
)

if severity >= 4:
    print("⚠️ High risk container detected! Triggering alert...")
    # Here you can add code to send an alert (e.g., email, SMS, webhook)

⚠️ High risk container detected! Triggering alert...


# Production Version (no GUI)

In [None]:
# Production Version - No GUI, streaming mode
VIDEO_PATH = "dataset/videos/container_video.mov"

for result in model.predict(
    source=VIDEO_PATH,
    stream=True,
    verbose=True,
    conf=0.25,
    imgsz=640
):
    boxes = result.boxes

    if len(boxes) > 0:
        print(f"Damage detected: {len(boxes)} containers")


video 1/1 (frame 1/9784) /Users/miftahhadiyannoor/Documents/logistics-rag/notebooks/dataset/videos/container_video.mov: 416x640 (no detections), 38.8ms
video 1/1 (frame 2/9784) /Users/miftahhadiyannoor/Documents/logistics-rag/notebooks/dataset/videos/container_video.mov: 416x640 (no detections), 34.4ms
video 1/1 (frame 3/9784) /Users/miftahhadiyannoor/Documents/logistics-rag/notebooks/dataset/videos/container_video.mov: 416x640 (no detections), 31.9ms
video 1/1 (frame 4/9784) /Users/miftahhadiyannoor/Documents/logistics-rag/notebooks/dataset/videos/container_video.mov: 416x640 (no detections), 30.5ms
video 1/1 (frame 5/9784) /Users/miftahhadiyannoor/Documents/logistics-rag/notebooks/dataset/videos/container_video.mov: 416x640 (no detections), 29.0ms
video 1/1 (frame 6/9784) /Users/miftahhadiyannoor/Documents/logistics-rag/notebooks/dataset/videos/container_video.mov: 416x640 (no detections), 33.8ms
video 1/1 (frame 7/9784) /Users/miftahhadiyannoor/Documents/logistics-rag/notebooks/dat

KeyboardInterrupt: 

# RAG Experiment Layer
# AI Engineer concept to explainn why the container are
- 0. rust, 
- 1. dent, 
- 2. broken_door, 
- 3. leak

# Load SOP Docs

In [None]:
!pip install langchain langchain-community pypdf



In [None]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("dataset/Pdf/Standard_container_operations.pdf") # SOP document for container getting authorized by STANDARDS FOR EMPTY SHIPPING CONTAINER INSPECTION VERSION 2 - May 2021
docs = loader.load()

print(f"✅ Loaded {len(docs)} pages from SOP document")

✅ Loaded 75 pages from SOP document


# Create Vector DB

In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

embed = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.from_documents(docs, embed)
db.save_local("faiss_container_sop_db")

print(f"✅ Vector database created and saved!")
print(f"   - Total documents: {len(docs)}")
print(f"   - Embedding model: sentence-transformers/all-MiniLM-L6-v2")
print(f"   - Database saved to: faiss_container_sop_db/")

  embed = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


✅ Vector database created and saved!
   - Total documents: 75
   - Embedding model: sentence-transformers/all-MiniLM-L6-v2
   - Database saved to: faiss_container_sop_db/


# Prompting Using CV Result

In [None]:
prompting = f"""
Inspection result:
Dent: {counts['dent']}
Rust: {counts['rust']}
Broken Door: {counts['broken_door']}
Leak: {counts['leak']}
Severity Score: {severity}

What operational action must be taken?"""

context = db.similarity_search(query=prompting, k=3)

# AI Engineer prompt to get business problem solution

In [None]:
from langchain_community.llms import Ollama

llm = Ollama(model="llama3")

final_prompt = f"""
You are senior port inspection Engineer.
Based on the container inspection results and the following SOP context, provide a concise recommendation for operational action.

context:
{context}

Inspection Results:
{prompting}

Generate:
- Risk Level
- Action Recommendation
- Repair Urgency
- Report Summary
"""

print("🤖 Generating AI recommendation...")
response = llm.invoke(final_prompt)  # Fix: llm() -> llm.invoke()
print("\n" + "="*60)
print(response)
print("="*60)

  llm = Ollama(model="llama3")


🤖 Generating AI recommendation...

Based on the inspection results and SOP context, I recommend the following:

**Risk Level:** Low

**Action Recommendation:** Acceptable

The inspection result indicates no rust, dent, broken door, or leak. The container meets the required standards for DAWE certification.

**Repair Urgency:** N/A (No repairs are needed)

**Report Summary:**

Container ID: [Insert container ID]
Assessment Date: [Insert date]
Inspector: [Insert inspector name]

Summary of Inspection Results:

* Dent: 0
* Rust: 0
* Broken Door: 0
* Leak: 0
* Severity Score: 0

Conclusion:
The container meets the required standards for DAWE certification and is acceptable for further use. No repairs or re-inspections are necessary.

Note: The container assessment procedure (CAP) requires a thorough inspection of the container, including its structure, doors, seals, and rust. Since the inspection results indicate no issues, the container is deemed acceptable.


# Financial Estimation Layer

In [None]:
repair_cost = (
    counts['dent'] * 200 +
    counts['rust'] * 300 +
    counts['broken_door'] * 500 +
    counts['leak'] * 750
)

print("Estimated Repair Cost Analysis:", repair_cost)

Estimated Repair Cost Analysis: 0
