In [1]:
!pip3 install opencv-python
!pip3 install timm
!pip3 install flask



In [5]:
import cv2
import torch
import time
import numpy as np
from flask import Flask, Response

# Flask application that will serve the depth-map stream
app = Flask(__name__)

# Depth-estimation network, fetched through torch.hub.
# "MiDaS_small" is the lightweight MiDaS v2.1 small model, not the v3
# DPT_Large variant; switch to "DPT_Large" or "DPT_Hybrid" for the DPT models.
model_type = "MiDaS_small"
midas = torch.hub.load("intel-isl/MiDaS", model_type)

# Run on CUDA when it is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
midas.to(device)
midas.eval()  # inference mode

# Pre-processing pipelines published alongside the model
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

# The DPT models use their own transform; every other model type uses the small one
transform = (
    midas_transforms.dpt_transform
    if model_type in ("DPT_Large", "DPT_Hybrid")
    else midas_transforms.small_transform
)

# Webcam capture on device index 1
# NOTE(review): index 0 is usually the default camera - confirm 1 is the intended device
cap = cv2.VideoCapture(1)

def generate_frames():
    """Yield an endless multipart JPEG stream of webcam frames with depth maps.

    Each iteration reads one frame from the module-level ``cap``, runs it
    through the module-level ``midas`` model (after ``transform``), colour-maps
    the normalised prediction, and places it to the right of the original
    frame annotated with the measured FPS.

    Yields:
        bytes: one ``--frame`` multipart chunk containing a JPEG image.

    The generator ends when ``cap.read()`` reports failure.
    """
    while True:
        success, img = cap.read()
        if not success:
            break

        # perf_counter is monotonic and high-resolution; time.time() can
        # return identical values for short intervals on coarse clocks.
        start = time.perf_counter()

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Apply input transforms
        input_batch = transform(img).to(device)

        # Prediction and resize to original resolution
        with torch.no_grad():
            prediction = midas(input_batch)

            prediction = torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze()

        depth_map = prediction.cpu().numpy()
        # Scale the prediction into [0, 1] so it can be mapped to 8-bit below
        depth_map = cv2.normalize(depth_map, None, 0, 1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_64F)

        total_time = time.perf_counter() - start

        # Guard against a zero-length interval instead of raising ZeroDivisionError
        fps = 1 / total_time if total_time > 0 else 0.0

        # Back to BGR, the channel order the frame had before the conversion above
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        depth_map = (depth_map * 255).astype(np.uint8)
        depth_map = cv2.applyColorMap(depth_map, cv2.COLORMAP_MAGMA)

        cv2.putText(img, f'FPS: {int(fps)}', (20, 70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 2)

        # Combine the original image and depth map side by side
        combined_image = cv2.hconcat([img, depth_map])

        # Encode the combined image as JPEG; skip the frame if encoding fails
        # rather than streaming an invalid payload.
        encoded, buffer = cv2.imencode('.jpg', combined_image)
        if not encoded:
            continue
        frame = buffer.tobytes()

        # Generate the response for the frame
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')

@app.route('/video_feed')
def video_feed():
    """Flask route that streams the frames produced by ``generate_frames``.

    Returns a ``Response`` whose body is the frame generator, sent with the
    ``multipart/x-mixed-replace`` MIME type and ``frame`` as the boundary.
    """
    print('HELLO TO THE WORLD OF REAL-TIME DEPTH MAPS')
    frame_stream = generate_frames()
    return Response(
        frame_stream,
        mimetype='multipart/x-mixed-replace; boundary=frame',
    )

if __name__ == "__main__":
    # Serve on every network interface, port 5000. app.run() blocks until the
    # server is stopped (e.g. CTRL+C or a kernel interrupt).
    try:
        app.run(host="0.0.0.0", port=5000)
    finally:
        # Release the webcam so other programs (or a re-run of this cell)
        # can open the device again.
        cap.release()


Using cache found in C:\Users\NCBLRL467/.cache\torch\hub\intel-isl_MiDaS_master


Loading weights:  None


Downloading: "https://github.com/rwightman/gen-efficientnet-pytorch/zipball/master" to C:\Users\NCBLRL467/.cache\torch\hub\master.zip
Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite3-b733e338.pth" to C:\Users\NCBLRL467/.cache\torch\hub\checkpoints\tf_efficientnet_lite3-b733e338.pth
Downloading: "https://github.com/isl-org/MiDaS/releases/download/v2_1/midas_v21_small_256.pt" to C:\Users\NCBLRL467/.cache\torch\hub\checkpoints\midas_v21_small_256.pt
100%|██████████| 81.8M/81.8M [00:48<00:00, 1.75MB/s]
Using cache found in C:\Users\NCBLRL467/.cache\torch\hub\intel-isl_MiDaS_master


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.1.107:5000
Press CTRL+C to quit
192.168.1.107 - - [10/Aug/2024 15:43:13] "GET / HTTP/1.1" 404 -


HELLO WORLD TO The WORLD OF REAL-TIME DEPTH MAPS


192.168.1.107 - - [10/Aug/2024 15:43:29] "GET /video_feed HTTP/1.1" 200 -
