In [249]:
from transformers import DPTImageProcessor, DPTForDepthEstimation, GLPNFeatureExtractor, GLPNForDepthEstimation
import torch
import numpy as np
from PIL import Image

# Load the DPT preprocessing pipeline and the depth-estimation model from the
# "Intel/dpt-large" checkpoint. Both are module-level globals read by `predict`.
# NOTE(review): loading prints a "newly initialized weights" warning for a few
# neck.fusion_stage parameters (see the cell output) — confirm it is benign.
processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")

# Alternative GLPN checkpoints (KITTI / NYU variants), kept for experimentation.
# Enabling one of these pairs also means switching `predict` over to its
# commented-out `feature_extractor(...)` line.
# feature_extractor = GLPNFeatureExtractor.from_pretrained("vinvino02/glpn-kitti")
# model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-kitti")
# feature_extractor = GLPNFeatureExtractor.from_pretrained("vinvino02/glpn-nyu")
# model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu")

def predict(image):
    """Estimate a depth map for ``image`` and return it as an 8-bit array.

    Uses the module-level ``processor`` and ``model`` globals.

    Parameters
    ----------
    image : PIL.Image.Image
        Input picture. ``image.size`` (width, height) determines the output
        resolution.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array resampled to the input resolution (height x width for
        a single image), scaled so that the largest predicted value maps to
        255. An all-zero array is returned when the prediction has no positive
        finite maximum, since no meaningful scaling exists in that case.
    """
    # prepare image for the model
    inputs = processor(images=image, return_tensors="pt")
    # inputs = feature_extractor(images=image, return_tensors="pt")

    # get prediction (inference only, so no gradients are tracked)
    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth

    # interpolate to original size; PIL reports (width, height) while
    # interpolate expects (height, width), hence the reversal
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    )

    output = prediction.squeeze().cpu().numpy()

    # Guard the normalisation: a zero (or non-finite) peak would otherwise
    # produce NaN/inf values that turn into garbage when cast to uint8.
    peak = np.max(output)
    if not np.isfinite(peak) or peak <= 0:
        return np.zeros(output.shape, dtype="uint8")

    # Bicubic resampling can undershoot slightly below zero; clip before the
    # cast so those pixels become 0 instead of wrapping around to ~255.
    formatted = np.clip(output * 255 / peak, 0, 255).astype("uint8")
    return formatted

Some weights of DPTForDepthEstimation were not initialized from the model checkpoint at Intel/dpt-large and are newly initialized: ['neck.fusion_stage.layers.0.residual_layer1.convolution1.weight', 'neck.fusion_stage.layers.0.residual_layer1.convolution1.bias', 'neck.fusion_stage.layers.0.residual_layer1.convolution2.bias', 'neck.fusion_stage.layers.0.residual_layer1.convolution2.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [250]:
import matplotlib.pyplot as plt
import os
import cv2

def cutout(image, top=0, bottom=1000, left=420, right=1000):
    """Crop a rectangular region of interest out of ``image``.

    Parameters
    ----------
    image : array-like
        Anything ``np.array`` accepts (NumPy array, PIL image, nested list).
        The input is copied, so the caller's data is never modified through
        the returned crop.
    top, bottom : int
        Row range ``[top, bottom)`` to keep. Defaults to rows 0-999.
    left, right : int
        Column range ``[left, right)`` to keep. Defaults to columns 420-999.

    Returns
    -------
    numpy.ndarray
        The cropped pixels. Standard slicing rules apply, so a window that
        extends past the image is silently truncated to the available area.
    """
    pixels = np.array(image)
    return pixels[top:bottom, left:right]

# Run the depth model on the first image in test_imgs/ as a smoke test.
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# make "the first image" the same file on every run.
predictions = []
for img in sorted(os.listdir("test_imgs/")):
    print(img)
    # The context manager closes the file handle as soon as the pixels have
    # been copied into a NumPy array by cutout().
    with Image.open(os.path.join("test_imgs/", img)) as pil_image:
        image = cutout(pil_image)
    depth = predict(Image.fromarray(image))
    predictions.append(depth)
    # Only the first image is processed for now.
    break

bigpile.jpg




In [232]:
def find_center_height(contour):
    """Return the y coordinate (row) of the contour's centroid as an int.

    The centroid row is ``m01 / m00`` from the contour's image moments. For
    degenerate contours whose ``m00`` is zero (e.g. a single point or a
    straight line) the centroid is undefined, so the vertical midpoint of the
    bounding rectangle is returned instead of raising ``ZeroDivisionError``.
    """
    moments = cv2.moments(contour)
    area = moments['m00']
    if area == 0:
        _, y, _, h = cv2.boundingRect(contour)
        return y + h // 2
    return int(moments['m01'] / area)

def find_bounding_rect_area(contour):
    """Area (width * height) of the upright bounding rectangle of ``contour``."""
    rect = cv2.boundingRect(contour)
    width, height = rect[2], rect[3]
    return width * height

def find_highest_point(contour):
    """Return the y coordinate of the top edge of the contour's bounding rectangle.

    Image rows grow downwards, so the smallest y is the visually highest point.
    """
    _, top_edge, _, _ = cv2.boundingRect(contour)
    return top_edge

In [251]:
from PIL import Image
import os 
import cv2
import numpy as np


def analyse_frame(image):
    """Estimate the fill level of the pile in ``image`` and build a preview.

    Pipeline: depth prediction -> Sobel gradients -> Canny edges -> contours.
    The contour with the largest bounding rectangle whose centroid lies in
    rows 1..599 is taken as the pile outline, and its top edge is turned into
    a capacity percentage.

    Parameters
    ----------
    image : numpy.ndarray
        3-channel BGR frame as produced by ``cv2.imread`` /
        ``cv2.VideoCapture.read``. A marker line is drawn onto this array, so
        pass a copy if the original must stay untouched.

    Returns
    -------
    numpy.ndarray
        The annotated frame (left) and the annotated depth map (right) joined
        side by side. When no suitable contour is found both halves are
        returned without annotations.
    """
    # OpenCV delivers BGR, whereas the depth model's processor works on RGB
    # images; convert so the colour channels are not swapped for the network.
    depth = predict(Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)))

    # Colour version of the raw depth map; annotations are drawn onto it and
    # it forms the right half of the returned preview.
    depth_copy = cv2.cvtColor(depth, cv2.COLOR_GRAY2RGB)

    # Gradient magnitude along both axes. The absolute response can exceed
    # 255, so it is clipped (saturated) rather than cast directly, which
    # would wrap strong edges around to small values.
    depth_rgb = cv2.cvtColor(depth, cv2.COLOR_GRAY2RGB)
    sobel_x = cv2.Sobel(depth_rgb, cv2.CV_64F, 1, 0, ksize=3)
    sobel_y = cv2.Sobel(depth_rgb, cv2.CV_64F, 0, 1, ksize=3)
    sobel_x_abs = np.clip(np.absolute(sobel_x), 0, 255).astype(np.uint8)
    sobel_y_abs = np.clip(np.absolute(sobel_y), 0, 255).astype(np.uint8)
    gradient = cv2.bitwise_or(sobel_y_abs, sobel_x_abs)

    # Find the main edge of the pile using Canny edge detection, then blur
    # slightly before extracting contours.
    edges = cv2.Canny(gradient, 100, 200)
    blurred = cv2.GaussianBlur(edges, (3, 3), 0)
    contours, _ = cv2.findContours(blurred, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    # Keep only contours whose centroid lies in the upper band of the frame
    # (the centroid is computed once per contour).
    candidates = [cnt for cnt in contours if 0 < find_center_height(cnt) < 600]

    if candidates:
        print("contours found")
        largest_contour = max(candidates, key=find_bounding_rect_area)

        # Row of the highest point of the pile outline.
        top_y = find_highest_point(largest_contour)

        # Mark the detected level on the camera frame (red in BGR).
        image = cv2.line(image, (200, top_y), (400, top_y), (0, 0, 255), 2)

        # The level is measured against the image height minus a fixed
        # 400-row offset. NOTE(review): the offset looks tuned for
        # 1000-row frames — confirm for other resolutions.
        top, bottom = depth_copy.shape[0], 400
        span = top - bottom
        if span > 0:
            # Clamp at 0 so a contour below the measured band never shows up
            # as a negative capacity.
            height_percentage = max(0.0, 1 - (top_y + 0.1) / span)
        else:
            # Frame too short for the measurement band: avoid the division by
            # zero / inverted scale and report an empty level.
            height_percentage = 0.0

        # Text colour and status note by fill level (colours are BGR).
        c = (255, 0, 0)
        note = ""
        if height_percentage > 0.90:
            c = (0, 0, 255)
            note = "Critical level!"
        elif height_percentage > 0.75:
            # yellow
            c = (0, 255, 255)
            note = "Soon to be full"
        elif height_percentage > 0.4:
            note = "Slowly filling up"

        textx = 200
        texty = 500
        cv2.putText(depth_copy, f"Capacity: {height_percentage:.0%}", (textx, texty),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, c, 2, cv2.LINE_AA)
        cv2.putText(depth_copy, note, (textx, texty + 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, c, 2, cv2.LINE_AA)

        cv2.drawContours(depth_copy, [largest_contour], -1, (200, 255, 200), 2)

    toshow = np.concatenate((image, depth_copy), axis=1)

    print(toshow.shape)
    return toshow

# Preview the analysis for the first readable image in test_imgs/.
# The listing is sorted because os.listdir returns entries in arbitrary order.
try:
    for img in sorted(os.listdir("test_imgs/")):
        image = cv2.imread(os.path.join("test_imgs/", img))
        if image is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None instead of raising; skip it and try the next entry.
            continue
        image = cutout(image)
        frame = analyse_frame(image)
        # open the image with cv2 and wait for a key press
        cv2.imshow('frame', frame)
        cv2.waitKey(0)
        break
finally:
    # Close the preview window even if the analysis raised.
    cv2.destroyAllWindows()

contours found
(791, 1160, 3)


In [248]:
# open the video file
import cv2
# import threading
import time
# loop through the video



def show(video_path='videos/trashpile_combined.mp4', output_path=None, frame_step=30):
    """Play a video, analysing and displaying every ``frame_step``-th frame.

    Parameters
    ----------
    video_path : str
        Video file to read. Defaults to the clip used originally.
    output_path : str or None
        When given, the analysed frames are also written to this file
        (XVID, 10 fps). The default ``None`` records nothing, matching the
        previous behaviour where the write call was disabled; unlike before,
        no unused, never-released writer (and empty output file) is created.
    frame_step : int
        Analyse one frame out of every ``frame_step`` (must be >= 1).

    Raises
    ------
    ValueError
        If ``frame_step`` is smaller than 1.

    Press "q" in the preview window to stop early.
    """
    if frame_step < 1:
        raise ValueError("frame_step must be >= 1")

    cap = cv2.VideoCapture(video_path)
    out = None
    frame_index = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("no frame")
                break

            frame_index += 1
            if frame_index % frame_step != 0:
                continue

            frame = analyse_frame(cutout(frame))
            cv2.imshow('frame', frame)
            print("actual", frame.shape)

            if output_path is not None:
                if out is None:
                    # Create the writer lazily from the real frame size; a
                    # writer whose size differs from the frames it is given
                    # does not produce a usable file.
                    fourcc = cv2.VideoWriter_fourcc(*'XVID')
                    frame_height, frame_width = frame.shape[:2]
                    out = cv2.VideoWriter(output_path, fourcc, 10, (frame_width, frame_height))
                out.write(frame)

            # press q to quit
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print("broke")
                break

        print("done")
    finally:
        # Always free the capture, the writer and the preview window, even
        # when the analysis raises part-way through the video.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()


# Run the video preview; press "q" in the preview window to stop early.
show()

contours found
(1000, 1240, 3)
actual (1000, 1240, 3)
contours found
(1000, 1240, 3)
actual (1000, 1240, 3)
contours found
(1000, 1240, 3)
actual (1000, 1240, 3)
contours found
(1000, 1240, 3)
actual (1000, 1240, 3)
broke
done


In [100]:
# Manual cleanup: close every OpenCV preview window that is still open.
cv2.destroyAllWindows()