In [7]:
import cv2
import numpy as np
import imutils
import glob
import torch
from classification import Classifier

In [3]:

def update(x, P, Z, H, R):
    """Kalman filter measurement-update (correction) step.

    Parameters
    ----------
    x : array_like
        Prior state estimate, expected as a column vector (n, 1).
    P : array_like
        Prior state covariance, expected (n, n).
    Z : array_like
        Measurement, expected as a column vector (m, 1).
    H : array_like
        Observation matrix mapping state to measurement space, expected (m, n).
    R : array_like or scalar
        Measurement noise covariance, expected (m, m). A scalar is broadcast
        onto *every* entry of S (not just the diagonal), so pass a matrix
        unless that is really what you want.

    Returns
    -------
    tuple
        ``(Y, S, K, X_prime, P_prime)``: innovation, innovation covariance,
        Kalman gain, corrected state and corrected covariance.
    """
    H_t = np.transpose(H)

    # Innovation: difference between the measurement and the predicted measurement.
    Y = Z - np.dot(H, x)
    # Innovation covariance.
    S = np.dot(np.dot(H, P), H_t) + R
    # Kalman gain; the pseudo-inverse keeps this defined even if S is singular.
    K = np.dot(np.dot(P, H_t), np.linalg.pinv(S))

    X_prime = x + np.dot(K, Y)

    # Build the identity from P's own size instead of relying on a
    # module-level 6x6 `I`, so the function works for any state dimension
    # and does not depend on notebook cell execution order.
    identity = np.eye(np.shape(P)[0])
    P_prime = np.dot(identity - np.dot(K, H), P)

    return Y, S, K, X_prime, P_prime
    
def predict(x, P, F, u):
    """Kalman filter time-update (prediction) step.

    Propagates the state as ``F x + u`` and the covariance as ``F P F^T``.
    No process-noise term is added to the covariance.

    Returns the tuple ``(propagated_state, propagated_covariance)``.
    """
    # State: apply the transition model, then add the external motion.
    propagated_state = np.dot(F, x) + u

    # Covariance: push P through the same linear transition.
    F_times_P = np.dot(F, P)
    propagated_cov = np.dot(F_times_P, np.transpose(F))

    return propagated_state, propagated_cov
    
    
### Initialize Kalman filter ###
# State layout (6x1), interleaved position/velocity per axis:
#   [x, vx, y, vy, z, vz]^T
# The filter starts at the origin with zero velocity.
x = np.zeros((6, 1))

# The initial uncertainty (6x6): large variance on every state component,
# no initial cross-correlation.
P = 1000.0 * np.eye(6)

# The external motion (6x1): none.
u = np.zeros((6, 1))

# The transition matrix (6x6): constant-velocity model with a time step of
# one frame, i.e. position += velocity for each axis.
F = np.array([[1, 1, 0, 0, 0, 0],
              [0, 1, 0, 0, 0, 0],
              [0, 0, 1, 1, 0, 0],
              [0, 0, 0, 1, 0, 0],
              [0, 0, 0, 0, 1, 1],
              [0, 0, 0, 0, 0, 1]])

# The observation matrix (3x6): only the three positions are measured.
H = np.array([[1, 0, 0, 0, 0, 0],
              [0, 0, 1, 0, 0, 0],
              [0, 0, 0, 0, 1, 0]])

# The measurement uncertainty (3x3): unit variance per measured axis.
# This must be a matrix: a scalar would be broadcast onto every entry of the
# innovation covariance S, including the off-diagonals, which models fully
# correlated measurement noise.
R = np.eye(3)

# The identity matrix (6x6), matching the state dimension.
I = np.eye(6)



# Load the video (the rectified left/right image sequences, in frame order).
images_left = sorted(glob.glob('../final_project_2023_rect/seq_01/image_02/data/*.png'))
images_right = sorted(glob.glob('../final_project_2023_rect/seq_01/image_03/data/*.png'))
assert images_left
assert images_right

try:
    for image_path in images_left:
        frame = cv2.imread(image_path)
        if frame is None:
            # cv2.imread returns None instead of raising on unreadable files.
            continue
        frame = imutils.resize(frame, width=600)

        ### Detect the object ###
        # Planned stereo depth estimation (not wired in yet):
        #   K_02 = [[fx 0 cx],
        #           [0 fy cy],
        #           [0  0  1]]          # focal lengths (fx, fy), optical centre (cx, cy)
        #   focal_length = K_02[0][0]
        #   b = np.linalg.norm(t_02 - t_03)   # baseline from the two translation vectors
        #   disparity = x_left - x_right
        #   depth = (b * focal_length) / disparity
        #
        # TODO: replace `measurement` with the detector output, e.g.
        #   (type, x_obj, y_obj, z_obj, height, width) = find_ball(frame)
        #   measurement = (x_obj, y_obj, z_obj)
        # Until then there is no measurement, so the filter only predicts.
        measurement = None

        if measurement is not None:
            x_obj, y_obj, z_obj = measurement
            z = np.array([[x_obj], [y_obj], [z_obj]])
            Y, S, K, x_new, P = update(x, P, z, H, R)
        else:
            # No detection this frame: keep the predicted state uncorrected.
            x_new = x

        # State layout is [x, vx, y, vy, z, vz]: positions live at indices
        # 0/2/4 and velocities at 1/3/5.
        text1 = "Position x= {}, y ={} z={}".format(x_new[0], x_new[2], x_new[4])
        text2 = "Velocity x = {}, y = {}, z = {}".format(x_new[1], x_new[3], x_new[5])

        cv2.putText(frame, text1, (10, 50), cv2.FONT_HERSHEY_DUPLEX, 0.4, (155, 0, 155), 1)
        cv2.putText(frame, text2, (10, 70), cv2.FONT_HERSHEY_DUPLEX, 0.4, (155, 0, 155), 1)

        ### Predict the next state
        x, P = predict(x_new, P, F, u)

        ### Draw the current tracked state and the predicted state on the image frame ###
        # TODO: e.g. cv2.circle(frame, (int(x_new[0]), int(x_new[2])), radius, (0, 125, 255), 2)

        # Show the frame
        cv2.imshow('Frame', frame)
        cv2.waitKey(50)
finally:
    # Always close the preview window, even if a frame raised.
    cv2.destroyAllWindows()

In [21]:
from matplotlib import pyplot as plt

images_left = sorted(glob.glob('final_project_2023_rect/seq_01/image_02/data/*.png'))
images_right = sorted(glob.glob('final_project_2023_rect/seq_01/image_03/data/*.png'))
assert images_left
assert images_right

# Focal length in pixels and stereo baseline (Euclidean norm of the difference
# between the two cameras' translation vectors).
focal_length = 956.9475
b = np.linalg.norm(np.array([0.059896, -0.001367835, 0.004637624 ])-np.array([-0.4756270, 0.005296617, -0.005437198]))
print(b)

# Pixel (row, col) at which the depth is probed.
probe_row, probe_col = 240, 448

# Close any preview windows left open by an earlier cell.
cv2.destroyAllWindows()
classifier = Classifier(format='pt')

# The block matcher is stateless across frames, so build it once.
stereo = cv2.StereoBM_create(numDisparities=128, blockSize=15)
# Optional tuning knobs:
# stereo.setMinDisparity(3)
# stereo.setDisp12MaxDiff(100)
# stereo.setUniquenessRatio(50)
# stereo.setSpeckleRange(3)
# stereo.setSpeckleWindowSize(50)

try:
    # zip() keeps the left/right frames paired and stops at the shorter list
    # instead of indexing past the end of images_right.
    for left_path, right_path in zip(images_left, images_right):
        imgLeft = cv2.imread(left_path)
        imgRight = cv2.imread(right_path)
        if imgLeft is None or imgRight is None:
            # cv2.imread returns None instead of raising on unreadable files.
            continue

        imgL = cv2.cvtColor(imgLeft, cv2.COLOR_BGR2GRAY)
        imgR = cv2.cvtColor(imgRight, cv2.COLOR_BGR2GRAY)

        results = classifier.predict(imgLeft)

        # Mark the centre of every detection. Iterating by count (rather than
        # indexing boxes[0] / boxes[1] up front) is safe for frames with zero
        # or one detection, and the index no longer shadows an outer loop variable.
        for det_idx in range(0, len(results[0])):
            box = torch.squeeze(results[0].boxes[det_idx].xywh)
            print(int(box[0]), int(box[1]))
            cv2.circle(imgLeft, (int(box[0]), int(box[1])), 3, (0, 0, 255), 5)
        cv2.imshow('Testing image', imgLeft)

        # StereoBM returns fixed-point disparities (int16, 4 fractional bits);
        # divide by 16 to obtain disparities in pixels.
        disparity = stereo.compute(imgL, imgR).astype(np.float32) / 16.0
        norm_image = cv2.normalize(disparity, None, alpha = 0, beta = 1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
        # sigmaX=0 lets OpenCV derive sigma from the kernel size; the border
        # mode has to be passed by keyword (the third positional arg is sigmaX).
        dst = cv2.GaussianBlur(norm_image, (7, 7), 0, borderType=cv2.BORDER_DEFAULT)
        #cv2.imshow('norm_image', dst)
        cv2.waitKey(500)

        # Depth from disparity: z = baseline * focal_length / disparity.
        # Non-positive disparities mark pixels without a valid match.
        probe_disparity = float(disparity[probe_row, probe_col])
        if probe_disparity > 0:
            depth = (b * focal_length) / probe_disparity
        else:
            depth = float('nan')
finally:
    # Always close the preview window, even if a frame raised.
    cv2.destroyAllWindows()




0.5356592204823809


0: 384x1248 4 Pedestrians, 1 Car, 189.8ms
Speed: 5.4ms preprocess, 189.8ms inference, 0.9ms postprocess per image at shape (1, 3, 1248, 1248)


447 255
509 236
135 214
430 250
552 166



0: 384x1248 5 Pedestrians, 1 Car, 184.4ms
Speed: 7.5ms preprocess, 184.4ms inference, 1.3ms postprocess per image at shape (1, 3, 1248, 1248)


441 256
134 228
458 257
521 235
495 172
552 165



0: 384x1248 2 Pedestrians, 1 Car, 154.6ms
Speed: 8.2ms preprocess, 154.6ms inference, 2.2ms postprocess per image at shape (1, 3, 1248, 1248)


530 235
460 254
134 208



0: 384x1248 4 Pedestrians, 1 Car, 151.9ms
Speed: 4.3ms preprocess, 151.9ms inference, 2.0ms postprocess per image at shape (1, 3, 1248, 1248)


546 239
475 254
134 223
533 240
488 248



0: 384x1248 3 Pedestrians, 1 Car, 141.2ms
Speed: 4.3ms preprocess, 141.2ms inference, 0.6ms postprocess per image at shape (1, 3, 1248, 1248)


491 255
560 240
134 218
531 161



0: 384x1248 5 Pedestrians, 1 Car, 156.9ms
Speed: 8.1ms preprocess, 156.9ms inference, 0.8ms postprocess per image at shape (1, 3, 1248, 1248)


504 255
573 242
531 164
134 220
534 163
486 251



0: 384x1248 5 Pedestrians, 1 Car, 171.4ms
Speed: 5.3ms preprocess, 171.4ms inference, 2.9ms postprocess per image at shape (1, 3, 1248, 1248)


576 244
508 255
134 222
589 243
549 168
531 162



0: 384x1248 6 Pedestrians, 1 Car, 165.3ms
Speed: 5.8ms preprocess, 165.3ms inference, 1.0ms postprocess per image at shape (1, 3, 1248, 1248)


536 257
597 244
583 244
134 219
514 256
500 172
493 171



0: 384x1248 10 Pedestrians, 1 Car, 161.8ms
Speed: 4.9ms preprocess, 161.8ms inference, 0.7ms postprocess per image at shape (1, 3, 1248, 1248)


532 255
601 244
550 255
500 172
134 218
504 173
589 245
613 242
529 163
509 172
573 248



0: 384x1248 8 Pedestrians, 1 Car, 147.1ms
Speed: 5.6ms preprocess, 147.1ms inference, 0.8ms postprocess per image at shape (1, 3, 1248, 1248)


559 256
620 244
134 220
505 173
525 166
529 164
607 247
508 172
541 252
