In [1]:
from ultralytics import YOLO
import os 
import cv2

In [3]:
# Load the YOLOv8 model
model = YOLO('yolov8n.pt')

# Directory containing the PNG images
image_dir = "D:/KTH/IL2232&II2211_P5/kitti_dataset/testing/image_02/0002/"

# List all PNG files in the directory
image_files = [f for f in os.listdir(image_dir) if f.endswith('.png')]

for image_file in image_files:
    # Load the PNG image
    image_path = os.path.join(image_dir, image_file)
    frame = cv2.imread(image_path)

    # Run YOLOv8 tracking on the image, persisting tracks between frames
    results = model.track(frame, persist=True)

    # Visualize the results on the image
    annotated_frame = results[0].plot()

    # Display the annotated image
    cv2.imshow("YOLOv8 Tracking", annotated_frame)

    # Wait for a key press (you can customize this behavior)
    cv2.waitKey(0)

# Close the display window
cv2.destroyAllWindows()


0: 224x640 2 cars, 201.3ms
Speed: 0.0ms preprocess, 201.3ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 640)



: 

In [2]:
model = YOLO('yolov8s.pt')  # load an official model
ds_source = 'D:/KTH/IL2232&II2211_P5/kitti_dataset/testing/image_02/0002/'
results = model.predict(ds_source,conf=0.5)  # predict on an image


image 1/243 D:\KTH\IL2232&II2211_P5\kitti_dataset\testing\image_02\0002\000000.png: 224x640 4 cars, 2 traffic lights, 0.0ms
image 2/243 D:\KTH\IL2232&II2211_P5\kitti_dataset\testing\image_02\0002\000001.png: 224x640 5 cars, 1 traffic light, 15.6ms
image 3/243 D:\KTH\IL2232&II2211_P5\kitti_dataset\testing\image_02\0002\000002.png: 224x640 4 cars, 2 traffic lights, 4.5ms
image 4/243 D:\KTH\IL2232&II2211_P5\kitti_dataset\testing\image_02\0002\000003.png: 224x640 4 cars, 2 traffic lights, 10.9ms
image 5/243 D:\KTH\IL2232&II2211_P5\kitti_dataset\testing\image_02\0002\000004.png: 224x640 4 cars, 2 traffic lights, 15.5ms
image 6/243 D:\KTH\IL2232&II2211_P5\kitti_dataset\testing\image_02\0002\000005.png: 224x640 4 cars, 1 traffic light, 7.7ms
image 7/243 D:\KTH\IL2232&II2211_P5\kitti_dataset\testing\image_02\0002\000006.png: 224x640 4 cars, 1 traffic light, 0.0ms
image 8/243 D:\KTH\IL2232&II2211_P5\kitti_dataset\testing\image_02\0002\000007.png: 224x640 4 cars, 1 traffic light, 15.7ms
image 9

In [6]:
model = YOLO('yolov8s.pt')  # load an official model
ds_source = 'D:/KTH/IL2232&II2211_P5/kitti_dataset/testing/image_02/0002/000021.png'
results = model.track(source=ds_source, conf=0.5, show=True)
results[0].boxes


image 1/1 D:\KTH\IL2232&II2211_P5\kitti_dataset\testing\image_02\0002\000021.png: 224x640 3 cars, 33.9ms
Speed: 2.0ms preprocess, 33.9ms inference, 5.1ms postprocess per image at shape (1, 3, 224, 640)


ultralytics.engine.results.Boxes object with attributes:

cls: tensor([2., 2., 2.])
conf: tensor([0.8113, 0.7780, 0.7573])
data: tensor([[432.0870, 176.9650, 506.3315, 230.5179,   1.0000,   0.8113,   2.0000],
        [546.5690, 175.8145, 587.3947, 208.0988,   2.0000,   0.7780,   2.0000],
        [499.8274, 178.0221, 525.0803, 199.0363,   3.0000,   0.7573,   2.0000]])
id: tensor([1., 2., 3.])
is_track: True
orig_shape: (375, 1242)
shape: torch.Size([3, 7])
xywh: tensor([[469.2092, 203.7414,  74.2445,  53.5530],
        [566.9818, 191.9566,  40.8257,  32.2843],
        [512.4539, 188.5292,  25.2529,  21.0142]])
xywhn: tensor([[0.3778, 0.5433, 0.0598, 0.1428],
        [0.4565, 0.5119, 0.0329, 0.0861],
        [0.4126, 0.5027, 0.0203, 0.0560]])
xyxy: tensor([[432.0870, 176.9650, 506.3315, 230.5179],
        [546.5690, 175.8145, 587.3947, 208.0988],
        [499.8274, 178.0221, 525.0803, 199.0363]])
xyxyn: tensor([[0.3479, 0.4719, 0.4077, 0.6147],
        [0.4401, 0.4688, 0.4729, 0.5549],
 

: 

In [3]:
def center_point(x_l, x_r, y_t, y_b):
  x = (x_r - x_l) * 0.5 + x_l
  y = (y_t - y_b) * 0.5 + y_b
  return (x, y)

Save to file

In [4]:
image_files = [f for f in os.listdir(ds_source) if f.endswith('.png')]
output_dir = 'output'
os.makedirs(output_dir, exist_ok=True)
for i in range(len(image_files)):
    result_file = f'0002_{i}.txt'
    result_pth = os.path.join(output_dir, result_file)
    boxes_info = results[i].boxes # all the bounding box in one image with all the properties
    with open(result_pth, 'w') as file:
        for obj in range(len(boxes_info.cls)):
            cls = boxes_info.cls[obj]
            x_l, y_t, x_r, y_b = boxes_info.xyxy[obj].tolist()
            x_center, y_center = center_point(x_l, x_r, y_t, y_b)
            file.write(f'{cls},{x_l},{y_t},{x_r},{y_b},{x_center},{y_center}\n')
    print(f'Bounding box information saved to {result_file}')

Bounding box information saved to 0002_0.txt
Bounding box information saved to 0002_1.txt
Bounding box information saved to 0002_2.txt
Bounding box information saved to 0002_3.txt
Bounding box information saved to 0002_4.txt
Bounding box information saved to 0002_5.txt
Bounding box information saved to 0002_6.txt
Bounding box information saved to 0002_7.txt
Bounding box information saved to 0002_8.txt
Bounding box information saved to 0002_9.txt
Bounding box information saved to 0002_10.txt
Bounding box information saved to 0002_11.txt
Bounding box information saved to 0002_12.txt
Bounding box information saved to 0002_13.txt
Bounding box information saved to 0002_14.txt
Bounding box information saved to 0002_15.txt
Bounding box information saved to 0002_16.txt
Bounding box information saved to 0002_17.txt
Bounding box information saved to 0002_18.txt
Bounding box information saved to 0002_19.txt
Bounding box information saved to 0002_20.txt
Bounding box information saved to 0002_21.tx

In [5]:
N = 3 # size of sliding windows, N frames
sw_center = [] # center points group by sliding windows
for x in range(len(image_files)-N+1):
    xyt = [] # center points from the frames in a sliding window
    for i in range(N):
        result_file = f'0002_{x+i}.txt'
        result_pth = os.path.join(output_dir, result_file)
        with open(result_pth, 'r') as file:
            xy = []
            for line in file:
            # 将每行数据按逗号分割，生成一个列表
                values = line.strip().split(',')
                cls = float(values[0])
                
                if cls==0.0 or cls==1.0 or cls==2.0 or cls==3.0 or cls==5.0: 
                    xy_center = (float(values[-2]), float(values[-1]))
                    xy.append(xy_center)
            # print(xy)
            xyt.append(xy)
    print(xyt)
    sw_center.append(xyt)




[[(176.79537963867188, 201.1571502685547), (465.989013671875, 206.65737915039062), (505.7383270263672, 193.5426788330078), (571.136474609375, 198.12793731689453)], [(162.58068084716797, 204.72848510742188), (451.89605712890625, 212.04947662353516), (506.5579833984375, 194.1491470336914), (571.2921142578125, 198.41339874267578), (33.773311614990234, 210.4832534790039)], [(144.70240020751953, 208.25147247314453), (431.1757049560547, 219.683349609375), (571.1095886230469, 198.95115661621094), (504.0834503173828, 195.55923461914062)]]
[[(162.58068084716797, 204.72848510742188), (451.89605712890625, 212.04947662353516), (506.5579833984375, 194.1491470336914), (571.2921142578125, 198.41339874267578), (33.773311614990234, 210.4832534790039)], [(144.70240020751953, 208.25147247314453), (431.1757049560547, 219.683349609375), (571.1095886230469, 198.95115661621094), (504.0834503173828, 195.55923461914062)], [(404.3209533691406, 228.27398681640625), (127.40068817138672, 208.50372314453125), (571.

In [6]:
input=[["7 0.1 0.1 0.2 0.2",
 "2 0.5 0.5 0.6 0.6"],
["7 0.2 0.2 0.3 0.3",
 "2 0.4 0.4 0.5 0.5"],
["7 0.8 0.9 0.2 0.3",
 "2 0.3 0.3 0.4 0.4"]]

In [7]:
xyt=[] # 一个滑动窗口中不同时间下的center points
xy=[] #同一帧画面中的center points
for frame in input: #此处的frame就是一帧图像，相当于给了一个滑动窗口
  xy=[]
  for i in frame:
    temp=i.split(" ")
    #xy.append(center_point(i.split(" ")[1:4]))
    xy.append(center_point(float(temp[1]), float(temp[2]), float(temp[3]), float(temp[4])))
  xyt.append(xy)
print(xy)
print(xyt)

[(0.8500000000000001, 0.25), (0.3, 0.4)]
[[(0.1, 0.2), (0.5, 0.6)], [(0.2, 0.3), (0.4, 0.5)], [(0.8500000000000001, 0.25), (0.3, 0.4)]]


In [8]:
list_frame=[]
spatial_rel_matrix=[]
xyt = sw_center[0]
for xy in xyt:
  spatial_rel_matrix=[]
  for n1, obj1 in enumerate(xy):
    spatial_rel_matrix.append([])
    for n2, obj2 in enumerate(xy):

      if obj1 == obj2:
        spatial_rel_matrix[n1].append(0)
      elif obj1[0] >= obj2[0] and obj1[1] >= obj2[1]:
        spatial_rel_matrix[n1].append(2) # bottom-right
      elif obj1[0] < obj2[0] and obj1[1] >= obj2[1]:
        spatial_rel_matrix[n1].append(1) # bottom-left
      elif obj1[0] < obj2[0] and obj1[1] < obj2[1]:
        spatial_rel_matrix[n1].append(3) # top-left
      elif obj1[0] >= obj2[0] and obj1[1] < obj2[1]:
        spatial_rel_matrix[n1].append(4) # top-right

  list_frame.append(spatial_rel_matrix)

#print(spatial_rel_matrix)
print(len(list_frame))
print(list_frame)

3
[[[0, 3, 1, 1], [2, 0, 1, 1], [4, 4, 0, 3], [4, 4, 2, 0]], [[0, 3, 1, 1, 4], [2, 0, 1, 1, 2], [4, 4, 0, 3, 4], [4, 4, 2, 0, 4], [1, 3, 1, 1, 0]], [[0, 3, 1, 1], [2, 0, 1, 1], [4, 4, 0, 2], [4, 4, 3, 0]]]


In [9]:
# 1 moving left to right
# 2 moving right to left
# 3 moving forward
# 4 moving backward
# 5 no change
# 6 moving foward *
# 7 moving backward *
# 8 self edge
def comp(prev, curr):
  if(prev == 1 and curr == 2):
    return 1
  elif(prev == 1 and curr == 3):
    return 3
  elif(prev == 1 and curr == 4):
    return 6
  elif(prev == 2 and curr == 1):
    return 2
  elif(prev == 2 and curr == 3):
    return 6
  elif(prev == 2 and curr == 4):
    return 3
  elif(prev == 3 and curr == 1):
    return 4
  elif(prev == 3 and curr == 2):
    return 7
  elif(prev == 3 and curr == 4):
    return 1
  elif(prev == 4 and curr == 1):
    return 7
  elif(prev == 4 and curr == 2):
    return 4
  elif(prev == 4 and curr == 3):
    return 2
  elif(prev == 0 and curr == 0):
    return 8
  elif(prev == curr):
    return 5
  else:
    raise TypeError("Fucntion Comp Failed!!")

In [10]:
matrix_size=len((list_frame[1][1]))
print(matrix_size)
spatio_temporal_matrix = [[0 for _ in range(matrix_size)] for _ in range(matrix_size)]

for frame_nr, frame in enumerate(list_frame):
  if len(list_frame)==frame_nr+1:
    continue
  frame2=list_frame[frame_nr+1]
  for x in range(len(list_frame[frame_nr])): #to do: check if they are different sizes
    for y in range(len(list_frame[frame_nr])):
      #if frame[x][y] == 1 and frame2[x][y] == 2:
      #print(x,y)
      spatio_temporal_matrix[x][y]=comp(frame[x][y], frame2[x][y])
print(spatio_temporal_matrix)



5


IndexError: list index out of range