In [None]:
from GenerateSyntheticDataset import generate_dataset
import os
import shutil
from distutils.dir_util import copy_tree
import time

### Setup variables for the run

In [None]:
# Safety switch: the dataset on disk is only wiped and regenerated when this is True.
GENERATE_NEW = True
# Number of synthetic images requested from the generator.
GENERATE_NR_IMAGES = 50
# True selects the segmentation pipeline, False the detection pipeline.
SEGMENTATION = True
# When True, the real-footage PNG frames are stitched into a video first.
GENERATE_VIDEO = False

# Metrics collected during the run; they are written to a text file at the end.
results = {}
if SEGMENTATION:
    results['Mode'] = 'Segmentation'
else:
    results['Mode'] = 'Detection'

In [None]:
if GENERATE_NEW:
    # Empty ./datasets (keeping the directory itself) so stale files from a
    # previous run cannot leak into the new dataset.
    if os.path.isdir("./datasets"):
        print("Deleting old dataset...")
        for filename in os.listdir("./datasets"):
            file_path = os.path.join("./datasets", filename)
            try:
                # Symlinks are unlinked, never followed, so linked data outside
                # ./datasets is left untouched.
                if os.path.isfile(file_path) or os.path.islink(file_path):
                    os.unlink(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)
            except OSError as e:
                # Best effort: report the entry that could not be removed and carry on.
                print('Failed to delete %s. Reason: %s' % (file_path, e))

    # Produce a fresh synthetic dataset in ./GeneratedDataset.
    generate_dataset(custom_num_images=GENERATE_NR_IMAGES, segmentation=SEGMENTATION)

    # Mirror the generated data into ./datasets/GeneratedDataset.
    # shutil.copytree is used rather than distutils' copy_tree: the latter remembers
    # which directories it created for the lifetime of the process, so re-running this
    # cell in the same kernel after the target was deleted makes it skip re-creating
    # them and fail. dirs_exist_ok=True keeps the copy working even if an entry in
    # ./datasets could not be deleted above.
    os.makedirs("./datasets", exist_ok=True)
    shutil.copytree("./GeneratedDataset/", "./datasets/GeneratedDataset", dirs_exist_ok=True)


### Video generation from images

In [None]:
if GENERATE_VIDEO:
    import cv2
    import os

    image_folder = 'real_footage/Run1/color'
    video_name = 'real_footage/Run1/run1.mp4'

    # Frames are ordered by file name, so the names must sort chronologically.
    images = sorted(img for img in os.listdir(image_folder) if img.endswith(".png"))
    if not images:
        raise FileNotFoundError("No .png frames found in %s" % image_folder)

    # The first frame defines the size of the output video.
    first_frame = cv2.imread(os.path.join(image_folder, images[0]))
    if first_frame is None:
        raise ValueError("Could not read image %s" % os.path.join(image_folder, images[0]))
    height, width = first_frame.shape[:2]

    # 'mp4v' is the tag that matches the .mp4 container.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(video_name, fourcc, 20.0, (width, height))
    if not video.isOpened():
        raise RuntimeError("Could not open %s for writing" % video_name)

    try:
        for image in images:
            frame = cv2.imread(os.path.join(image_folder, image))
            if frame is None:
                # cv2.imread signals an unreadable file with None; skip it instead of aborting.
                print("Skipping unreadable frame: %s" % image)
                continue
            video.write(frame)
    finally:
        # Always finalize the file, even if a frame fails midway.
        video.release()

## Object detection

In [None]:
# Load the TensorBoard notebook extension, then start TensorBoard on the runs/ directory.
# NOTE(review): runs/ is presumably where the training cells below write their logs — confirm.
%load_ext tensorboard
%tensorboard --logdir runs/

In [None]:
if not SEGMENTATION:
    from ultralytics import YOLO
    import torch
    import platform
    import os

    # ---- Settings for the detection run ----
    EPOCHS = 30
    # Checkpoint to continue from; use 'yolov8n.pt' to start fresh.
    model_to_use = 'runs/detect/train5/weights/best.pt'

    # On Windows the checkpoint path is prefixed with "/".
    # NOTE(review): this appears to turn the relative path into a drive-rooted one — confirm it is intended.
    curr_os = platform.system()
    is_windows = curr_os == 'Windows'
    if is_windows:
        model_to_use = '/' + model_to_use

    # Fall back to 'yolov8n.pt' when the checkpoint file cannot be found.
    try:
        model = YOLO(model_to_use)
    except FileNotFoundError:
        print("Model not found, retraining from scratch with YoloV8n")
        model = YOLO('yolov8n.pt')

    # Move the model to the GPU when CUDA is available.
    if torch.cuda.is_available():
        print("Using GPU")
        model.cuda()

    # Time the training; the dataset YAML differs between Windows and everything else
    # (every non-Windows platform is reported as Linux).
    start = time.time()
    print('Using Windows' if is_windows else 'Using Linux')
    dataset_yaml = 'generated_dataset_Win.yaml' if is_windows else 'generated_dataset.yaml'
    res = model.train(data=dataset_yaml, epochs=EPOCHS, optimizer='SGD')
    end = time.time()

    # Record the run metrics for the export step.
    results.update({'Epochs': EPOCHS, 'Time training': end - start})

## Segmentation

In [None]:
if SEGMENTATION:
    from ultralytics import YOLO
    import torch
    import platform
    import os

    # ---- Settings for the segmentation run ----
    EPOCHS = 10
    BATCH_SIZE = 8
    # Set to False to only load the model and skip training.
    TRAIN = True
    # Checkpoint to continue from; use 'yolov8n-seg.pt' to start fresh.
    model_to_use = 'runs/segment/train13/weights/best.pt'

    # On Windows the checkpoint path is prefixed with "/".
    # NOTE(review): this appears to turn the relative path into a drive-rooted one — confirm it is intended.
    curr_os = platform.system()
    is_windows = curr_os == 'Windows'
    if is_windows:
        model_to_use = '/' + model_to_use

    # Fall back to 'yolov8n-seg.pt' when the checkpoint file cannot be found.
    try:
        model = YOLO(model_to_use)
    except FileNotFoundError:
        print("Model not found, retraining from scratch with YoloV8n-seg")
        model = YOLO('yolov8n-seg.pt')

    # Move the model to the GPU when CUDA is available.
    if torch.cuda.is_available():
        print("Using GPU")
        model.cuda()

    if TRAIN:
        # Time the training; the dataset YAML differs between Windows and everything else
        # (every non-Windows platform is reported as Linux).
        start = time.time()
        print('Using Windows' if is_windows else 'Using Linux')
        dataset_yaml = 'generated_dataset_Win.yaml' if is_windows else 'generated_dataset.yaml'
        res = model.train(data=dataset_yaml, epochs=EPOCHS, batch=BATCH_SIZE, optimizer='SGD')
        end = time.time()
        results['Time training'] = end - start

    # The configured epoch count is recorded whether or not training ran.
    results['Epochs'] = EPOCHS

## Export results

In [None]:
# Make sure the output folder exists.
os.makedirs('./results', exist_ok=True)

# Write every collected metric as one "key:value" line.
with open("results/results.txt", 'w') as results_file:
    results_file.writelines('%s:%s\n' % (key, value) for key, value in results.items())

### Random image result

In [None]:
from PIL import Image

# Run the trained model on one validation image and save the annotated prediction.
# The predictions get their own name so the metrics dict `results` is not overwritten;
# otherwise re-running the export cell after this one would fail.
sample_image_path = 'datasets/GeneratedDataset/images/val/image_0.png'
sample_predictions = model(sample_image_path)  # list of per-image results

# Make sure the output folder exists even if the export cell was skipped.
os.makedirs('results', exist_ok=True)

for r in sample_predictions:
    im_array = r.plot()  # BGR numpy array with the predictions drawn in
    im = Image.fromarray(im_array[..., ::-1])  # reverse the channel axis: BGR -> RGB for PIL
    im.show()  # show image
    im.save('results/results.jpg')  # save image

### Real footage video result

In [None]:
import cv2
import time

# Run the model on every frame of the real-footage video, overlay an FPS counter,
# preview the annotated frames live, and save them to results/real_footage.mp4.
video_path = "real_footage/Run1/run1.mp4"
output_path = 'results/real_footage.mp4'

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise FileNotFoundError("Could not open video: %s" % video_path)

# Write the output at the source frame rate so playback speed matches the input;
# fall back to 30 FPS when the container does not report a usable rate.
source_fps = cap.get(cv2.CAP_PROP_FPS)
output_fps = source_fps if source_fps and source_fps > 0 else 30.0

# Appearance of the FPS overlay.
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 1
fontColor = (0, 0, 255)
thickness = 2
lineType = 2

# Make sure the output folder exists even if the export cell was skipped.
os.makedirs('results', exist_ok=True)

out = None  # created lazily, once the size of the first annotated frame is known
frames_written = 0
# Start the clock before the first frame so the first FPS value is meaningful.
prev_frame_time = time.perf_counter()

try:
    while True:
        success, frame = cap.read()
        if not success:
            # End of the video (or a read error): stop processing.
            break

        # Inference output gets its own name so the metrics dict `results` stays intact.
        frame_results = model(frame)
        annotated_frame = frame_results[0].plot()

        # FPS is the inverse of the time between two consecutively finished frames.
        new_frame_time = time.perf_counter()
        elapsed = new_frame_time - prev_frame_time
        prev_frame_time = new_frame_time
        fps = str(int(1 / elapsed)) if elapsed > 0 else 'n/a'

        # Anchor the label near the top-right corner; this is (1750, 25) on
        # 1920-px-wide frames and stays on-screen for narrower ones.
        height, width = annotated_frame.shape[:2]
        text_origin = (max(width - 170, 0), 25)
        cv2.putText(annotated_frame, 'FPS: ' + fps,
                    text_origin,
                    font,
                    fontScale,
                    fontColor,
                    thickness,
                    lineType)

        # Stream each frame straight to disk instead of buffering the whole video in memory.
        if out is None:
            out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'),
                                  output_fps, (width, height))
            if not out.isOpened():
                raise RuntimeError("Could not open %s for writing" % output_path)
        out.write(annotated_frame)
        frames_written += 1

        # Live preview; press 'q' to stop early (frames written so far are kept).
        cv2.imshow("YOLOv8 Inference", annotated_frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Release the capture, writer and preview window even if inference fails midway.
    cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()

if frames_written:
    print('Video created successfully.')
else:
    print('No frames were read from %s; no video was written.' % video_path)