### Imports

In [None]:
# Datamanipulation 
import pandas as pd
import numpy as np
import unicodedata
# Working with images
import cv2
# Path tools
import glob
import os
# Stringmatching
from fuzzywuzzy import fuzz
# Vizualization
from matplotlib import pyplot as plt
import json



### Functions

In [None]:
def compare_frames_screenshot_with_matchTemplate(video_path, screenshot_path):
    """ For a given video and screenshot this function returns the frame of the video 
    with the highest simmilarity to the screenshot using openCV matchTemplate"""

    screenshot = cv2.imread(screenshot_path, cv2.IMREAD_GRAYSCALE)
    video = cv2.VideoCapture(video_path)
    highest_match_score = 0
    highscore_frame = 0

    for i in range(0,int(video.get(cv2.CAP_PROP_FRAME_COUNT))):
        video.set(cv2.CAP_PROP_POS_FRAMES, i)
        success, extracted_frame = video.read()

        if not success:
            print(f"Warning: Could not read frame {i}")
            continue
        
        extracted_frame_gray = cv2.cvtColor(extracted_frame, cv2.COLOR_BGR2GRAY)

        result = cv2.matchTemplate(extracted_frame_gray, screenshot, cv2.TM_CCOEFF_NORMED)
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)

        if max_val > highest_match_score:
            highest_match_score = max_val
            highscore_frame = i

    print(f"frame number: {highscore_frame} has the highest score with:  {highest_match_score}")

    return highest_match_score, highscore_frame


def plot_frame(image):
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Plot the image with matplotlib
    plt.imshow(image_rgb)
    plt.axis('off')  # Turn off axis
    plt.title("Image from OpenCV")
    plt.show()

def contains_umlaut_or_sharp_s(s):
    umlauts_and_sharp_s = set("äöüÄÖÜß")
    normalized_string = unicodedata.normalize("NFC", s)
    return any(char in umlauts_and_sharp_s for char in normalized_string)

### Sort, filter and structure videos and images

In [114]:
#path_to_images = "/Volumes/ThesisUSB/AVP XC Skiing Data/Skating 2-1 armswing/pictures/lateral/"
path_to_images = "D:\\AVP XC Skiing Data\\Skating 2-1 armswing\\pictures\\lateral\\"
skier_images = glob.glob(path_to_images + "*.jpg") + glob.glob(path_to_images + "*.png")

#path_to_videos = "/Volumes/ThesisUSB/AVP XC Skiing Data/Skating 2-1 armswing/videos/lateral/"
path_to_videos = "D:\\AVP XC Skiing Data\\Skating 2-1 armswing\\videos\\lateral\\" 
skier_videos = glob.glob(path_to_videos + "*.[mM][pP]4") + glob.glob(path_to_videos + "*.[mM][oO][vV]")
skier_videos = [video for video in skier_videos if "_lq" not in video] # exclude all videos containing "_lq" to avoid double videos

image_file_names = []
video_file_names = []

for image_path in skier_images:
    file_name_with_extension = os.path.basename(image_path)
    image_file_names.append(file_name_with_extension)

for video_path in skier_videos:
    file_name_with_extension = os.path.basename(video_path)
    video_file_names.append(file_name_with_extension)

### Generation of a dictionary containing video names (keys) with 5 matching label imagenames (values) 

In [117]:
videos_and_images_dict = {}

for video_string in video_file_names:
    images_per_vid_dict = {}
    for image_string in image_file_names:
        score = fuzz.ratio(video_string, image_string) # fuzzywuzzy used to compare simmilarity of strings
        images_per_vid_dict[image_string] = score

    top_5 = dict(sorted(images_per_vid_dict.items(), key=lambda item: item[1], reverse=True)[:5])
    key_list = list(top_5.keys())

    videos_and_images_dict[video_string] = key_list

### Find label frames in video 

In [None]:
path_to_screenshot = path_to_images + videos_and_images_dict['Hermann Noemi.mp4'][1]
path_to_frame = path_to_videos + "Hermann Noemi.mp4"

print(path_to_frame)
print(path_to_screenshot)
compare_frames_screenshot_with_matchTemplate(path_to_frame, path_to_screenshot)

D:\AVP XC Skiing Data\Skating 2-1 armswing\videos\lateral\Hermann Noemi.mp4
D:\AVP XC Skiing Data\Skating 2-1 armswing\pictures\lateral\Hermann Noemi_004.jpg
frame number: 63 has the highest score with:  0.9992833137512207


(0.9992833137512207, 63)

### Generation of dict containing Video and Label Frames 
- Iteration over videos_and_images_dict to find the highscoreframe which is the full label frame 

In [None]:
label_frames_dict = {
    "videos": []
}

for key, value in videos_and_images_dict.items():
    print(f"Processing video: {key}")

    single_video_dict = {
        "videoname": key,
        "frame_details": []
    }

    for screenshot in range(len(value)):
        print(f"Processing screenshot {screenshot} for video {key}")
        
        frame_detail_dict = {}
        path_to_frame = path_to_images + key
        path_to_screenshot = path_to_images + videos_and_images_dict[key][screenshot]

        print(f"Path to frame: {path_to_frame}")
        print(f"Path to screenshot: {path_to_screenshot}")
        
        highest_match_score, highscore_frame = compare_frames_screenshot_with_matchTemplate(path_to_frame, path_to_screenshot)
        print(f"Match score: {highest_match_score}, High score frame: {highscore_frame}")

        frame_detail_dict["frame_number"] = highscore_frame
        frame_detail_dict["score"] = highest_match_score
        frame_detail_dict["image_name"] = videos_and_images_dict[key][screenshot]

        single_video_dict["frame_details"].append(frame_detail_dict)
    
    label_frames_dict["videos"].append(single_video_dict)

with open("D:\Code\labelFrames.json", "w") as json_file:
    json.dump(label_frames_dict, json_file, indent=4)



Processing video: Klauser Hannes.mp4
Processing screenshot 0 for video Klauser Hannes.mp4
Path to frame: D:\AVP XC Skiing Data\Skating 2-1 armswing\pictures\lateral\Klauser Hannes.mp4
Path to screenshot: D:\AVP XC Skiing Data\Skating 2-1 armswing\pictures\lateral\Klauser Hannes_004.jpg
frame number: 0 has the highest score with:  0
Match score: 0, High score frame: 0
Processing screenshot 1 for video Klauser Hannes.mp4
Path to frame: D:\AVP XC Skiing Data\Skating 2-1 armswing\pictures\lateral\Klauser Hannes.mp4
Path to screenshot: D:\AVP XC Skiing Data\Skating 2-1 armswing\pictures\lateral\Klauser Hannes_005.jpg
frame number: 0 has the highest score with:  0
Match score: 0, High score frame: 0
Processing screenshot 2 for video Klauser Hannes.mp4
Path to frame: D:\AVP XC Skiing Data\Skating 2-1 armswing\pictures\lateral\Klauser Hannes.mp4
Path to screenshot: D:\AVP XC Skiing Data\Skating 2-1 armswing\pictures\lateral\Klauser Hannes_001.jpg
frame number: 0 has the highest score with:  0


### Demo of TemplateMatching

In [None]:
path_to_example_screenshot = path_to_images + videos_and_images_dict['Grüner Amelie.MOV'][1]
path_to_frame = path_to_videos + "Grüner Amelie.MOV"

example_score, example_frame = compare_frames_screenshot_with_matchTemplate(path_to_frame, path_to_example_screenshot)
screenshot = cv2.imread(path_to_example_screenshot, cv2.IMREAD_COLOR)
test_cap = cv2.VideoCapture(path_to_frame)
test_cap.set(cv2.CAP_PROP_POS_FRAMES, example_frame)
ret, extracted_frame = test_cap.read()

plot_frame(extracted_frame)
plot_frame(screenshot)