## Building a First Data Set for Baselinemodel

In [7]:
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
import os
import re
from fuzzywuzzy import fuzz
import glob

In [11]:
# Initialize Mediapipe Pose
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

In [43]:
def extract_pose_features(image_path):
    # Read the image
    image = cv2.imread(image_path)

    with mp_pose.Pose(
        static_image_mode=True,
        model_complexity=2, # BlazePose GHUM Heavy, BlazePose GHUM Full, BlazePose GHUM Lite (flags 0,1,2)
        min_detection_confidence=0.5) as pose:

        # Convert the image to RGB before processing
        results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        if results.pose_landmarks:
            pose_features = []
            for landmark in results.pose_landmarks.landmark:
                pose_features.extend([landmark.x, landmark.y, landmark.z, landmark.visibility])
            
            return np.array(pose_features)
        else:
            return None

def extract_class(filename):
    match = re.search(r'_ *(\d+)\.', filename)
    if match:
        return int(match.group(1))  # Convert to int to remove leading zeros
    else:
        return 42 

### Extracting Features using MediaPipe 

In [60]:
path_to_images = "/Volumes/ThesisUSB/AVP XC Skiing Data/Skating 2-1 armswing/pictures/lateral/"
skier_images = glob.glob(path_to_images + "*.jpg") + glob.glob(path_to_images + "*.png")

path_to_videos = "/Volumes/ThesisUSB/AVP XC Skiing Data/Skating 2-1 armswing/videos/lateral/"
skier_videos = glob.glob(path_to_videos + "*.[mM][pP]4") + glob.glob(path_to_videos + "*.[mM][oO][vV]")
skier_videos = [video for video in skier_videos if "_lq" not in video] # exclude all videos containing "_lq" to avoid double videos


In [90]:
len(X_df)

122

In [82]:
X_df = pd.DataFrame()

for image in skier_images:
    one_row_df = pd.DataFrame(extract_pose_features(image)).T
    one_row_df["file_names"] = os.path.basename(image)
    X_df = pd.concat([X_df, one_row_df])



I0000 00:00:1734613949.895202 8936762 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-20.7.2), renderer: Intel(R) Iris(TM) Plus Graphics OpenGL Engine
W0000 00:00:1734613950.192965 8992715 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1734613950.482702 8992715 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1734613950.660801 8936762 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-20.7.2), renderer: Intel(R) Iris(TM) Plus Graphics OpenGL Engine
W0000 00:00:1734613951.043313 8992759 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1734613951.302298 8992760 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling supp

In [83]:
X_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,123,124,125,126,127,128,129,130,131,file_names
0,0.594710,0.607358,-0.237547,0.991054,0.582905,0.613918,-0.261049,0.995308,0.581714,0.611666,...,0.072983,0.410019,0.182484,0.155460,0.038729,0.407979,0.205582,0.531176,0.017288,Linner Mario_002.jpg
0,0.689637,0.310918,-0.100025,0.999568,0.687223,0.293781,-0.059546,0.999075,0.685470,0.292523,...,0.979633,0.578431,0.792305,0.069970,0.838521,0.443621,0.809390,-0.416560,0.985046,Schmelzle Janne_004.jpg
0,0.551863,0.279512,-0.233709,0.999977,0.542985,0.266747,-0.200525,0.999954,0.541491,0.266109,...,0.952188,0.534059,0.729063,-0.336072,0.970187,0.466147,0.744449,-0.612756,0.988726,Mammey Max_004.jpg
0,0.592290,0.318468,0.059075,0.999672,0.586135,0.307145,0.091095,0.999296,0.584195,0.306294,...,0.747749,0.454012,0.733354,0.183739,0.157708,0.462932,0.741350,-0.317400,0.641894,Ketterer Colin_002.jpg
0,0.629260,0.340617,-0.108759,0.999928,0.627051,0.329049,-0.073965,0.999924,0.625345,0.328426,...,0.991248,0.422976,0.706580,-0.013507,0.835281,0.578374,0.696502,-0.350741,0.990911,Ketterer Colin_003.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,0.554173,0.191943,-0.074652,0.999993,0.551390,0.173833,-0.057135,0.999987,0.550162,0.172120,...,0.833363,0.501621,0.877867,-0.116753,0.896228,0.366283,0.868228,-0.432851,0.961370,Niemeyer Felix _001.jpg
0,0.571984,0.178787,-0.073571,0.999503,0.566462,0.156814,-0.062442,0.999253,0.564298,0.154234,...,0.989910,0.413145,0.858429,-0.043339,0.974025,0.543799,0.817124,-0.146574,0.992667,Klauser Hannes_003.jpg
0,0.626971,0.258003,-0.220021,0.999835,0.625264,0.244618,-0.200535,0.999657,0.622627,0.244001,...,0.980319,0.398584,0.736251,0.458858,0.959887,0.522686,0.753226,-0.176236,0.989362,Armbruster Nathalie _003.png
0,0.663020,0.270345,-0.148934,1.000000,0.657906,0.257391,-0.118758,0.999999,0.656092,0.257115,...,0.984692,0.565946,0.768067,-0.081894,0.995326,0.419799,0.781654,-0.362590,0.996457,Armbruster Nathalie _004.png


<div class="alert alert-block alert-info"> 
<b>Assumption:</b> The Numbers in each filename are correctly assigned to each class
</div>

### Extract Class from Filename

In [64]:
image_file_names = []
video_file_names = []

for image_path in skier_images:
    file_name_with_extension = os.path.basename(image_path)
    image_file_names.append(file_name_with_extension)

for video_path in skier_videos:
    file_name_with_extension = os.path.basename(video_path)
    video_file_names.append(file_name_with_extension)


videos_and_images_dict = {}

for video_string in video_file_names:
    images_per_vid_dict = {}
    for image_string in image_file_names:
        score = fuzz.ratio(video_string, image_string) # fuzzywuzzy used to compare simmilarity of strings
        images_per_vid_dict[image_string] = score

    # build from 5 images with hightest matching score
    top_5 = dict(sorted(images_per_vid_dict.items(), key=lambda item: item[1], reverse=True)[:5])
    key_list = list(top_5.keys())

    videos_and_images_dict[video_string] = key_list

In [66]:
file_names = []
for key in videos_and_images_dict:
    file_names = file_names + videos_and_images_dict[key] 

In [67]:
y_df = pd.DataFrame(file_names, columns=["file_names"])
y_df['class'] = y_df['file_names'].apply(extract_class) 

y_df


Unnamed: 0,file_names,class
0,Klauser Hannes_004.jpg,4
1,Klauser Hannes_005.jpg,5
2,Klauser Hannes_001.jpg,1
3,Klauser Hannes_002.jpg,2
4,Klauser Hannes_003.jpg,3
...,...,...
150,Uhlig Niklas_003.jpg,3
151,Uhlig Niklas_002.jpg,2
152,Uhlig Niklas_001.jpg,1
153,Uhlig Niklas_005.jpg,5


### Feature Label Matrix Merge

In [84]:
df_merged = X_df.merge(y_df, on='file_names')

In [85]:
df_merged

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,124,125,126,127,128,129,130,131,file_names,class
0,0.594710,0.607358,-0.237547,0.991054,0.582905,0.613918,-0.261049,0.995308,0.581714,0.611666,...,0.410019,0.182484,0.155460,0.038729,0.407979,0.205582,0.531176,0.017288,Linner Mario_002.jpg,2
1,0.689637,0.310918,-0.100025,0.999568,0.687223,0.293781,-0.059546,0.999075,0.685470,0.292523,...,0.578431,0.792305,0.069970,0.838521,0.443621,0.809390,-0.416560,0.985046,Schmelzle Janne_004.jpg,4
2,0.551863,0.279512,-0.233709,0.999977,0.542985,0.266747,-0.200525,0.999954,0.541491,0.266109,...,0.534059,0.729063,-0.336072,0.970187,0.466147,0.744449,-0.612756,0.988726,Mammey Max_004.jpg,4
3,0.592290,0.318468,0.059075,0.999672,0.586135,0.307145,0.091095,0.999296,0.584195,0.306294,...,0.454012,0.733354,0.183739,0.157708,0.462932,0.741350,-0.317400,0.641894,Ketterer Colin_002.jpg,2
4,0.629260,0.340617,-0.108759,0.999928,0.627051,0.329049,-0.073965,0.999924,0.625345,0.328426,...,0.422976,0.706580,-0.013507,0.835281,0.578374,0.696502,-0.350741,0.990911,Ketterer Colin_003.jpg,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117,0.554173,0.191943,-0.074652,0.999993,0.551390,0.173833,-0.057135,0.999987,0.550162,0.172120,...,0.501621,0.877867,-0.116753,0.896228,0.366283,0.868228,-0.432851,0.961370,Niemeyer Felix _001.jpg,1
118,0.571984,0.178787,-0.073571,0.999503,0.566462,0.156814,-0.062442,0.999253,0.564298,0.154234,...,0.413145,0.858429,-0.043339,0.974025,0.543799,0.817124,-0.146574,0.992667,Klauser Hannes_003.jpg,3
119,0.626971,0.258003,-0.220021,0.999835,0.625264,0.244618,-0.200535,0.999657,0.622627,0.244001,...,0.398584,0.736251,0.458858,0.959887,0.522686,0.753226,-0.176236,0.989362,Armbruster Nathalie _003.png,3
120,0.663020,0.270345,-0.148934,1.000000,0.657906,0.257391,-0.118758,0.999999,0.656092,0.257115,...,0.565946,0.768067,-0.081894,0.995326,0.419799,0.781654,-0.362590,0.996457,Armbruster Nathalie _004.png,4


In [88]:
len(X_df)

122