In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os

In [2]:
def load_data(imu_path, video_path):
    print(imu_path, video_path)
    imu_df = pd.read_csv(imu_path)
    video_df = pd.read_csv(video_path)

    imu_df = imu_df.sort_values("Timestamp")
    video_df = video_df.sort_values("Timestamp")

    return imu_df, video_df

In [3]:
def merge_imu_video(imu_df, video_df):
    merged_df = pd.merge_asof(video_df, imu_df, on="Timestamp")
    return merged_df

In [4]:
participant_data = []
imu_path = './processed/annotated/'
video_path = './processed/metadata/'
for i in range(1):  # Loop through 10 participants
    
    imu_df, video_df = load_data(imu_path+os.listdir(imu_path)[i], video_path+os.listdir(video_path)[i])
    merged_df = merge_imu_video(imu_df, video_df)
    
    participant_data.append(merged_df)  # Store each participant’s merged data

./processed/annotated/participant_01.csv ./processed/metadata/participant_01.csv


In [5]:
final_df = pd.concat(participant_data, ignore_index=True)

In [6]:
final_df.head()

Unnamed: 0.1,Unnamed: 0,Timestamp,frame_path,QuatI_0,QuatJ_0,QuatK_0,QuatSum_0,QuatI_1,QuatJ_1,QuatK_1,...,QuatSum_3,QuatI_4,QuatJ_4,QuatK_4,QuatSum_4,QuatI_5,QuatJ_5,QuatK_5,QuatSum_5,Activity
0,0,0.0,../../video_data/frames/participant_1\frame_01...,0.135776,-0.690791,0.703221,0.099264,-0.561731,-0.481653,-0.120886,...,-0.571519,-0.663916,0.397668,-0.258874,-0.577979,0.163317,-0.767408,0.495741,0.372363,Unknown
1,1,0.001,../../video_data/frames/participant_1\frame_01...,0.135776,-0.690791,0.703221,0.099264,-0.561731,-0.481653,-0.120886,...,-0.571519,-0.663916,0.397668,-0.258874,-0.577979,0.163317,-0.767408,0.495741,0.372363,Unknown
2,2,0.006,../../video_data/frames/participant_1\frame_01...,0.135776,-0.690791,0.703221,0.099264,-0.561731,-0.481653,-0.120886,...,-0.571519,-0.663916,0.397668,-0.258874,-0.577979,0.163317,-0.767408,0.495741,0.372363,Unknown
3,3,0.007,../../video_data/frames/participant_1\frame_01...,0.135776,-0.690791,0.703221,0.099264,-0.561731,-0.481653,-0.120886,...,-0.571519,-0.663916,0.397668,-0.258874,-0.577979,0.163317,-0.767408,0.495741,0.372363,Unknown
4,4,0.035,../../video_data/frames/participant_1\frame_01...,0.135819,-0.69079,0.703219,0.099221,-0.571936,0.540918,-0.231226,...,0.372091,0.135819,-0.69079,0.703219,0.099221,-0.561687,-0.48167,-0.12093,-0.661723,Unknown


In [7]:
X_imu = final_df[[col for col in final_df.columns if 'Quat' in col]].values
X_imu[:5]

array([[ 0.13577624, -0.69079099,  0.70322061,  0.09926426, -0.56173091,
        -0.48165279, -0.12088628, -0.6617065 , -0.70092144,  0.43164692,
         0.08855571, -0.56084575, -0.57200712,  0.54102999, -0.23121554,
        -0.57151884, -0.66391614,  0.39766833, -0.25887439, -0.57797864,
         0.16331711, -0.76740799,  0.49574051,  0.37236252],
       [ 0.13577624, -0.69079099,  0.70322061,  0.09926426, -0.56173091,
        -0.48165279, -0.12088628, -0.6617065 , -0.70092144,  0.43164692,
         0.08855571, -0.56084575, -0.57200712,  0.54102999, -0.23121554,
        -0.57151884, -0.66391614,  0.39766833, -0.25887439, -0.57797864,
         0.16331711, -0.76740799,  0.49574051,  0.37236252],
       [ 0.13577624, -0.69079099,  0.70322061,  0.09926426, -0.56173091,
        -0.48165279, -0.12088628, -0.6617065 , -0.70092144,  0.43164692,
         0.08855571, -0.56084575, -0.57200712,  0.54102999, -0.23121554,
        -0.57151884, -0.66391614,  0.39766833, -0.25887439, -0.57797864,
  

In [8]:
X_videopaths = final_df["frame_path"].values
X_videopaths[:5]

array(['../../video_data/frames/participant_1\\frame_01_0.0.jpg',
       '../../video_data/frames/participant_1\\frame_01_0.001.jpg',
       '../../video_data/frames/participant_1\\frame_01_0.006.jpg',
       '../../video_data/frames/participant_1\\frame_01_0.007.jpg',
       '../../video_data/frames/participant_1\\frame_01_0.035.jpg'],
      dtype=object)

In [9]:
y_train = final_df["Activity"].values
y_train[:5]

array(['Unknown', 'Unknown', 'Unknown', 'Unknown', 'Unknown'],
      dtype=object)

## **Extracting image features from a pretrained model**

In [10]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing.image import load_img, img_to_array

In [11]:
feature_extractor = MobileNetV2(weights="imagenet", include_top=False, pooling="avg")

  feature_extractor = MobileNetV2(weights="imagenet", include_top=False, pooling="avg")


In [None]:
def extract_image_features(image_paths):
    features = []
    for path in image_paths:
        img = load_img(path, target_size=(224, 224))

        img_array = img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0) #to adding batch dimension
        img_array = img_array/255.0 #normalizing pixel values between 0 and 1

        feature = feature_extractor.predict(img_array)
        features.append(feature.flatten())

    return np.array(features)

image_features = extract_image_features(X_videopaths)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms

In [13]:
image_features[:1]

array([[0.69155294, 0.05983061, 0.8619611 , ..., 0.89078623, 0.        ,
        1.3124852 ]], dtype=float32)

In [14]:
image_features.shape

(15405, 1280)

In [15]:
final_df_filtered = final_df.copy()
final_df_filtered["Image_Features"] = list(image_features)

final_df_filtered.to_csv("./processed/multimodal/participant_01.csv")

In [None]:
df_loaded = pd.read_csv("imu_video_features.csv")

# Convert image features back to NumPy array
df_loaded["Image_Features"] = df_loaded["Image_Features"].apply(lambda x: np.array(eval(x)))