In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import csv
import os
import re
import time
from moviepy.editor import VideoFileClip
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip

from pathlib import Path


In [18]:
# Debug switch. Nothing in the visible cells reads this name; presumably it is
# consumed by PyCharm tooling or by a cell not shown here (TODO confirm).
PYCHARM_DEBUG=True

In [7]:
def traverse(root_dir='//153.19.52.107/emboa/IO3-sessions/NEW STRUCTURE/Camera',
             research_centers=('GUT', 'ITU-YU', 'MAAP'),
             s_values=None,
             c_values=None):
    """Print every existing ``<root>/<center>/<session>/<camera>`` directory.

    Parameters
    ----------
    root_dir : str or path-like
        Directory that contains one sub-directory per research center.
        Defaults to the shared camera folder used so far.
    research_centers : iterable of str
        Names of the research-center sub-directories to inspect.
    s_values : iterable of str, optional
        Session directory names; defaults to ``S01`` .. ``S11``.
    c_values : iterable of str, optional
        Camera directory names; defaults to ``C01`` .. ``C13``.

    Returns
    -------
    None
        The function only reports what it finds on standard output.
    """
    if s_values is None:
        s_values = [f"S{number:02d}" for number in range(1, 12)]
    if c_values is None:
        c_values = [f"C{number:02d}" for number in range(1, 14)]

    root = Path(root_dir)
    for research_center in research_centers:
        for session in s_values:
            session_dir = root.joinpath(research_center, session)
            # Skip the camera probes entirely when the session folder is absent.
            if not session_dir.is_dir():
                continue
            for camera in c_values:
                camera_dir = session_dir.joinpath(camera)
                if camera_dir.is_dir():
                    print(f"{camera_dir} exists.")
                    # TODO: call the function for vectorizing
                    # TODO: call the function for combining BORIS csvs
traverse()

\\153.19.52.107\emboa\IO3-sessions\NEW STRUCTURE\Camera\GUT\S01\C01 exists.
\\153.19.52.107\emboa\IO3-sessions\NEW STRUCTURE\Camera\GUT\S01\C02 exists.
\\153.19.52.107\emboa\IO3-sessions\NEW STRUCTURE\Camera\GUT\S01\C03 exists.
\\153.19.52.107\emboa\IO3-sessions\NEW STRUCTURE\Camera\GUT\S02\C01 exists.
\\153.19.52.107\emboa\IO3-sessions\NEW STRUCTURE\Camera\GUT\S02\C02 exists.
\\153.19.52.107\emboa\IO3-sessions\NEW STRUCTURE\Camera\GUT\S02\C03 exists.
\\153.19.52.107\emboa\IO3-sessions\NEW STRUCTURE\Camera\GUT\S03\C01 exists.
\\153.19.52.107\emboa\IO3-sessions\NEW STRUCTURE\Camera\GUT\S03\C02 exists.
\\153.19.52.107\emboa\IO3-sessions\NEW STRUCTURE\Camera\ITU-YU\S01\C01 exists.
\\153.19.52.107\emboa\IO3-sessions\NEW STRUCTURE\Camera\ITU-YU\S01\C02 exists.
\\153.19.52.107\emboa\IO3-sessions\NEW STRUCTURE\Camera\ITU-YU\S01\C03 exists.
\\153.19.52.107\emboa\IO3-sessions\NEW STRUCTURE\Camera\ITU-YU\S01\C04 exists.
\\153.19.52.107\emboa\IO3-sessions\NEW STRUCTURE\Camera\ITU-YU\S01\C05 exist

In [3]:
 def get_video_metadata(file_path):
    """Describe a video file by its modification time and its length.

    On success a dict is returned with these keys:

    * ``'last_modified_time'``      - modification time formatted by ``time.ctime``
    * ``'last_modified_timestamp'`` - the same moment as whole Unix seconds
    * ``'duration'``                - clip length truncated to whole seconds
    * ``'initial_timestamp'``       - modification timestamp minus the duration
      (presumably the moment the recording started; this assumes the file was
      last written when the recording ended - TODO confirm)

    On failure nothing is raised: a human-readable message string is returned
    instead, so callers have to check the type of the result.
    """
    try:
        mtime = os.stat(file_path).st_mtime
        modified_text = time.ctime(mtime)
        modified_unix = int(mtime)

        # The context manager closes the clip once the duration has been read.
        with VideoFileClip(file_path) as clip:
            length = clip.duration

        whole_seconds = int(length)
        return {
            'last_modified_time': modified_text,
            'last_modified_timestamp': modified_unix,
            'duration': whole_seconds,
            'initial_timestamp': modified_unix - whole_seconds,
        }
    except FileNotFoundError:
        return f"The file {file_path} does not exist."
    except Exception as e:
        return f"An error occurred: {str(e)}"
    

In [4]:
def get_unix_and_hz(file_path):
    """Read the start time and the sampling frequency from a signal CSV.

    The first cell of the first row is taken as the Unix start time and the
    first cell of the second row as the frequency. Both are parsed as floats
    and then truncated to ``int``.

    Returns
    -------
    tuple of (int, int)
        ``(unix, hz)``.
    """
    def leading_number(rows):
        # Consume exactly one row and convert its first cell.
        return int(float(next(rows)[0]))

    with open(file_path, newline='') as handle:
        rows = csv.reader(handle)
        start_unix = leading_number(rows)
        sample_rate = leading_number(rows)
    return start_unix, sample_rate

In [5]:
def trim_vector(vector, rate):
    """Cut trailing elements so that ``len(vector)`` is a multiple of ``rate``.

    When the length already divides evenly the input object itself is
    returned; otherwise a slice without the surplus tail is returned.
    """
    leftover = len(vector) % rate
    if leftover == 0:
        return vector
    # Drop only the elements that do not fill a complete group.
    return vector[:-leftover]

In [6]:
def mean_of_values(vector, rate):
    """Lower the sampling frequency by averaging consecutive groups of samples.

    The input is reshaped into rows of ``rate`` elements each and the mean of
    every row is returned.

    Raises
    ------
    ValueError
        If ``len(vector)`` is not a multiple of ``rate``.
    """
    if len(vector) % rate:
        raise ValueError("Length of the vector must be a multiple of frequency")

    # One row per group of ``rate`` consecutive samples.
    grouped = np.reshape(vector, (-1, rate))
    return np.mean(grouped, axis=1)

In [None]:
def _read_biosignal(csv_path):
    """Load one biosignal CSV and return ``(samples, start_unix, rate_hz)``."""
    # The first two rows hold the start time and the frequency, not samples.
    samples = pd.read_csv(csv_path, skiprows=2, header=None)
    start_unix, rate_hz = get_unix_and_hz(csv_path)
    return samples, start_unix, rate_hz


def slice_vectors(video_path, biosignal_path, storage_path):
    """Trim a video and its EDA/TEMP/HR recordings to their common time window.

    The window runs from the latest start to the earliest end among the video
    and the three biosignals, all measured in whole Unix seconds.

    Parameters
    ----------
    video_path : str
        Video file to inspect and cut.
    biosignal_path : str
        Directory containing ``EDA.csv``, ``TEMP.csv`` and ``HR.csv``.
    storage_path : str
        Directory that receives ``sliced.mp4``, ``sliced_EDA.csv``,
        ``sliced_TEMP.csv`` and ``sliced_HR.csv``.

    Both directories are combined with the file names via ``os.path.join``, so
    a trailing separator is optional.

    Raises
    ------
    RuntimeError
        If ``get_video_metadata`` reports an error instead of metadata.
    ValueError
        If the video and the biosignals share no common time window.
    """
    # get_video_metadata returns a message string on failure; indexing that
    # string would only produce an unrelated TypeError further down.
    video = get_video_metadata(video_path)
    if not isinstance(video, dict):
        raise RuntimeError(video)
    unix_video = video['initial_timestamp']

    signals = {
        name: _read_biosignal(os.path.join(biosignal_path, name + '.csv'))
        for name in ('EDA', 'TEMP', 'HR')
    }

    # Start and end of every source in Unix seconds. A signal lasts as many
    # whole seconds as it has complete groups of ``rate_hz`` samples.
    starts = [unix_video]
    ends = [unix_video + video['duration']]
    for samples, start_unix, rate_hz in signals.values():
        starts.append(start_unix)
        ends.append(start_unix + len(samples) // rate_hz)

    last_start = max(starts)
    first_end = min(ends)
    if first_end <= last_start:
        raise ValueError("The video and the biosignals do not overlap in time")

    # Cut the video first; its offsets are seconds relative to its own start.
    ffmpeg_extract_subclip(
        video_path,
        last_start - unix_video,
        first_end - unix_video,
        targetname=os.path.join(storage_path, 'sliced.mp4'),
    )

    for name, (samples, start_unix, rate_hz) in signals.items():
        # Offsets are in seconds; multiply by the sampling rate to obtain row
        # positions in the raw (not down-sampled) recording.
        first_row = (last_start - start_unix) * rate_hz
        last_row = (first_end - start_unix) * rate_hz
        sliced = samples.iloc[first_row:last_row]
        sliced.to_csv(os.path.join(storage_path, 'sliced_' + name + '.csv'), index=False)

In [8]:
# Prototype driver (originally marked "trash needed to be moved"): for every
# camera folder of session S01 it locates the recorded video and the wristband
# CSVs, determines the time window covered by all of them, and writes the
# trimmed video and biosignals next to their sources.
# NOTE(review): main_directory is an absolute, machine-specific path.
main_directory = 'C:/Users/oskik/PycharmProjects/InżynierkaSandbox/'
s01_values = ["S01_boris/", "S01_wristband/"]
c_values = ["C01/", "C02/", "C03/"]
# Despite its name this pattern selects the .mp4 video files.
mp3_pattern = re.compile(r'Untitled \d+\.mp4')

for c_value in c_values:
    # Reset per camera so a video found for a previous camera is never reused.
    movie_path = None
    for s01_value in s01_values:
        if s01_value == "S01_boris/":
            movie_dir_path = os.path.join(main_directory, s01_value, c_value)
            print(movie_dir_path)
            if os.path.isdir(movie_dir_path):
                for file_name in os.listdir(movie_dir_path):
                    print(file_name)
                    if mp3_pattern.match(file_name):
                        movie_path = os.path.join(main_directory, s01_value, c_value, file_name)
                        print(movie_path)

            if movie_path is None:
                # No video for this camera: nothing to align against.
                break

            video = get_video_metadata(movie_path)
            if not isinstance(video, dict):
                # get_video_metadata reports failures as a message string.
                print(video)
                movie_path = None
                break
            unix_video = video['initial_timestamp']

        elif s01_value == "S01_wristband/":
            main_path = os.path.join(main_directory, s01_value, c_value)

            EDA_path = main_path+'EDA.csv'
            TEMP_path = main_path+'TEMP.csv'
            HR_path = main_path+'HR.csv'

            # The first two rows hold start time and frequency, not samples.
            EDA = pd.read_csv(EDA_path, skiprows = 2, header = None)
            TEMP = pd.read_csv(TEMP_path, skiprows = 2, header = None)
            HR = pd.read_csv(HR_path, skiprows = 2, header = None)

            unix_EDA, hz_EDA = get_unix_and_hz(EDA_path)
            unix_TEMP, hz_TEMP = get_unix_and_hz(TEMP_path)
            unix_HR, hz_HR = get_unix_and_hz(HR_path)

            # Per-second means; their lengths are the signal durations in seconds.
            EDA_mean = mean_of_values(trim_vector(EDA, hz_EDA), hz_EDA)
            TEMP_mean = mean_of_values(trim_vector(TEMP, hz_TEMP), hz_TEMP)
            HR_mean = mean_of_values(trim_vector(HR, hz_HR), hz_HR)

            length_EDA = len(EDA_mean)
            length_TEMP = len(TEMP_mean)
            length_HR = len(HR_mean)

    if movie_path is None:
        print(f"No usable video for {c_value}; skipping.")
        continue

    starts = [unix_HR, unix_TEMP, unix_EDA, unix_video]
    ends = [unix_HR + length_HR, unix_TEMP + length_TEMP, unix_EDA + length_EDA, unix_video + video['duration']]

    # Common window: latest start to earliest end, in Unix seconds.
    last_start = max(starts)
    first_end = min(ends)
    if first_end <= last_start:
        print(f"No common time window for {c_value}; skipping.")
        continue

    # Offsets of the common window relative to each source, in seconds.
    EDA_first_index = last_start - unix_EDA
    EDA_last_index = first_end - unix_EDA
    TEMP_first_index = last_start - unix_TEMP
    TEMP_last_index = first_end - unix_TEMP
    HR_first_index = last_start - unix_HR
    HR_last_index = first_end - unix_HR
    video_first_index = last_start - unix_video
    video_last_index = first_end - unix_video

    # The raw recordings hold hz samples per second, so the second offsets
    # are scaled by the sampling rate to obtain row positions.
    sliced_EDA = EDA[EDA_first_index * hz_EDA:EDA_last_index * hz_EDA]
    sliced_TEMP = TEMP[TEMP_first_index * hz_TEMP:TEMP_last_index * hz_TEMP]
    sliced_HR = HR[HR_first_index * hz_HR:HR_last_index * hz_HR]

    ffmpeg_extract_subclip(movie_path, video_first_index, video_last_index, targetname=movie_dir_path+'sliced.mp4')

    sliced_EDA_path = main_path+'sliced_EDA.csv'
    sliced_TEMP_path = main_path+'sliced_TEMP.csv'
    sliced_HR_path = main_path+'sliced_HR.csv'

    sliced_EDA.to_csv(sliced_EDA_path, index=False)
    sliced_TEMP.to_csv(sliced_TEMP_path, index=False)
    sliced_HR.to_csv(sliced_HR_path, index=False)