In [4]:
import numpy as np
import pandas as pd
import csv, os, re, time, logging, cv2, torch
from pathlib import Path
from importlib.metadata import version
from facenet_pytorch import InceptionResnetV1, MTCNN
from PIL import Image
from moviepy.editor import VideoFileClip

In [5]:
# Set logging level to DEBUG
logging.basicConfig(level=logging.DEBUG)

# Replace 'video.mp4' with the path to your media file
#probe = ffmpeg.probe('//153.19.52.107/emboa/IO3-sessions/NEW STRUCTURE/Camera/GUT/S01/C02/Untitled 140.mp4')
#print(probe)

In [6]:
version('ffmpeg-python')

PackageNotFoundError: No package metadata was found for ffmpeg-python

In [7]:
PYCHARM_DEBUG=True

In [8]:
def print_table(table):
    for row in table:
        print(row)

def create_table(tab1, tab2, tab3):
    result_tab = []

    emotions = ['Unknown', 'Happy', 'Sad', 'Scared', 'Disgusted', 'Surprised', 'Angry']

    for row1, row2, row3 in zip(tab1, tab2, tab3):
        sum_rows = [int(a) + int(b) + int(c) for a, b, c in zip(row1[1:], row2[1:], row3[1:])]

        total_sum = sum(sum_rows)
        if total_sum > 0:
            # Calculate the percentage distribution
            percentages = [round((x / total_sum) * 100, 2) for x in sum_rows]
        else:
            percentages = [0] * len(sum_rows)

        result_tab.append([row1[0]] + percentages)

    return result_tab

# Makes table equal
def fill_table(table, csvreader):
    seconds = 0
    stop = 0
    for row in csvreader:
        row[0] = int(row[0].split('.')[0])
        if seconds <= row[0] and stop == 0:  #fill table with missing seconds
            for i in range(0, row[0]):
                table.append([i, '0', '0', '0', '0', '0', '0', '0'])
                seconds += 1
            stop = 1
            table.append(row)
        else:
            table.append(row)


# Create percentages for every second
def csv_files_reader(base_path):
    index = ['I', 'II', 'III']
    tab1 = []
    tab2 = []
    tab3 = []

    if os.path.exists(base_path):

        for i in index:
            file = os.path.join(base_path, i)
            for file_name in os.listdir(file):
                file_path = os.path.join(file, file_name)

                with open(file_path, 'r') as file:
                    csvreader = csv.reader(file)
                    header = next(csvreader)

                    if i == 'I':
                        fill_table(tab1, csvreader)
                    elif i == 'II':
                        fill_table(tab2, csvreader)
                    elif i == 'III':
                        fill_table(tab3, csvreader)
    else:
        print(f"File path {base_path} doesn't exist.")

    large_table = max([tab1, tab2, tab3], key=len)

    for tab in [tab1, tab2, tab3]:
        if len(tab) < len(large_table):
            diff = len(large_table) - len(tab)

            if len(tab) == 0:
                sec = 0
                for sec in range(len(large_table)):
                    tab.append([sec, '0', '0', '0', '0', '0', '0', '0'])
            else:
                sec = tab[-1][0]  #latest second in table
                for sec in range(tab[-1][0] + 1, len(large_table)):
                    tab.append([sec, '0', '0', '0', '0', '0', '0', '0'])

    return create_table(tab1, tab2, tab3)


def get_boris_vector(path, target):
    table = csv_files_reader(path)
    df = pd.DataFrame(table)
    df.to_csv(target, index=False)

In [9]:
def get_video_metadata(file_path):
    try:
        # Get the file status
        file_stat = os.stat(file_path)

        # Get the last modified time
        last_modified_time_str = time.ctime(file_stat.st_mtime)
        last_modified_timestamp = int(file_stat.st_mtime)

        # Get the duration of the video
        with VideoFileClip(file_path) as video:
            duration = video.duration

        metadata = {
            'last_modified_time': last_modified_time_str,
            'last_modified_timestamp': last_modified_timestamp,
            'duration': int(duration),  # Duration in seconds
            'initial_timestamp' : last_modified_timestamp - int(duration)
        }

        return metadata

    except FileNotFoundError:
        return f"The file {file_path} does not exist."
    except Exception as e:
        return f"An error occurred: {str(e)}"


In [10]:
# Get the moment of the start of the vector and its frequency
def get_unix_and_hz(file_path):
    with open(file_path, newline='') as csvfile:
        reader = csv.reader(csvfile)
        unix = int(float(next(reader)[0]))  # Convert the first cell to integer
        hz = int(float(next(reader)[0]))  # Convert the second cell to integer
    return unix, hz

In [11]:
# Trim the vector to be the multiple of its frequency
def trim_vector(vector, rate):
    length = len(vector)
    if length % rate != 0:
        # Calculate how many elements need to be removed
        excess_elements = length % rate
        # Trim the vector
        vector = vector[:-excess_elements]
    return vector

In [12]:
# Change the frequency by averaging the values
def mean_of_values(vector, rate):
    # Ensure the length of the vector is a multiple of n
    if len(vector) % rate != 0:
        raise ValueError("Length of the vector must be a multiple of frequency")

    # Reshape the vector into a 2D array where each row is a group of n elements
    reshaped_vector = np.reshape(vector, (-1, rate))

    # Calculate the mean along the rows
    mean_values = np.mean(reshaped_vector, axis=1)

    return mean_values

In [13]:
def extract_frame_embeddings(path):
    # Step 1: Initialize FaceNet model and MTCNN detector
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    mtcnn = MTCNN(keep_all=True, device=device)  # MTCNN for face detection
    facenet = InceptionResnetV1(pretrained='vggface2').eval().to(device)  # Pre-trained FaceNet model
    
    # Step 2: Load video file and get frame rate
    cap = cv2.VideoCapture(path)

    if not cap.isOpened():
        print("Error: Could not open video.")
    else:
        print("Video opened successfully!")

    # Calculate frames per second (fps) and total frames
    frame_interval = cap.get(cv2.CAP_PROP_FPS)  # Frames per second of the video

    # Step 3: Process video at 1 second intervals
    frame_count = 0
    frame_embeddings = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Check if the frame is at the 1-second interval
        if frame_count % frame_interval < 1:
            # Convert frame to RGB (OpenCV uses BGR by default)
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_pil = Image.fromarray(frame_rgb)

            # Step 4: Detect faces in the frame
            boxes, _ = mtcnn.detect(frame_pil)
            if boxes is None:
                num_of_features = 512

                frame_embeddings.append(np.zeros(num_of_features, dtype=np.float32))
                frame_count += 1
                continue  # Skip frames with no detected faces

            # Step 5: Crop and align each detected face
            faces = mtcnn(frame_pil)  # This will return aligned faces

            # Step 6: Generate embeddings for each face
            if faces is not None:
                faces = faces.to(device)
                embeddings = facenet(faces)  # Generate embeddings
                frame_embeddings.append(embeddings.cpu().detach().numpy())  # Store embeddings 

        frame_count += 1

    # Step 7: Release video capture
    cap.release()
    frame_embeddings = [frame_embeddings[0][0] for a in frame_embeddings]
    return pd.DataFrame(frame_embeddings)


In [14]:
def slice_vectors(video_path, biosignal_path, input_storage_path):
    
    # Get the video metadata
    video = get_video_metadata(video_path)
    print(video)
    # Get the path for EDA, TEMP and HR
    EDA_path = os.path.join(biosignal_path, 'EDA.csv')
    TEMP_path = os.path.join(biosignal_path, 'TEMP.csv')
    HR_path = os.path.join(biosignal_path, 'HR.csv')

    # Get only the data
    EDA = pd.read_csv(EDA_path, skiprows = 2, header = None)
    TEMP = pd.read_csv(TEMP_path, skiprows = 2, header = None)
    HR = pd.read_csv(HR_path, skiprows = 2, header = None)
    frame_embeddings = extract_frame_embeddings(video_path)

    # Get the starting time and frequency
    unix_EDA, hz_EDA = get_unix_and_hz(EDA_path)
    unix_TEMP, hz_TEMP = get_unix_and_hz(TEMP_path)
    unix_HR, hz_HR = get_unix_and_hz(HR_path)
    unix_video = video['initial_timestamp']

    # Unificate the frequencies  
    EDA_mean = mean_of_values(trim_vector(EDA, hz_EDA), hz_EDA)
    TEMP_mean = mean_of_values(trim_vector(TEMP, hz_TEMP), hz_TEMP)
    HR_mean = mean_of_values(trim_vector(HR, hz_HR), hz_HR)

    # Get the lengths
    length_EDA = len(EDA_mean)
    length_TEMP = len(TEMP_mean)
    length_HR = len(HR_mean)
    length_video = video['duration']
    
    # Get the vectors of starts and ends for biosignals and video
    starts = [unix_HR, unix_TEMP, unix_EDA, unix_video]
    ends = [unix_HR + length_HR, unix_TEMP + length_TEMP, unix_EDA + length_EDA, unix_video + length_video]

    # Get the latest start of any vector
    last_start = max(starts)
    
    # Get the earliest end of any vector
    first_end = min(ends)

    # Get matching indexes for start and end for every vector
    EDA_first_index = last_start - unix_EDA
    EDA_last_index = first_end - unix_EDA
    TEMP_first_index = last_start - unix_TEMP
    TEMP_last_index = first_end - unix_TEMP
    HR_first_index = last_start - unix_HR
    HR_last_index = first_end - unix_HR
    video_first_index = last_start - unix_video
    video_last_index = first_end - unix_video


    print('EDA: ', EDA_last_index - EDA_first_index)
    print('TEMP: ', TEMP_last_index - TEMP_first_index)
    print('HR: ', HR_last_index - HR_first_index)
    print('video: ', video_last_index - video_first_index)
    
    # Slice the biosignals based on the index
    sliced_EDA = EDA[EDA_first_index:EDA_last_index]
    sliced_TEMP = TEMP[TEMP_first_index:TEMP_last_index]
    sliced_HR = HR[HR_first_index:HR_last_index]
    sliced_frame_embeddings = frame_embeddings[video_first_index:video_last_index]
    
    # Create one df with all biosignals
    # TODO: ADD VECTOR OF EMBEDDINGS
    input = pd.concat([sliced_EDA.reset_index(drop=True), sliced_TEMP.reset_index(drop=True), sliced_HR.reset_index(drop=True), sliced_frame_embeddings.reset_index()], axis=1)

    # Rename column names
    input.columns.values[0:3] = ["EDA", "TEMP", "HR"]
    input = input.drop(input.columns[3], axis=1)
    # Add the biosignals to file
    input.to_csv(input_storage_path, index=False, sep=';')

In [15]:
def traverse():
    root_dir = '//153.19.52.107/emboa/IO3-sessions/NEW STRUCTURE'
    target_root_dir = '//153.19.52.107/emboa/IO3-sessions/NEW STRUCTURE/de-earlyfusionthesis'
    boris_ext = 'Analysis/BORIS/'
    type = ['Camera', 'Wristband']
    research_centers = ['GUT']
    #research_centers = ['GUT', 'ITU-YU', 'MAAP']
    s_values = ["S01"]
    #s_values = ["S01", "S02", "S03", "S04", "S05", "S06", "S07", "S08", "S09", "S10", "S11" ]
    c_values = ["C01"]
    #c_values = ["C01", "C02", "C03", "C04", "C05", "C06", "C07", "C08", "C09", "C10", "C11", "C12", "C13" ]
    for research_center in research_centers:
        for session in s_values:
            # Create the session path for biosignal and camera + BORIS
            camera_path = Path(root_dir).joinpath(type[0], research_center, session)
            signals_path = Path(root_dir).joinpath(type[1], research_center, session)
            if camera_path.is_dir() and signals_path.is_dir():
                # Create the meeting path for biosignal and camera + BORIS
                for camera in c_values:
                    exact_camera_path = Path(camera_path).joinpath(camera)
                    exact_signals_path = Path(signals_path).joinpath(camera)
                    if exact_camera_path.is_dir() and exact_signals_path.is_dir():
                        boris_path = Path(exact_camera_path).joinpath(boris_ext)
                        if boris_path.is_dir():
                            # Quick fix of two MAAP sessions being divided TODO: Fix this
                            if not(research_center == 'MAAP' and ((session == 'S01' and camera == 'C05') or (session == 'S03' and camera == 'C05'))):
                                # Find exact video name
                                video_ext = '_video.mp4'
                                if research_center == 'GUT':
                                    mp4_pattern = re.compile(r'^Untitled \d+\.mp4$')
                                elif research_center == 'ITU-YU':
                                    mp4_pattern = re.compile(r'^ITU-C\d{2}-S\d{2}-\d{8}-Camera\.mp4$')
                                elif research_center == 'MAAP':
                                    mp4_pattern = re.compile(r'^MAAP-C\d{2}-S\d{2}-\d{8}-Camera\.AVI$')
                                    video_ext = '_video.AVI'
                                for files_in_camera_dir in os.listdir(exact_camera_path):
                                    if mp4_pattern.match(files_in_camera_dir):
                                        video_path = os.path.join(exact_camera_path, files_in_camera_dir)
                                        print(video_path)
                                # TODO: TRY EXCEPT FOR not matching pattern, skip iteration
                                filename = research_center + '_' + session + '_' + camera
                                boris_filename = filename + 'BORIS.csv'
                                boris_target_dir = Path(target_root_dir).joinpath(research_center, boris_filename)
                                
                                input_filename = filename + '_input.csv'
                                input_target_dir = Path(target_root_dir).joinpath(research_center, input_filename)
                                
                                # Create combined BORIS vector
                                #get_boris_vector(boris_path, boris_target_dir)
                                
                                # Create one vector of biosignals, sliced BORIS and sliced video
                                slice_vectors("resources/s01c01.mp4", exact_signals_path, input_target_dir)

                                # TODO: sprawdz dlaczego wektory mają różne długości 
traverse()


\\153.19.52.107\emboa\IO3-sessions\NEW STRUCTURE\Camera\GUT\S01\C01\Untitled 139.mp4
{'last_modified_time': 'Fri Jun 18 11:15:24 2021', 'last_modified_timestamp': 1624007724, 'duration': 609, 'initial_timestamp': 1624007115}
Video opened successfully!
EDA:  546
TEMP:  546
HR:  546
video:  546
