In [19]:
import numpy as np
import pickle
import json
import os

In [20]:
def convert_labels(activity_start, activity_end, segment_length, overlap_length, sr):
    """Map an activity's start and end times onto indices of overlapping segments.

    All times and lengths are multiplied by ``sr`` to obtain sample counts, so
    they must share one time unit (presumably seconds, with ``sr`` in samples
    per second -- TODO confirm against the feature-extraction code).

    Args:
        activity_start: Start time of the activity.
        activity_end: End time of the activity.
        segment_length: Length of one segment.
        overlap_length: Overlap between consecutive segments; must be smaller
            than ``segment_length``.
        sr: Sampling rate used to convert times into samples.

    Returns:
        Tuple ``(activity_start_segment, activity_end_segment)`` of whole-number
        segment indices, each clamped to be at least 0. The values may be
        floats when the inputs are floats, so cast with ``int()`` before
        using them as indices.

    Raises:
        ValueError: If the step between consecutive segments is not positive
            (``overlap_length >= segment_length`` or a non-positive ``sr``).
    """
    segment_samples = segment_length * sr
    overlap_samples = overlap_length * sr
    step_samples = segment_samples - overlap_samples
    if step_samples <= 0:
        raise ValueError(
            "segment step must be positive: overlap_length has to be smaller "
            "than segment_length and sr has to be positive"
        )
    activity_start_samples = activity_start * sr
    activity_end_samples = activity_end * sr

    # Floor division turns negative for any time that lies before the first
    # step (e.g. a start earlier than overlap_length). A negative index would
    # be read as "count from the end" by a slice, so clamp to segment 0.
    activity_start_segment = max(0, (activity_start_samples - overlap_samples) // step_samples)
    activity_end_segment = max(0, (activity_end_samples - overlap_samples) // step_samples)

    return activity_start_segment, activity_end_segment

In [21]:
def find_meta_data(id, root_folder_path="C:\\Users\\MajaE\\src\\repos\\master_ML\\Data"):
    """Locate the metadata JSON path that belongs to one recording.

    The data folder is searched as ``<root>/<year>/<subject>/<recording>/``.
    The first directory at the recording level whose name equals ``id`` wins.

    Args:
        id: Name of the recording folder to look for. (The name shadows the
            builtin ``id``; it is kept so existing callers keep working.)
        root_folder_path: Root of the data folder. Defaults to the location
            that was previously hard-coded; pass another path to run the
            notebook on a different machine.

    Returns:
        The path ``<recording folder>/<id>.json`` as a string, or ``None``
        when no directory named ``id`` exists at the recording level. Whether
        the JSON file itself exists is not checked.
    """
    root_folder_path_os = os.path.abspath(root_folder_path)
    # Loop through each academic year in the data folder
    for year_folder in os.listdir(root_folder_path_os):
        year_folder_path = os.path.join(root_folder_path_os, year_folder)
        if not os.path.isdir(year_folder_path):
            continue
        # Loop through each subject in the year
        for subject_folder in os.listdir(year_folder_path):
            subject_folder_path = os.path.join(year_folder_path, subject_folder)
            if not os.path.isdir(subject_folder_path):
                continue
            # Exact name match against the directory listing, as before
            if id not in os.listdir(subject_folder_path):
                continue
            recording_folder_path = os.path.join(subject_folder_path, id)
            # A plain file that happens to carry the id does not count
            if os.path.isdir(recording_folder_path):
                return os.path.join(recording_folder_path, f"{id}.json")
    return None

In [22]:
def find_disturbances(id):
    """Read the disturbance annotations of a recording from its metadata JSON.

    Args:
        id: Recording identifier, passed on to ``find_meta_data``.

    Returns:
        List of ``(start, end, type)`` tuples, one per entry of the metadata's
        ``"disturbances"`` list, with the metadata's ``"audio_offset"``
        (0 when absent) subtracted from start and end. Empty list when the
        metadata has no ``"disturbances"`` key.

    Raises:
        FileNotFoundError: If no recording folder matches ``id`` or the
            metadata file cannot be found.
    """
    id_file = find_meta_data(id)
    if id_file is None:
        # Without this guard open(None) fails with an opaque TypeError.
        raise FileNotFoundError(f"No recording folder found for id {id!r}")

    # Extract labels from the metadata file
    with open(id_file, 'r') as file:
        meta_data = json.load(file)

    audio_offset = meta_data["audio_offset"] if "audio_offset" in meta_data else 0

    if "disturbances" not in meta_data:
        return []
    return [
        (
            disturbance["start"] - audio_offset,
            disturbance["end"] - audio_offset,
            disturbance["type"],
        )
        for disturbance in meta_data["disturbances"]
    ]

In [23]:
# Load the features, labels and recording ids stored by previous calculations.
# The names are deliberately not pre-bound to empty lists: if the load fails,
# later cells should stop on a NameError instead of running on empty data.
# NOTE(review): pickle.load can execute arbitrary code -- only load files
# that were produced by this project.
# os.path.join keeps the relative path valid on non-Windows systems as well.
with open(os.path.join('..', 'features_pos1_1.pickle'), 'rb') as handle:
    features, labels, recording_ids = pickle.load(handle)

In [24]:
# Class id written into the label array for each disturbance type.
# NOTE(review): ids start at 2, presumably because 0 and 1 are already used
# by the existing labels -- confirm.
dictionary = {"talking": 2, "phys_activity": 3, "rub_mic": 4, "move_leads": 5, "drink_water": 6}

# Segmentation parameters handed to convert_labels.
# NOTE(review): presumably seconds / Hz, and they have to match the values
# used when the features were extracted -- confirm.
SEGMENT_LENGTH = 1
OVERLAP_LENGTH = 0.5
SAMPLE_RATE = 22050

for recording_idx, recording_id in enumerate(recording_ids):
    disturbances = find_disturbances(recording_id)
    # The loop variable is not called `type`: at notebook top level it would
    # overwrite the builtin for every cell that runs afterwards.
    for disturbance_start, disturbance_end, disturbance_type in disturbances:
        disturbance_segment_start, disturbance_segment_end = convert_labels(
            disturbance_start, disturbance_end, SEGMENT_LENGTH, OVERLAP_LENGTH, SAMPLE_RATE
        )
        # Clamp at 0: a negative bound would make the slice count from the
        # end of the label array and relabel the wrong segments.
        segment_start = max(0, int(disturbance_segment_start))
        segment_end = max(0, int(disturbance_segment_end))
        # Scalar slice assignment: assumes each labels entry is a NumPy array
        # -- TODO confirm. Later disturbances overwrite earlier ones where
        # their segment ranges overlap.
        labels[recording_idx][segment_start:segment_end] = dictionary[disturbance_type]

# os.path.join keeps the relative path valid on non-Windows systems as well.
with open(os.path.join('..', 'features_pos1_1_multiclass_2.pickle'), 'wb') as handle:
    pickle.dump([features, labels, recording_ids], handle, protocol=pickle.HIGHEST_PROTOCOL)
        