In [1]:
import glob
import os
import numpy as np
from scipy.signal import butter, filtfilt
from pydub import AudioSegment
import scipy.io.wavfile as wav
from tqdm import tqdm
from pydub import AudioSegment
import re
import pandas as pd

In [2]:
# Note The threshold depends also on the input volume set on the computer
def _get_voice_onset(signal, threshold = 200, fs=44100, min_time=100):
    '''
    signal : numpy.ndarray
             signal in. Should be the envelope of the raw signal for accurate results
    threshold : int
                Amplitude threshold for voice onset.
                (Threshold = 200 with NYUAD MEG mic at 75% input volume seems to work well)
    fs : int
         Sampling frequency
    min_time : int (ms)
             Time in ms after the threshold is crossed used to calculate
              the median amplitude and decide if it was random burst of noise
              or speech onset.
    '''

    n_above_thresh = int(fs/min_time) # convert time above threshold to number of samples.

    indices_onset = np.where(signal >= threshold)[0] # All indices above threshold
    # Next, find the first index that where the MEDIAN stays above threshold for the next 10ms
    # Not using the MEAN because sensitive to a single extreme value
    # Note 44.1 points per millesconds (for fs=44100)
    # 10ms = 441 points
    for i in indices_onset:
        median_mintime = np.median(np.abs(signal[i:i+n_above_thresh])) # median value in the timewindow of length min_time
        if median_mintime >= threshold:
            idx_onset = i
            onset_time = idx_onset / float(fs) * 1000.0

            return idx_onset, onset_time
    return np.nan, np.nan # if no point exceeds the threshold.
                          # Return "None" instead of None in order to be able to append it to a list later on

In [3]:
#--- Based on Jarne (2017) "Simple empirical algorithm to obtain signal envelope in three steps"
def _get_envelope(signal, fs=44100, N=200, cutoff=2000):
    '''
    signal: input wav (numpy.ndarray)
    fs: sampling frequency
    N: number of samples per chunk (in part (2))
    cutoff: LPF cutoff, the smaller the cuttoff the stronger the filter. (tweek this).
    '''
    # 1) Take the absolute value of the signal
    abs_signal = abs(signal)
    # 2) Seperate into samples of N, and get peak value of each sample.
    chunked_signal = [abs_signal[i:i+N] for i in range(0, len(abs_signal), N)]
    new_signal = []
    for chunk in chunked_signal: #Then for each chunk, replace all values by max value
        max_value = np.max(chunk)
        new_chunk = [max_value for i in range(len(chunk))]
        new_signal.append(new_chunk)
    # new_signal = np.array(new_signal).flatten()
    new_signal = np.array([item for sublist in new_signal for item in sublist]) # flatten list of lists
    # 3) LPF the new_signal (the envelope, not the original signal)
    def FilterSignal(signal_in, fs, cutoff):
        B, A = butter(1, cutoff / (fs / 2.0), btype='low')
        filtered_signal = filtfilt(B, A, signal_in, axis=0)
        return filtered_signal
    filteredSignal = FilterSignal(new_signal, fs, cutoff)

    return filteredSignal

In [4]:
def convert_ogg_to_wav(input_file, output_file):

    if os.path.exists(output_file):
        print(output_file, "exists already")
        return

    output_dir = os.path.dirname(output_file)
    os.makedirs(output_dir, exist_ok=True)

    # Load .ogg file
    audio = AudioSegment.from_ogg(input_file)
    
    # Export as .wav file
    audio.export(output_file, format="wav")

In [5]:
round_pattern = r"n-(\d+)-p"
dyad_pattern = r"(p\d+p\d+)"
player_pattern = r"u-(p\d+)"

# Define the path to the "trimed" folder
base_path = "C:/Users/DanielZander/Documents/DuckSoup/Development/pilot_ultimatumtest/audio_preproc/psyphysical_main1/trimed"


onset_time_data = dict()
# Loop over all subfolders in the base path
for folder in glob.glob(os.path.join(base_path, '*/')):
    print(f"Folder: {folder}")
    ogg_files = glob.glob(os.path.join(folder, '*.ogg'))  # List all .ogg files in the folder
    if len(ogg_files) > 4:
        print("folder", folder, "more than 4 files")
        continue
    print(f"Number of .ogg files: {len(ogg_files)}")
    for file in ogg_files:

        #Only process dry files. 
        if file.endswith(("audio-wet.ogg", "audio_dry.ogg")):
            continue
        #Extract round
        round_match = re.search(round_pattern, file)
        round_number = round_match.group(1) if round_match else None

        # Extract the dyad
        dyad_match = re.search(dyad_pattern, file)
        dyad = dyad_match.group(1) if dyad_match else None

        # Extract the player
        player_match = re.search(player_pattern, file)
        player = player_match.group(1) if player_match else None

        
        #Extract file name with base_path
        relative_path = os.path.relpath(file, base_path)
        #save file as wav in new folder named wav in audio_preproc/psyphysical_main1
        wav_file = base_path.replace("/trimed", "") + "/wav/" + relative_path
        convert_ogg_to_wav(file, wav_file)

        fs, signal = wav.read(wav_file)
        filtered_signal = _get_envelope(signal, fs=fs)
        # Use the _get_voice_onset function to find the speech onset time
        idx_onset, onset_time = _get_voice_onset(filtered_signal, threshold=20000000, fs=fs, min_time=100)


          # Initialize the player in the data structure if not already present
        if player not in onset_time_data:
            onset_time_data[player] = {}

        # Initialize the round number for the player if not already present
        if round_number not in onset_time_data[player]:
            onset_time_data[player][round_number] = {}

        # Store the dyad and onset time for this player and round
        onset_time_data[player][round_number] = {
            "dyad": dyad,
            "onset_time": onset_time
        }


        print(f"File: {file}")
        print(round_number, dyad, player, onset_time)

        

    print('-' * 40)

Folder: C:/Users/DanielZander/Documents/DuckSoup/Development/pilot_ultimatumtest/audio_preproc/psyphysical_main1/trimed\1-p2p5\
Number of .ogg files: 4
File: C:/Users/DanielZander/Documents/DuckSoup/Development/pilot_ultimatumtest/audio_preproc/psyphysical_main1/trimed\1-p2p5\i-ebe07698e5bf6b45a6814e6bbd54e0dc-a-20240911-142842.968-s-mkpsyphysical_main1-n-1-p2p5-u-p5-c-1-audio-dry.ogg
1 p2p5 p5 3858.3541666666665
File: C:/Users/DanielZander/Documents/DuckSoup/Development/pilot_ultimatumtest/audio_preproc/psyphysical_main1/trimed\1-p2p5\i-ebe07698e5bf6b45a6814e6bbd54e0dc-a-20240911-142843.138-s-mkpsyphysical_main1-n-1-p2p5-u-p2-c-1-audio-dry.ogg
1 p2p5 p2 2500.25
----------------------------------------
Folder: C:/Users/DanielZander/Documents/DuckSoup/Development/pilot_ultimatumtest/audio_preproc/psyphysical_main1/trimed\1-p4p6\
Number of .ogg files: 4
File: C:/Users/DanielZander/Documents/DuckSoup/Development/pilot_ultimatumtest/audio_preproc/psyphysical_main1/trimed\1-p4p6\i-d7b8736f2

In [6]:
players = ["p1", "p2", "p3", "p4", "p5", "p6"]

column_names = ["player_nb", "round_nb", "dyad", "speech_onset"]
speech_data= pd.DataFrame(columns=column_names)

for player in players:
    for round in range(1, 31):  # Loop from round 1 to 30
        data = onset_time_data.get(player, {}).get(str(round), None)  # Assuming rounds are strings
        if data:
            print(f"{player}","round", f"{round}", data)
            print(data["dyad"])

            row_data = {
                'player_nb': player,
                'round_nb': round,
                'dyad': data["dyad"],
                'speech_onset': data["onset_time"]
             }

        else:
            print(f"No data for player {player}, round {round}")
            row_data = {
                'player_nb': player,
                'round_nb': round,
                'dyad': None,
                'speech_onset': None
            }
        speech_data = pd.concat([speech_data, pd.DataFrame([row_data])], ignore_index=True)


speech_data.to_csv("speech_data.csv", index=False)

No data for player p1, round 1
No data for player p1, round 2
p1 round 3 {'dyad': 'p1p5', 'onset_time': 15354.0}
p1p5
p1 round 4 {'dyad': 'p1p4', 'onset_time': 3191.708333333333}
p1p4
p1 round 5 {'dyad': 'p1p2', 'onset_time': 7475.0625}
p1p2
p1 round 6 {'dyad': 'p1p3', 'onset_time': 3070.9583333333335}
p1p3
No data for player p1, round 7
p1 round 8 {'dyad': 'p1p4', 'onset_time': nan}
p1p4
p1 round 9 {'dyad': 'p1p3', 'onset_time': nan}
p1p3
p1 round 10 {'dyad': 'p1p4', 'onset_time': nan}
p1p4
p1 round 11 {'dyad': 'p1p2', 'onset_time': 5533.375}
p1p2
No data for player p1, round 12
p1 round 13 {'dyad': 'p1p6', 'onset_time': 4112.520833333333}
p1p6
p1 round 14 {'dyad': 'p1p4', 'onset_time': 499.97916666666663}
p1p4
p1 round 15 {'dyad': 'p1p5', 'onset_time': 3391.6875}
p1p5
No data for player p1, round 16
p1 round 17 {'dyad': 'p1p2', 'onset_time': 7662.5}
p1p2
p1 round 18 {'dyad': 'p1p5', 'onset_time': 3329.083333333333}
p1p5
p1 round 19 {'dyad': 'p1p4', 'onset_time': 837.6875}
p1p4
No dat

  speech_data = pd.concat([speech_data, pd.DataFrame([row_data])], ignore_index=True)
