In [1]:
import tgt
import pandas as pd
import numpy as np
import os
import librosa

# Create Spectrograms

In [12]:
def save_spectrogram(filepath, start, stop, y, sr):
    """
    Compute the mel spectrogram of one slice of an audio signal and save it
    as a numpy file next to the source audio file.

    Input: filepath  path of the source .wav file; only used to derive the
                     output directory and file name
           start     slice start in seconds
           stop      slice end in seconds
           y         audio time series
           sr        sample rate of the audio time series
    Output: None. Saves '<stem>_<start>to<stop>_spectro.npy' in the directory
            of filepath, where <stem> is the file name up to its first '.'.
            The saved array is the mel spectrogram of dimension (n_mels, t)
            with n_mels = 64.
    """
    # Seconds -> sample indices.
    segment = y[sr * start:sr * stop]
    S = librosa.feature.melspectrogram(y=segment, sr=sr, n_mels=64, fmax=sr / 2)
    stem = os.path.basename(filepath).split('.')[0]
    # os.path.join keeps a bare file name relative to the working directory;
    # concatenating '' + '/' + stem pointed at the filesystem root instead.
    path_save = os.path.join(os.path.dirname(filepath), stem)
    np.save('{}_{}to{}_spectro'.format(path_save, start, stop), S)

In [47]:
# Root directory of the audio data; it is walked recursively with os.walk.
audio_path = 'audio/'

In [6]:
# Paths of all .wav files found anywhere below audio_path.
wavs = [
    os.path.join(folder, filename)
    for folder, _subfolders, filenames in os.walk(audio_path)
    for filename in filenames
    if filename.endswith(".wav")
]

In [None]:
# Cut every recording into window_size-second windows, plus windows shifted
# by `slide` seconds, and save one mel spectrogram per window.
window_size = 6  # window length in seconds
slide = 3        # offset in seconds of the interleaved, overlapping window
for filepath in wavs:
    y, sr = librosa.load(filepath)
    length = int(len(y) / sr)          # duration in whole seconds
    remainder = length % window_size   # trailing seconds that do not fill a window
    usable = length - remainder
    # `range` was misspelled as `ange`, which raised NameError on the first file.
    for i in range(0, usable, window_size):
        save_spectrogram(filepath, i, i + window_size, y, sr)
        # Shifted window, saved only when it ends strictly inside the usable span.
        j = i + slide
        if j + window_size < usable:
            save_spectrogram(filepath, j, j + window_size, y, sr)

# Convert TextGrid files to CSVs

In [185]:
def convert_tg_file_to_csv(file, annotation_path):
    """
    Filters TextGrid file to leave only tiers whose name contains 'laugh'
    and exports them as a comma-separated table.

    Input: file             path of the .TextGrid file
           annotation_path  not used; kept so existing calls keep working
    Output: None. Writes '<stem>_Laugh.txt' (comma-separated text, UTF-8)
            into the directory of file, where <stem> is the file name up
            to its first '.'.
    """
    tg = tgt.io.read_textgrid(file, include_empty_intervals=True)
    # Collect the names first, then delete, so the tier list is not modified
    # while it is being iterated.
    unwanted = [name for name in tg.get_tier_names() if 'laugh' not in name]
    for name in unwanted:
        tg.delete_tier(name)
    table = tgt.io.export_to_table(tg, separator=',')
    save_name = os.path.basename(file).split('.')[0] + '_Laugh.txt'
    # os.path.join keeps a bare file name relative to the working directory
    # instead of producing '/<name>'.
    save_file = os.path.join(os.path.dirname(file), save_name)
    # Annotation text can contain non-ASCII characters; write UTF-8 explicitly
    # so the output does not depend on the platform's default encoding.
    with open(save_file, 'w', encoding='utf-8') as output:
        output.write(table)

In [186]:
def convert_tg_file_to_part_csv(file, annotation_path):
    """
    Filters TextGrid file to leave only tiers whose name contains 'Part'
    and exports them as a comma-separated table. To be used to determine
    when roleplay starts.

    Input: file             path of the .TextGrid file
           annotation_path  not used; kept so existing calls keep working
    Output: None. Writes '<stem>_Parts.txt' (comma-separated text, UTF-8)
            into the directory of file, where <stem> is the file name up
            to its first '.'.
    """
    tg = tgt.io.read_textgrid(file, include_empty_intervals=True)
    # Collect the names first, then delete, so the tier list is not modified
    # while it is being iterated.
    unwanted = [name for name in tg.get_tier_names() if 'Part' not in name]
    for name in unwanted:
        tg.delete_tier(name)
    table = tgt.io.export_to_table(tg, separator=',')
    save_name = os.path.basename(file).split('.')[0] + '_Parts.txt'
    # os.path.join keeps a bare file name relative to the working directory
    # instead of producing '/<name>'.
    save_file = os.path.join(os.path.dirname(file), save_name)
    # Write UTF-8 explicitly so the output does not depend on the platform's
    # default encoding.
    with open(save_file, 'w', encoding='utf-8') as output:
        output.write(table)

In [6]:
# Root directory of the annotation files; it is walked recursively for .TextGrid files.
annotation_path = 'transcriptions_annotations/'

In [188]:
# Paths of all .TextGrid files found anywhere below annotation_path.
TextGrid_files = [
    os.path.join(folder, filename)
    for folder, _subfolders, filenames in os.walk(annotation_path)
    for filename in filenames
    if filename.endswith(".TextGrid")
]

In [None]:
# tqdm was used here without ever being imported. The progress bar is only
# cosmetic, so fall back to plain iteration when the package is unavailable.
try:
    from tqdm.auto import tqdm
except ImportError:
    def tqdm(iterable, **kwargs):
        """Stand-in for tqdm: iterate without showing a progress bar."""
        return iterable

# Write the '_Laugh.txt' and '_Parts.txt' tables next to every TextGrid file.
for file in tqdm(TextGrid_files, desc='tg to csv'):
    convert_tg_file_to_csv(file, annotation_path)
    convert_tg_file_to_part_csv(file, annotation_path)

# Create Dataset (combine: id, spectrogram, label)

In [51]:
def find_label_start_end(spectro_file, annotation_path):
    """
    From spectrogram file, find and return:
    label path
    start time
    end time
    roleplay path

    The spectrogram file name must look like '<id>_<start>to<end>_...';
    start and end are returned as ints. The annotation files are looked up
    in the sub-directory of annotation_path that has the same name as the
    directory containing spectro_file: the label file is the one ending in
    'Laugh.txt', the roleplay file the one ending in 'Parts.txt'.

    Raises IndexError when the file name has no '_' or 'to' part, or when
    the annotation directory holds no matching file.
    """
    time_span = os.path.basename(spectro_file).split('_')[1]
    bounds = time_span.split('to')
    start_time = int(bounds[0])
    end_time = int(bounds[1])

    # basename/join instead of split('/') and '+': works whether or not
    # annotation_path ends with a separator, and with OS-specific separators.
    recording_dir = os.path.basename(os.path.dirname(spectro_file))
    label_dir = os.path.join(annotation_path, recording_dir)

    # os.listdir order is arbitrary; sort so the pick is deterministic when
    # several files match.
    entries = sorted(os.listdir(label_dir))
    label_files = [f for f in entries if f.endswith("Laugh.txt")]
    label_path = os.path.join(label_dir, label_files[0])

    roleplay_files = [f for f in entries if f.endswith("Parts.txt")]
    roleplay_path = os.path.join(label_dir, roleplay_files[0])
    return label_path, start_time, end_time, roleplay_path

In [3]:
def filter_csv(start_time, end_time, label_path):
    """
    Read the annotation csv at label_path and return a dataframe of the
    laughter rows that fall in the window [start_time, end_time].

    A row is kept when its text contains 'laugh' and its interval reaches
    into the window (touching a window edge counts). Kept rows are clipped
    to the window: a csv start_time earlier than the requested start_time
    is replaced by it, and a csv end_time later than the requested end_time
    is replaced by it.

    Example with start_time = 907 and end_time = 909:

        start_time    end_time      text
        905.765658    909.731864    <laughter> jaha läuft </laughter>

    becomes

        start_time    end_time      text
        907.0         909.0         <laughter> jaha läuft </laughter>
    """
    annotations = pd.read_csv(label_path)
    # Rows with missing text count as "no laughter".
    is_laugh = annotations['text'].str.contains('laugh', na=False)
    in_window = (annotations['start_time'] <= end_time) & (annotations['end_time'] >= start_time)
    clipped = annotations[is_laugh & in_window].copy()
    clipped.loc[clipped.end_time > end_time, 'end_time'] = end_time
    clipped.loc[clipped.start_time < start_time, 'start_time'] = start_time
    return clipped

In [11]:
def create_id(spectro_file):
    """
    Build the id of a spectrogram file: the first two '_'-separated parts
    of its base name, joined by '_' again.
    """
    parts = os.path.basename(spectro_file).split('_')
    return '{}_{}'.format(parts[0], parts[1])

In [12]:
def start_end_in_timesteps(df, start_time, timesteps_per_second):
    """
    Return a new dataframe after:
    Removing tier_name, tier_type and text columns
    Reformatting times so they are relative to start_time (window start = 0)
    Converting seconds to timesteps (start rounded down, end rounded up)

    Input: df                    dataframe with columns tier_name, tier_type,
                                 text, start_time and end_time (in seconds)
           start_time            window start in seconds
           timesteps_per_second  number of timesteps in one second
    Output: dataframe with the remaining columns; start_time and end_time
            are expressed in timesteps. The input frame is not modified.
    """
    # `columns=` instead of a positional axis: pandas 2.0 removed the
    # positional form, so drop([...], 1) raises TypeError there.
    out = df.drop(columns=['tier_name', 'tier_type', 'text'])
    # Round outwards so that a partially covered timestep still counts.
    out['start_time'] = np.floor((out['start_time'] - start_time) * timesteps_per_second)
    out['end_time'] = np.ceil((out['end_time'] - start_time) * timesteps_per_second)
    return out

In [13]:
def create_label_matrix(df, n_timesteps=None):
    """
    Output a label vector with one entry per timestep.
    Vector has values of 0 or 1
    1 representing laughter, 0 representing no laughter.

    For example:
    [1, 0, 0, 1, 0, 0 ....] represents laughter in timesteps 0 and 3

    Input: df           dataframe with start_time and end_time columns given
                        in timesteps; each row marks [start_time, end_time)
           n_timesteps  length of the vector. When omitted, the module-level
                        variable `timesteps` is used, as before.
    Output: numpy array of length n_timesteps
    """
    if n_timesteps is None:
        # Backward compatible: earlier callers relied on the global.
        n_timesteps = timesteps
    label = np.zeros(n_timesteps)
    for start, end in zip(df['start_time'], df['end_time']):
        # Clamp at 0 so a negative bound cannot wrap around to the end of
        # the vector; bounds past the end are already cut off by slicing.
        label[max(int(start), 0):max(int(end), 0)] = 1
    return label

In [14]:
def create_id_spectro_label(file_id, spectro_path, label):
    """
    Output a 1-D object array containing 3 elements:
    id of the spectrogram file
    related spectrogram (loaded from spectro_path with np.load)
    related label

    Input: file_id       id to store in element 0
           spectro_path  path of the saved spectrogram .npy file
           label         label vector to store in element 2
    Output: numpy array of shape (3,) and dtype object
    """
    np_spectro_file = np.load(spectro_path)
    # The three elements have different shapes. np.asarray on such a ragged
    # list raises ValueError on NumPy >= 1.24, so fill an object array slot
    # by slot instead.
    np_combined = np.empty(3, dtype=object)
    np_combined[0] = file_id
    np_combined[1] = np_spectro_file
    np_combined[2] = label
    return np_combined

In [220]:
def roleplay_flag(roleplay_path, window_start=None, window_end=None):
    """
    Checks that the window of a spectrogram overlaps at least one interval
    of the roleplay annotation file (touching an interval edge counts).

    Input: roleplay_path  csv file with start_time and end_time columns
           window_start   window start in seconds. When omitted, the
                          module-level variable `start_time` is used, as before.
           window_end     window end in seconds. When omitted, the
                          module-level variable `end_time` is used, as before.
    Output: True if the window overlaps an annotated interval, else False.
    """
    if window_start is None:
        # Backward compatible: earlier callers relied on the global.
        window_start = start_time
    if window_end is None:
        window_end = end_time
    df = pd.read_csv(roleplay_path)
    # Only the two time columns are needed. The former df.drop([...], 1)
    # raised TypeError on pandas 2.0, which removed the positional axis.
    overlaps = (df['end_time'] >= window_start) & (df['start_time'] <= window_end)
    return bool(overlaps.any())

In [53]:
# Paths of all saved spectrogram files ('...spectro.npy') below audio_path.
spectros = [
    os.path.join(folder, filename)
    for folder, _subfolders, filenames in os.walk(audio_path)
    for filename in filenames
    if filename.endswith("spectro.npy")
]

In [None]:
# Assemble one [id, spectrogram, label] record per spectrogram file.
dataset = []
window_size = 6
# NOTE(review): 259 is presumably the number of spectrogram frames in one
# window_size-second window — TODO confirm against the saved arrays.
timesteps = 259 
timesteps_per_second = timesteps / window_size

# NOTE: start_time, end_time and timesteps must keep exactly these
# module-level names. roleplay_flag(roleplay_path) reads start_time/end_time
# and create_label_matrix(df) reads timesteps as globals, not as arguments.
for spectro_path in spectros:
    label_path, start_time, end_time, roleplay_path = find_label_start_end(spectro_path, annotation_path)
    # Skip windows that do not overlap any annotated roleplay interval.
    if roleplay_flag(roleplay_path):
        # Laughter rows clipped to the window, then converted to timesteps.
        df = filter_csv(start_time, end_time, label_path)
        df = start_end_in_timesteps(df, start_time, timesteps_per_second)
        df_label = create_label_matrix(df)
        file_id = create_id(spectro_path)
        np_combined = create_id_spectro_label(file_id, spectro_path, df_label)
        dataset.append(np_combined)
dataset = np.asarray(dataset)

In [222]:
# Persist the dataset; np.save appends '.npy', so the file is 'de_laughter_ds.npy'.
np.save('de_laughter_ds',dataset)