In [25]:
import tgt
import pandas as pd
import numpy as np
import os
import librosa
from tqdm.notebook import tqdm

In [26]:
# set paths
audio_path = '/laughter/DUEL/ch/audio'
annotation_path = '/laughter/DUEL/ch/transcriptions_annotations/'
save_path = '/laughter/DUEL/datasets/ch'

# Create Spectrograms

In [27]:
def save_spectrogram(filepath, save_path, start, stop, y, sr):
    """Convert audio to spectrogram and save.
    
    # Arguments
        filepath: wav audio filepath.
        start: start time in seconds.
        stop: stop time in seconds.
        y: audio time series.
        sr: sample rate.
        
    # Outputs
        saves a numpy file of the mel spectrogram array with
        dimensions (n_mels, t)
    """
    S = librosa.feature.melspectrogram(y=y[sr * start:(sr * stop)],
                                       sr=sr, n_mels=64, fmax=sr/2)
    rp = os.path.basename(filepath).split('.')[0].split('_')[0]
    rpid = os.path.basename(filepath).split('.')[0]
    if not os.path.isdir(save_path + '/' + rp):
        os.makedirs(save_path + '/' + rp)
    save_path = save_path + '/' + rp + '/' + rpid + '_' + str(start) + 'to' + str(stop) + '_spectro'
    np.save(save_path, S)

In [28]:
mp3s = [os.path.join(root, name)
        for root, dirs, files in os.walk(audio_path)
        for name in files
        if name.endswith((".mp3"))]

In [29]:
window_size = 6
slide = 6

In [None]:
for filepath in tqdm(mp3s, desc='load audio'):
    y, sr = librosa.load(filepath, mono=True)
    length = int(len(y) / sr)
    remainder = length % window_size
    for i in tqdm(range(0, length - remainder - window_size, slide), desc='save_spectro', leave=False):
        save_spectrogram(filepath, save_path, i, i + window_size, y, sr)

# Convert TextGrid file to csvs

In [31]:
def convert_tg_file_to_csv(file, annotation_path):
    """Filter TextGrid file for laughter and convert to csv.
    
    # Arguments
    file: TextGrid file.
    annotation_path: folder containing annotations.
    
    # Saves
    csv file which is a filtered TextGrid file with only 
    tiers that are named containing 'laugh'
    """
    tg = tgt.io.read_textgrid(file, include_empty_intervals=True)
    tier_list = tg.get_tier_names()
    tier_no_laugh_list = [tier for tier in tier_list if 'laugh' not in tier]
    for tier in tier_no_laugh_list:
        tg.delete_tier(tier)
    csv = tgt.io.export_to_table(tg, separator=',')
    save_name = os.path.basename(file).split('.')[0] + '_Laugh.txt'
    save_dir = os.path.dirname(file)
    save_file = save_dir + '/' + save_name
    with open(save_file, 'w') as output:
        output.write(csv)

In [32]:
TextGrid_files = [os.path.join(root, name)
             for root, dirs, files in os.walk(annotation_path)
             for name in files
             if name.endswith((".TextGrid"))]

In [None]:
for file in tqdm(TextGrid_files, desc='tg to csv'):
    convert_tg_file_to_csv(file, annotation_path)

# Create Dataset (combine: id, spectrogram, label)

In [34]:
def find_label_start_end(spectro_file, annotation_path):
    """Find the label path, start and end time relating to spectrogram.
    
    # Arguments
        spectro_file: spectrogram filepath.
        annotation_path: directory of annotations.
        
    # Returns
        label_path: label filepath relating to the spectrogram.
        start_time: start time relating to the spectrogram.
        end_time: end time relating to the spectrogram.
    """
    base_file = os.path.basename(spectro_file)
    
    start_time = int(base_file.split('_')[2].split('to')[0])
    end_time = int(base_file.split('_')[2].split('to')[1])
    
    rx = base_file.split('_')[0] + '_' + base_file.split('_')[1]
    
    label_dir = annotation_path + os.path.dirname(spectro_file).split('/')[-1]
    label_files = [f for f in os.listdir(label_dir) if f.endswith((rx + "_Laugh.txt"))]
    label_path = label_dir + '/' + label_files[0]
    
    return label_path, start_time, end_time

In [35]:
def filter_csv(start_time, end_time, label_path):
    """Filters csv file for start and end time, returns as dataframe.
    
    # Arguments
        start_time: start time relating to spectrogram.
        end_time: end time relating to spectrogram.
        label_path: filepath of label.
        
    # Returns
        dataframe filtered to contain 'laugh' in the text
        and filtered for specified start_time and end_time.
        When start_time in the csv is before specified start_time,
        this record will be included but start_time in the csv will be set
        to specified start_time. Same for end_time.
        
    # Example
        start_time    end_time     text
        905.765658    909.731864   L

        if start_time was 907 and end_time was 909, this row would be set to:

        start_time    end_time     text
        907.0         909.0        L
    """    
    df = pd.read_csv(label_path)
    df = df[df['text'].str.contains('sigh') == False] # Remove sighs
    df = df[df['text'].str.contains('nonspeech') == False] # Remove nonspeech
    df = df[df['start_time'] <= end_time]
    df = df[df['end_time'] >= start_time]
    df.loc[df.end_time > end_time, 'end_time'] = end_time
    df.loc[df.start_time < start_time, 'start_time'] = start_time
    return df

In [36]:
def create_id(spectro_file):
    """Create identifier for spectrogram.
    
    # Arguments
        spectro_file: filepath for spectrogram.
        
    # Returns
        id for file.
        
    # Example
        input of spectro_file of 'audio/r7/r7_270to276_spectro.npy'
        would return 'r7_270to276'.
    """
    base_name = os.path.basename(spectro_file)
    r = base_name.split('_')[0]
    r2 = base_name.split('_')[1]
    times = base_name.split('_')[2]
    file_id = r + '_' + r2 + '_' + times
    return file_id

In [37]:
def start_end_in_timesteps(df, start_time, timesteps_per_second):
    """Convert start and end time from seconds to timesteps.
    Remove tier_name, tier_type and text columns.
    Reformat times to start from 0 and end at window_size.
    
    # Arguments
        df: dataframe in format from output of function filter_csv.
        start_time: start time relating to spectrogram.
        timesteps_per_second: timesteps_per_second = timesteps / window_size.
        
    # Returns
        dataframe after modifications.
    """
    df = df.drop(['tier_name', 'tier_type', 'text'], 1)
    df['start_time'] = df['start_time'] - start_time
    df['start_time'] = (df['start_time'] * timesteps_per_second).apply(np.floor)
    df['end_time'] = df['end_time'] - start_time
    df['end_time'] = (df['end_time'] * timesteps_per_second).apply(np.ceil)
    return df

In [38]:
def create_label_matrix(df):
    """Convert label annotations into a matrix.
    
    # Arguments
        df: dataframe in format from output of start_end_in_timesteps.
        
    # Returns
        vector of length (timesteps) which has values of 0 or 1.
        1 representing laughter, 0 representing no laughter.
    
    # Example
        [1, 0, 0, 1, 0, 0 ....] represents laughter in timesteps 0 and 3
    """
    label = np.zeros(timesteps)
    update_list = []
    for index, row in df.iterrows():
        update_list.append([row['start_time'], row['end_time']])
    for l in update_list:
        start = int(l[0])
        end = int(l[1])
        label[start:end] = 1
    return label

In [39]:
def create_id_spectro_label(file_id, spectro_path, label):
    """Combine id, spectrogram and label.
    
    # Arguments
        file_id: file id created from function create_id.
        spectro_path: filepath for spectrogram.
        label: label created from function create_label_matrix.
        
    # Returns
        numpy array containing 3 elements:
        id
        related spectrogram
        related label
    """
    np_spectro_file = np.load(spectro_path)
    combined = [file_id, np_spectro_file, label]
    np_combined = np.asarray(combined)
    return np_combined

In [40]:
def roleplay_flag(label_path):
    """States whether label path is during the roleplay or not.
    Roleplay start is when first annotation for the audio fiel is made.
    
    # Arguments
        label_path: filepath for Laugh file.
        
    # Returns
        True if the start and end times of the spectrogram
        are during the annotated roleplay times. Else returns False.
    """
    df = pd.read_csv(label_path)
    df = df.sort_values(by=['start_time'])
    roleplay_start = int(df.head(1)['start_time']) - np.random.randint(low=1, high=7)
    proceed_flag = False
    if start_time >= roleplay_start:
            proceed_flag = True
    return proceed_flag

In [42]:
spectros = [os.path.join(root, name)
            for root, dirs, files in os.walk(save_path)
            for name in files
            if name.endswith(("spectro.npy"))]

In [None]:
dataset = []
spectro_eg = np.load(spectros[0])
timesteps = spectro_eg.shape[1] 
timesteps_per_second = timesteps / window_size

for spectro_path in tqdm(spectros, desc='create dataset'):
    label_path, start_time, end_time = find_label_start_end(spectro_path, annotation_path)
    if roleplay_flag(label_path):
        df = filter_csv(start_time, end_time, label_path)
        df = start_end_in_timesteps(df, start_time, timesteps_per_second)
        df_label = create_label_matrix(df)
        file_id = create_id(spectro_path)
        np_combined = create_id_spectro_label(file_id, spectro_path, df_label)
        dataset.append(np_combined)

# Check laughter rate and save train, val and test files

In [41]:
def increase_laugh_rate(dataset, percentage_laughs):
    """Increase the laughter rate in the dataset.
    
    # Arguments
        dataset: dataset to have laughter percentage increased.
        percentage_laughs: desired percentage for dataset of 
                           examples containing a laugh. As integer.
    
    # Reurns
        dataset_inc_laughs: dataset with increased rate of 
                            examples containing a laugh.
    """
    laugh = [example for example in dataset if 1 in example[2]]
    laugh_count = len(laugh)
    no_laugh = [example for example in dataset if 1 not in example[2]]
    no_laugh_count = len(no_laugh)
    
    delete_from_no_laugh = len(dataset) - laugh_count * 100 / percentage_laughs
    delete_from_no_laugh = int(delete_from_no_laugh)
    
    no_laugh = no_laugh[0:-delete_from_no_laugh]
    
    dataset_inc_laughs = np.vstack((laugh, no_laugh))
    return dataset_inc_laughs

In [42]:
def laughter_check(dataset):
    """Check the percentage of clips that contain laughter in the dataset.
    
    # Arguments
        dataset: dataset to be checked.
    
    # Returns
        percentage of examples containing laughter.
    """
    laughs = len([e for e in dataset[:, 2] if 1 in e])
    total = len(dataset[:, 2])
    laughter_p = laughs / total
    return laughter_p

In [43]:
dataset = np.asarray(dataset)

In [44]:
dataset.shape

(3047, 3)

In [45]:
laughter_check(dataset)

0.21234000656383328

In [57]:
train_val = [e for e in dataset 
             if 'r6' not in e[0]
             and 'r2' not in e[0]]

In [58]:
test = [e for e in dataset 
        if 'r6' in e[0]
        or 'r2' in e[0]]

In [59]:
train_val = np.asarray(train_val)
np.random.shuffle(train_val)
val_split = int(len(dataset) * 0.1)
train = train_val[:-val_split]
val = train_val[-val_split:]

test = np.asarray(test)
np.random.shuffle(test)
print(laughter_check(train))
print(laughter_check(val))
print(laughter_check(test))

0.2141552511415525
0.19407894736842105
0.21518987341772153


In [64]:
print(train.shape)
print(laughter_check(train))

(1422, 3)
0.329817158931083


In [63]:
train = increase_laugh_rate(train, 33)
print(train.shape)
print(laughter_check(train))

(1422, 3)
0.329817158931083


In [67]:
print(val.shape)
print(laughter_check(val))

(304, 3)
0.19407894736842105


In [66]:
print(test.shape)
print(laughter_check(test))

(553, 3)
0.21518987341772153


In [73]:
np.save(save_path+'ch_train_6_6_64_ds', train)
np.save(save_path+'ch_val_6_6_64_ds', val)
np.save(save_path+'ch_test_6_6_64_ds', test)