# wav2vec_test
- force align each annotation/wav snippet 
- save results in json file

In [6]:
import os
import json
import torch
import IPython
import torchaudio
import matplotlib.pyplot as plt
from dataclasses import dataclass


print(torch.__version__)
print(torchaudio.__version__)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

torch.random.manual_seed(0)

2.2.2+cpu
2.2.2+cpu
cpu


<torch._C.Generator at 0x16b7886c7b0>

In [7]:
bundle = torchaudio.pipelines.WAV2VEC2_ASR_BASE_960H
model = bundle.get_model().to(device)
labels = bundle.get_labels()

print(labels)

('-', '|', 'E', 'T', 'A', 'O', 'N', 'I', 'H', 'S', 'R', 'D', 'L', 'U', 'M', 'W', 'C', 'F', 'G', 'Y', 'P', 'B', 'V', 'K', "'", 'X', 'J', 'Q', 'Z')


In [8]:
import re

folder_path = 'C:\\Users\\barth\\gits\\pytorch_wav2vec\\test_data\\'
inputPath = folder_path + "input\\"
mediaPath = folder_path + "media_snippets\\"

def read_json_file(filename):
    try:
        with open(filename, 'r') as json_file:
            data = json.load(json_file)
            return data
    except Exception as e:
        print(f"Error reading JSON file: {e}")
        return None

def get_trellis(emission, tokens, blank_id=0):
    num_frame = emission.size(0)
    num_tokens = len(tokens)

    trellis = torch.zeros((num_frame, num_tokens))
    trellis[1:, 0] = torch.cumsum(emission[1:, blank_id], 0)
    trellis[0, 1:] = -float("inf")
    trellis[-num_tokens + 1 :, 0] = float("inf")

    for t in range(num_frame - 1):
        trellis[t + 1, 1:] = torch.maximum(
            # Score for staying at the same token
            trellis[t, 1:] + emission[t, blank_id],
            # Score for changing to the next token
            trellis[t, :-1] + emission[t, tokens[1:]],
        )
    return trellis

def plot():
    fig, ax = plt.subplots()
    img = ax.imshow(trellis.T, origin="lower")
    ax.annotate("- Inf", (trellis.size(1) / 5, trellis.size(1) / 1.5))
    ax.annotate("+ Inf", (trellis.size(0) - trellis.size(1) / 5, trellis.size(1) / 3))
    fig.colorbar(img, ax=ax, shrink=0.6, location="bottom")
    fig.tight_layout()

@dataclass
class Point:
    token_index: int
    time_index: int
    score: float

def backtrack(trellis, emission, tokens, blank_id=0):
    t, j = trellis.size(0) - 1, trellis.size(1) - 1

    path = [Point(j, t, emission[t, blank_id].exp().item())]
    while j > 0:
        # Should not happen but just in case
        assert t > 0

        # 1. Figure out if the current position was stay or change
        # Frame-wise score of stay vs change
        p_stay = emission[t - 1, blank_id]
        p_change = emission[t - 1, tokens[j]]

        # Context-aware score for stay vs change
        stayed = trellis[t - 1, j] + p_stay
        changed = trellis[t - 1, j - 1] + p_change

        # Update position
        t -= 1
        if changed > stayed:
            j -= 1

        # Store the path with frame-wise probability.
        prob = (p_change if changed > stayed else p_stay).exp().item()
        path.append(Point(j, t, prob))

    # Now j == 0, which means, it reached the SoS.
    # Fill up the rest for the sake of visualization
    while t > 0:
        prob = emission[t - 1, blank_id].exp().item()
        path.append(Point(j, t - 1, prob))
        t -= 1

    return path[::-1]

# Merge the labels
@dataclass
class Segment:
    label: str
    start: int
    end: int
    score: float

    def __repr__(self):
        return f"{self.label}\t({self.score:4.2f}): [{self.start:5d}, {self.end:5d})"

    @property
    def length(self):
        return self.end - self.start


def merge_repeats(path):
    i1, i2 = 0, 0
    segments = []
    while i1 < len(path):
        while i2 < len(path) and path[i1].token_index == path[i2].token_index:
            i2 += 1
        score = sum(path[k].score for k in range(i1, i2)) / (i2 - i1)
        segments.append(
            Segment(
                transcript[path[i1].token_index],
                path[i1].time_index,
                path[i2 - 1].time_index + 1,
                score,
            )
        )
        i1 = i2
    return segments

# Merge words
def merge_words(segments, separator="|"):
    words = []
    sounds = []
    i1, i2 = 0, 0
    while i1 < len(segments):
        
        if i2 >= len(segments) or segments[i2].label == separator:
            if i1 != i2:
                segs = segments[i1:i2]
                word = "".join([seg.label for seg in segs])
                score = sum(seg.score * seg.length for seg in segs) / sum(seg.length for seg in segs)
                words.append(Segment(word, segments[i1].start, segments[i2 - 1].end, score))
            i1 = i2 + 1
            i2 = i1
        else:
            i2 += 1
    print (words, sounds)
    return words, sounds

def segment_info(i):
    ratio = waveform.size(1) / trellis.size(0)
    word = word_segments[i]
    x0 = int(ratio * word.start)
    x1 = int(ratio * word.end)
    #print(f"{word.label} ({word.score:.2f}): {x0 / 44100:.3f} - {x1 / 44100:.3f} sec")
    return ([f"{word.label}", format(x0 / 44100,'.3f') , format(x1 / 44100, '.3f')])
    

for filename in os.listdir(mediaPath):
    counter  = 0
    
    if filename.endswith('.json'):
        jsonFile = mediaPath + filename
        json_data = read_json_file(jsonFile)
        for k, v in json_data.items():
            
            if v["text"] and len(v["text"]) > 0:
                
                text_clean = v["text"].upper().replace(' ', '|')
                transcript = text_clean
            
            else:
                transcript = "|"
            #print (text_clean)
            
            
            print (transcript)
            regex = re.compile('[^a-zA-Z|]')
            transcript = regex.sub('', transcript)
            
            
            print (transcript)
            
            dictionary = {c: i for i, c in enumerate(labels)}
            
            tokens = [dictionary[c] for c in transcript]
            #print(list(zip(transcript, tokens)))

            SPEECH_FILE = "C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/" + filename[:-5] + "\\" + k.replace(" ", "_") + ".wav" #Cathy_Samun_Wiliang_a1.wav"
            print (SPEECH_FILE)
            #bundle = torchaudio.pipelines.WAV2VEC2_ASR_BASE_960H
            
            #model = bundle.get_model().to(device)
            #labels = bundle.get_labels()
            
            
            with torch.inference_mode():
                waveform, _ = torchaudio.load(SPEECH_FILE)
                emissions, _ = model(waveform.to(device))
                emissions = torch.log_softmax(emissions, dim=-1)
            
            emission = emissions[0].cpu().detach()
            metadata = torchaudio.info(SPEECH_FILE)
            #print(metadata)
            
            trellis = get_trellis(emission, tokens)
           
            #plot()
            
            path = backtrack(trellis, emission, tokens)

            segments = merge_repeats(path)
            
            
            letter_list = []

            for segment in range(len(segments)):
                ratio = waveform.size(1) / trellis.size(0)

                x0 = int(ratio * segments[segment].start)
                x1 = int(ratio * segments[segment].end)

                #print(f"{word.label} ({word.score:.2f}): {x0 / 44100:.3f} - {x1 / 44100:.3f} sec")
                soundInfo = [f"{segments[segment].label}", format(x0 / 44100,'.3f') , format(x1 / 44100, '.3f')]
                soundInfo[1] = int(float(soundInfo[1]) * 1000) + v['timeStamp1']
                soundInfo[2] = int(float(soundInfo[2]) * 1000) + v['timeStamp1']

                # ingnore the word gaps
                if soundInfo[0] != "|": 
                    letter_list.append(soundInfo)
                
                
                
            word_segments, sounds = merge_words(segments)
            word_list = []
            for x in range(0, len(word_segments)):
                wordInfo = segment_info(x)
                wordInfo[1] = int(float(wordInfo[1]) * 1000) + v['timeStamp1']
                wordInfo[2] = int(float(wordInfo[2]) * 1000) + v['timeStamp1']
                print(wordInfo)
                
                word_list.append(wordInfo)


            print (word_list)
            print(v)
            v["tier_new"] = v["tiername"] + "_words"
            v["words"] = word_list
            v["letters"] = letter_list                 
            
            
            #print(transcript)
            #display_segment(-1)
            #IPython.display.Audio(SPEECH_FILE, rate=44100)

        print (json_data)
        with open(jsonFile, 'w') as json_file:
            json.dump(json_data, json_file, indent=2)

print (" +++ DONE +++ " )

WELL|BEFORE|MY|HIGH|SCHOOL|YEAH|
WELL|BEFORE|MY|HIGH|SCHOOL|YEAH|
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a329.wav
[WELL	(0.89): [    0,    58), BEFORE	(0.88): [   68,   169), MY	(0.49): [  170,   172), HIGH	(0.91): [  250,   300), SCHOOL	(0.75): [  305,   341), YEAH	(0.87): [  361,   426)] []
['WELL', 8460, 8882]
['BEFORE', 8954, 9688]
['MY', 9696, 9710]
['HIGH', 10277, 10641]
['SCHOOL', 10677, 10939]
['YEAH', 11084, 11556]
[['WELL', 8460, 8882], ['BEFORE', 8954, 9688], ['MY', 9696, 9710], ['HIGH', 10277, 10641], ['SCHOOL', 10677, 10939], ['YEAH', 11084, 11556]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a329', 'timeSlotRef1': 'ts6', 'timeSlotRef2': 'ts7', 'timeStamp1': 8460, 'timeStamp2': 11680, 'text': 'Well before my high school yeah ', 'words': [], 'phonemes': []}
I|STUDY|THE|THERE|A|COUPLE|OF|YEARS|IN|OUR|HOMETOWN|
I|STUDY|THE|THERE|A|COUPLE|OF|YEARS|IN|OUR|HOMETOWN|
C:/Users/barth/gits/pytorch_wav2vec/tes

[YEP	(0.86): [    0,    67)] []
['YEP', 41600, 42090]
[['YEP', 41600, 42090]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1056', 'timeSlotRef1': 'ts43', 'timeSlotRef2': 'ts46', 'timeStamp1': 41600, 'timeStamp2': 42090, 'text': '!yep', 'words': [], 'phonemes': []}
THREE|YEARS|FOR|MAST~|UH|FOR|BACHELOR|DEGREE|AND|THEN|TWO|YEARS|FOR|MASTER|DEGREE
THREE|YEARS|FOR|MAST|UH|FOR|BACHELOR|DEGREE|AND|THEN|TWO|YEARS|FOR|MASTER|DEGREE
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a331.wav
[THREE	(0.86): [    0,    46), YEARS	(0.84): [   51,    87), FOR	(0.81): [   98,   131), MAST	(0.89): [  134,   168), UH	(0.64): [  203,   230), FOR	(0.84): [  251,   282), BACHELOR	(0.81): [  293,   340), DEGREE	(0.85): [  375,   437), AND	(0.79): [  456,   467), THEN	(0.81): [  479,   498), TWO	(0.88): [  520,   565), YEARS	(0.84): [  572,   611), FOR	(0.85): [  625,   649), MASTER	(0.88): [  658,   704), DEGREE	(0.89): [  730,   794)] []
['TH

[PEOPLE	(0.79): [    0,    36), GET	(0.81): [  113,   135), SOLVE	(0.79): [  157,   188), PROBLEM	(0.83): [  213,   286), LIKE	(0.71): [  288,   306), TOGETHER	(0.81): [  316,   387)] []
['PEOPLE', 88845, 89107]
['GET', 89667, 89827]
['SOLVE', 89988, 90213]
['PROBLEM', 90395, 90926]
['LIKE', 90941, 91072]
['TOGETHER', 91145, 91661]
[['PEOPLE', 88845, 89107], ['GET', 89667, 89827], ['SOLVE', 89988, 90213], ['PROBLEM', 90395, 90926], ['LIKE', 90941, 91072], ['TOGETHER', 91145, 91661]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1068', 'timeSlotRef1': 'ts93', 'timeSlotRef2': 'ts94', 'timeStamp1': 88845, 'timeStamp2': 91785, 'text': 'people get solve problem like together ', 'words': [], 'phonemes': []}
YEAH|AND|VERY|HARD|TO|TO|TO|DO|SOMETHING|BY|YOURSELF|AND
YEAH|AND|VERY|HARD|TO|TO|TO|DO|SOMETHING|BY|YOURSELF|AND
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a1069.wav
[YEAH	(0.88): [    0,    49), AND	(0.46): [  127,   

[YEAH	(0.90): [    0,    87)] []
['YEAH', 116088, 116725]
[['YEAH', 116088, 116725]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1561', 'timeSlotRef1': 'ts117', 'timeSlotRef2': 'ts118', 'timeStamp1': 116088, 'timeStamp2': 116725, 'text': '!yeah', 'words': [], 'phonemes': []}
!YEAH|!YEAH|YEAH
YEAH|YEAH|YEAH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a334.wav
[YEAH	(0.82): [    0,    61), YEAH	(0.85): [   65,   101), YEAH	(0.85): [  129,   210)] []
['YEAH', 118210, 118654]
['YEAH', 118684, 118946]
['YEAH', 119150, 119740]
[['YEAH', 118210, 118654], ['YEAH', 118684, 118946], ['YEAH', 119150, 119740]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a334', 'timeSlotRef1': 'ts121', 'timeSlotRef2': 'ts125', 'timeStamp1': 118210, 'timeStamp2': 119740, 'text': '!Yeah !yeah yeah', 'words': [], 'phonemes': []}
OKAY
OKAY
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stell

[AND	(0.90): [    0,    37), THEN	(0.83): [   59,    82), AFTER	(0.86): [  100,   183), THEN	(0.71): [  225,   240), I	(0.91): [  264,   268), GOT	(0.75): [  292,   313), A	(0.97): [  314,   315), GOT	(0.83): [  389,   431), A	(0.72): [  523,   528), BABY	(0.90): [  537,   565)] []
['AND', 155835, 156104]
['THEN', 156264, 156431]
['AFTER', 156561, 157164]
['THEN', 157470, 157579]
['I', 157753, 157782]
['GOT', 157956, 158109]
['A', 158116, 158123]
['GOT', 158661, 158966]
['A', 159635, 159671]
['BABY', 159736, 159940]
[['AND', 155835, 156104], ['THEN', 156264, 156431], ['AFTER', 156561, 157164], ['THEN', 157470, 157579], ['I', 157753, 157782], ['GOT', 157956, 158109], ['A', 158116, 158123], ['GOT', 158661, 158966], ['A', 159635, 159671], ['BABY', 159736, 159940]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1084', 'timeSlotRef1': 'ts157', 'timeSlotRef2': 'ts158', 'timeStamp1': 155835, 'timeStamp2': 160085, 'text': 'and then after then I got a got a baby ', 'words': [], 'phonemes': [

[YEAH	(0.87): [    0,    60), YEAH	(0.81): [   66,    98), YEAH	(0.82): [  115,   178), JUST	(0.86): [  191,   224)] []
['YEAH', 182955, 183392]
['YEAH', 183435, 183668]
['YEAH', 183792, 184250]
['JUST', 184345, 184585]
[['YEAH', 182955, 183392], ['YEAH', 183435, 183668], ['YEAH', 183792, 184250], ['JUST', 184345, 184585]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1094', 'timeSlotRef1': 'ts194', 'timeSlotRef2': 'ts196', 'timeStamp1': 182955, 'timeStamp2': 184585, 'text': '!yeah !yeah yeah just', 'words': [], 'phonemes': []}
BE|THERE|AS|A|SALES|FOR|MAYBE|ONE|AND|A|HALF|YEAR|AND|THEN|I|COME|OUT|
BE|THERE|AS|A|SALES|FOR|MAYBE|ONE|AND|A|HALF|YEAR|AND|THEN|I|COME|OUT|
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a338.wav
[BE	(0.89): [    0,    27), THERE	(0.88): [   39,    85), AS	(0.70): [  100,   114), A	(0.89): [  122,   128), SALES	(0.82): [  138,   189), FOR	(0.91): [  205,   284), MAYBE	(0.92): [  285,   356), ONE

[AND	(0.94): [    0,    59), JUST	(0.81): [   98,   136), BECAUSE	(0.79): [  146,   195), MY	(0.80): [  206,   224), BOYFRIEND	(0.84): [  239,   311), AND	(0.56): [  345,   351), MY	(0.83): [  366,   380), HUSBAND	(0.87): [  400,   479), NOW	(0.75): [  482,   494)] []
['AND', 223205, 223634]
['JUST', 223917, 224193]
['BECAUSE', 224266, 224622]
['MY', 224702, 224833]
['BOYFRIEND', 224942, 225465]
['AND', 225712, 225756]
['MY', 225865, 225967]
['HUSBAND', 226112, 226686]
['NOW', 226708, 226795]
[['AND', 223205, 223634], ['JUST', 223917, 224193], ['BECAUSE', 224266, 224622], ['MY', 224702, 224833], ['BOYFRIEND', 224942, 225465], ['AND', 225712, 225756], ['MY', 225865, 225967], ['HUSBAND', 226112, 226686], ['NOW', 226708, 226795]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1103', 'timeSlotRef1': 'ts229', 'timeSlotRef2': 'ts230', 'timeStamp1': 223205, 'timeStamp2': 226875, 'text': 'and just because my boyfriend and my husband now ', 'words': [], 'phonemes': []}
MY|BOYFRIEND|BEFORE|
M

[UH	(0.83): [    0,    46), MODERN	(0.91): [   57,   121), BUILDING	(0.85): [  133,   237), EVERYWHERE	(0.87): [  284,   375), YEAH	(0.60): [  376,   384), SO	(0.81): [  392,   429), ITS	(0.44): [  447,   455), QUITE	(0.73): [  469,   489), DIFFERENT	(0.82): [  499,   550), AS	(0.87): [  588,   605)] []
['UH', 257665, 257999]
['MODERN', 258079, 258544]
['BUILDING', 258632, 259388]
['EVERYWHERE', 259729, 260391]
['YEAH', 260398, 260456]
['SO', 260514, 260783]
['ITS', 260914, 260972]
['QUITE', 261074, 261219]
['DIFFERENT', 261292, 261663]
['AS', 261939, 262062]
[['UH', 257665, 257999], ['MODERN', 258079, 258544], ['BUILDING', 258632, 259388], ['EVERYWHERE', 259729, 260391], ['YEAH', 260398, 260456], ['SO', 260514, 260783], ['ITS', 260914, 260972], ['QUITE', 261074, 261219], ['DIFFERENT', 261292, 261663], ['AS', 261939, 262062]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1113', 'timeSlotRef1': 'ts265', 'timeSlotRef2': 'ts268', 'timeStamp1': 257665, 'timeStamp2': 262135, 'text': "uh

[BUT	(0.80): [    0,    21), OF	(0.77): [   28,    39), COURSE	(0.81): [   48,    77), COMPARED	(0.85): [   83,   130), TO	(0.86): [  132,   144), SYDNEY	(0.80): [  158,   199), AUS	(0.71): [  214,   232)] []
['BUT', 288523, 288676]
['OF', 288726, 288806]
['COURSE', 288872, 289082]
['COMPARED', 289126, 289467]
['TO', 289482, 289569]
['SYDNEY', 289671, 289969]
['AUS', 290078, 290208]
[['BUT', 288523, 288676], ['OF', 288726, 288806], ['COURSE', 288872, 289082], ['COMPARED', 289126, 289467], ['TO', 289482, 289569], ['SYDNEY', 289671, 289969], ['AUS', 290078, 290208]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1124', 'timeSlotRef1': 'ts297', 'timeSlotRef2': 'ts298', 'timeStamp1': 288523, 'timeStamp2': 290223, 'text': 'but of course compared to Sydney Aus~ ', 'words': [], 'phonemes': []}
UH|CANBERRA|IS|QUITE|MORE|COUNTRY|STYLE|!FOR|!ME|
UH|CANBERRA|IS|QUITE|MORE|COUNTRY|STYLE|FOR|ME|
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__

[UH	(0.89): [    0,    29), WEEKDAYS	(0.84): [   36,   134), WE	(0.85): [  140,   154), CAN	(0.87): [  164,   188), JUST	(0.86): [  196,   222), WORKING	(0.83): [  285,   338), WEEKDAYS	(0.77): [  339,   371), AND	(0.87): [  410,   434), DONT	(0.83): [  500,   528), NEED	(0.73): [  532,   545), TO	(0.89): [  560,   580), OVERTIME	(0.85): [  594,   681), LIKE	(0.69): [  702,   717), WORKING	(0.87): [  726,   816)] []
['UH', 313890, 314101]
['WEEKDAYS', 314151, 314863]
['WE', 314906, 315008]
['CAN', 315081, 315255]
['JUST', 315313, 315502]
['WORKING', 315960, 316345]
['WEEKDAYS', 316352, 316584]
['AND', 316867, 317042]
['DONT', 317521, 317724]
['NEED', 317753, 317848]
['TO', 317957, 318102]
['OVERTIME', 318204, 318835]
['LIKE', 318988, 319097]
['WORKING', 319162, 319816]
[['UH', 313890, 314101], ['WEEKDAYS', 314151, 314863], ['WE', 314906, 315008], ['CAN', 315081, 315255], ['JUST', 315313, 315502], ['WORKING', 315960, 316345], ['WEEKDAYS', 316352, 316584], ['AND', 316867, 317042], ['DONT

[ITS	(0.80): [    0,    30), QUITE	(0.69): [   34,    55), COMFORTABLE	(0.88): [   62,   157), AND	(0.63): [  196,   203), QUITE	(0.73): [  229,   251), SIMPLE	(0.73): [  263,   293), RELATIONSHIP	(0.85): [  301,   430), WITH	(0.86): [  437,   459)] []
['ITS', 359310, 359528]
['QUITE', 359557, 359710]
['COMFORTABLE', 359760, 360451]
['AND', 360734, 360785]
['QUITE', 360974, 361133]
['SIMPLE', 361221, 361439]
['RELATIONSHIP', 361497, 362434]
['WITH', 362485, 362645]
[['ITS', 359310, 359528], ['QUITE', 359557, 359710], ['COMFORTABLE', 359760, 360451], ['AND', 360734, 360785], ['QUITE', 360974, 361133], ['SIMPLE', 361221, 361439], ['RELATIONSHIP', 361497, 362434], ['WITH', 362485, 362645]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1149', 'timeSlotRef1': 'ts370', 'timeSlotRef2': 'ts372', 'timeStamp1': 359310, 'timeStamp2': 362710, 'text': "It's quite comfortable and quite simple relationship with ", 'words': [], 'phonemes': []}
PEOPLE|NEARBY|IS|QUITE|SIMPLE|
PEOPLE|NEARBY|IS|QUITE|

[BUT	(0.86): [    0,    52), WERE	(0.73): [   60,    74), NOT	(0.91): [   91,   112), ALWAYS	(0.68): [  170,   203), BEEN	(0.78): [  208,   227), TO	(0.85): [  232,   246), SYDNEY	(0.72): [  258,   283)] []
['BUT', 392447, 392826]
['WERE', 392884, 392986]
['NOT', 393110, 393263]
['ALWAYS', 393685, 393925]
['BEEN', 393962, 394100]
['TO', 394136, 394238]
['SYDNEY', 394326, 394508]
[['BUT', 392447, 392826], ['WERE', 392884, 392986], ['NOT', 393110, 393263], ['ALWAYS', 393685, 393925], ['BEEN', 393962, 394100], ['TO', 394136, 394238], ['SYDNEY', 394326, 394508]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1162', 'timeSlotRef1': 'ts411', 'timeSlotRef2': 'ts412', 'timeStamp1': 392447, 'timeStamp2': 394770, 'text': "but we're not always been to Sydney ", 'words': [], 'phonemes': []}
YEAH|MAYBE|TWO|WEEKS|YEAH|TWICE|A|WEEK|SO|YEAH|ONCE|A|WEEKS
YEAH|MAYBE|TWO|WEEKS|YEAH|TWICE|A|WEEK|SO|YEAH|ONCE|A|WEEKS
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\

[YEAH	(0.88): [    0,    70)] []
['YEAH', 435995, 436508]
[['YEAH', 435995, 436508]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1568', 'timeSlotRef1': 'ts449', 'timeSlotRef2': 'ts450', 'timeStamp1': 435995, 'timeStamp2': 436508, 'text': '!yeah', 'words': [], 'phonemes': []}
NORMALLY|IT'S|MANDARIN|
NORMALLY|ITS|MANDARIN|
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a352.wav
[NORMALLY	(0.84): [    0,    71), ITS	(0.78): [   81,    95), MANDARIN	(0.79): [  112,   171)] []
['NORMALLY', 442084, 442602]
['ITS', 442675, 442777]
['MANDARIN', 442901, 443331]
[['NORMALLY', 442084, 442602], ['ITS', 442675, 442777], ['MANDARIN', 442901, 443331]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a352', 'timeSlotRef1': 'ts456', 'timeSlotRef2': 'ts457', 'timeStamp1': 442084, 'timeStamp2': 443484, 'text': "Normally it's Mandarin ", 'words': [], 'phonemes': []}
BUT|WHEN|I|TALK|TO|MY|HOMETOWN|FRIENDS|
BUT|WHEN|I|TALK|TO|MY|HOMETOWN|FR

[UM	(0.93): [    0,    48), IN	(0.69): [   69,    76), CHINA	(0.86): [   85,   137), YOU	(0.73): [  141,   152), KNOW	(0.75): [  153,   170), CHINA	(0.83): [  176,   212), GOT	(0.82): [  219,   250), A	(1.00): [  251,   252), A	(0.77): [  323,   330), LOT	(0.87): [  349,   377), OF	(0.72): [  384,   392), DIFFERENT	(0.73): [  429,   466), LANGUAGE	(0.85): [  478,   538)] []
['UM', 467153, 467502]
['IN', 467654, 467705]
['CHINA', 467770, 468148]
['YOU', 468177, 468257]
['KNOW', 468264, 468388]
['CHINA', 468431, 468693]
['GOT', 468743, 468969]
['A', 468976, 468983]
['A', 469499, 469549]
['LOT', 469687, 469891]
['OF', 469942, 470000]
['DIFFERENT', 470268, 470537]
['LANGUAGE', 470624, 471060]
[['UM', 467153, 467502], ['IN', 467654, 467705], ['CHINA', 467770, 468148], ['YOU', 468177, 468257], ['KNOW', 468264, 468388], ['CHINA', 468431, 468693], ['GOT', 468743, 468969], ['A', 468976, 468983], ['A', 469499, 469549], ['LOT', 469687, 469891], ['OF', 469942, 470000], ['DIFFERENT', 470268, 470537

[MY	(0.90): [    0,    34), HUSBAND	(0.81): [   44,    83), AND	(0.56): [   92,    98), I	(0.85): [  139,   146), NEED	(0.70): [  152,   164), TO	(0.93): [  184,   224), SPEAK	(0.83): [  263,   298), MANDARIN	(0.71): [  337,   364), AT	(0.60): [  376,   382), HOME	(0.70): [  388,   400), ALL	(0.74): [  434,   445), THE	(0.80): [  470,   485), DAY	(0.82): [  488,   503), YEAH	(0.95): [  516,   625), SO	(0.65): [  635,   640), HE	(0.78): [  651,   660), NEED	(0.64): [  662,   671), TO	(0.86): [  740,   754), UNDERSTAND	(0.78): [  761,   822), AND	(0.00): [  837,   840), HE	(0.50): [  841,   845), CAN	(0.84): [  882,   895), TALK	(0.82): [  912,   949), AS	(0.76): [  959,   981), WELL	(0.88): [  989,  1013)] []
['MY', 495910, 496157]
['HUSBAND', 496229, 496513]
['AND', 496578, 496621]
['I', 496918, 496970]
['NEED', 497013, 497101]
['TO', 497246, 497536]
['SPEAK', 497819, 498073]
['MANDARIN', 498356, 498552]
['AT', 498640, 498683]
['HOME', 498727, 498814]
['ALL', 499061, 499140]
['THE', 49

[YEAH	(0.91): [    0,    36), GRADE	(0.73): [   58,   101), THREE	(0.79): [  108,   130), YEAH	(0.84): [  193,   220)] []
['YEAH', 529907, 530169]
['GRADE', 530329, 530642]
['THREE', 530692, 530852]
['YEAH', 531311, 531507]
[['YEAH', 529907, 530169], ['GRADE', 530329, 530642], ['THREE', 530692, 530852], ['YEAH', 531311, 531507]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a360', 'timeSlotRef1': 'ts579', 'timeSlotRef2': 'ts581', 'timeStamp1': 529907, 'timeStamp2': 531507, 'text': 'yeah !grade !three !yeah', 'words': [], 'phonemes': []}
!YEAH|YEAH|
YEAH|YEAH|
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a361.wav
[YEAH	(0.86): [    0,    51), YEAH	(0.94): [   52,   135)] []
['YEAH', 550406, 550778]
['YEAH', 550785, 551390]
[['YEAH', 550406, 550778], ['YEAH', 550785, 551390]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a361', 'timeSlotRef1': 'ts596', 'timeSlotRef2': 'ts598', 'timeStamp1': 550406, 'timeStamp2': 551499

[NOWADAYS	(0.90): [    0,    89), MOST	(0.82): [  113,   137), OF	(0.76): [  144,   159), OUR	(0.81): [  185,   212), YOUNGER	(0.82): [  224,   276), UH	(0.96): [  277,   365), YOUNG	(0.71): [  392,   425), PEOPLE	(0.84): [  544,   593), IN	(0.47): [  664,   668), OUR	(0.79): [  676,   690), HOMETOWN	(0.83): [  697,   770)] []
['NOWADAYS', 583085, 583732]
['MOST', 583906, 584081]
['OF', 584131, 584240]
['OUR', 584429, 584626]
['YOUNGER', 584713, 585090]
['UH', 585098, 585737]
['YOUNG', 585934, 586173]
['PEOPLE', 587038, 587394]
['IN', 587910, 587939]
['OUR', 587997, 588099]
['HOMETOWN', 588150, 588680]
[['NOWADAYS', 583085, 583732], ['MOST', 583906, 584081], ['OF', 584131, 584240], ['OUR', 584429, 584626], ['YOUNGER', 584713, 585090], ['UH', 585098, 585737], ['YOUNG', 585934, 586173], ['PEOPLE', 587038, 587394], ['IN', 587910, 587939], ['OUR', 587997, 588099], ['HOMETOWN', 588150, 588680]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1210', 'timeSlotRef1': 'ts641', 'timeSlotRef2':

[ITS	(0.87): [    0,    41), QUITE	(0.78): [   69,   101), CLOSE	(0.90): [  176,   216), TO	(0.89): [  241,   271), AMERICAN	(0.86): [  280,   347), STYLE	(0.85): [  351,   396), ENGLISH	(0.87): [  412,   490)] []
['ITS', 616945, 617243]
['QUITE', 617447, 617679]
['CLOSE', 618224, 618515]
['TO', 618697, 618915]
['AMERICAN', 618980, 619467]
['STYLE', 619497, 619824]
['ENGLISH', 619940, 620507]
[['ITS', 616945, 617243], ['QUITE', 617447, 617679], ['CLOSE', 618224, 618515], ['TO', 618697, 618915], ['AMERICAN', 618980, 619467], ['STYLE', 619497, 619824], ['ENGLISH', 619940, 620507]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a365', 'timeSlotRef1': 'ts679', 'timeSlotRef2': 'ts680', 'timeStamp1': 616945, 'timeStamp2': 620507, 'text': "It's quite close to American style English", 'words': [], 'phonemes': []}
!YEAH
YEAH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a1225.wav
[YEAH	(0.89): [    0,    71)] []
['YEAH', 620986, 6

[YEAH	(0.92): [    0,    43), EVEN	(0.80): [   65,    94), YOU	(0.80): [   96,   105), GET	(0.78): [  115,   145), LEVEL	(0.72): [  149,   166), FOUR	(0.80): [  170,   189), LIKE	(0.83): [  209,   262)] []
['YEAH', 654392, 654705]
['EVEN', 654865, 655076]
['YOU', 655091, 655156]
['GET', 655229, 655448]
['LEVEL', 655477, 655600]
['FOUR', 655630, 655768]
['LIKE', 655914, 656299]
[['YEAH', 654392, 654705], ['EVEN', 654865, 655076], ['YOU', 655091, 655156], ['GET', 655229, 655448], ['LEVEL', 655477, 655600], ['FOUR', 655630, 655768], ['LIKE', 655914, 656299]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a369', 'timeSlotRef1': 'ts726', 'timeSlotRef2': 'ts730', 'timeStamp1': 654392, 'timeStamp2': 656445, 'text': 'Yeah even you get level four like ', 'words': [], 'phonemes': []}
LEVEL|FOUR
LEVEL|FOUR
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a1232.wav
[LEVEL	(0.87): [    0,   121), FOUR	(0.92): [  122,   160)] []
['LEVEL',

[YEAH	(0.85): [    0,    65)] []
['YEAH', 696361, 696838]
[['YEAH', 696361, 696838]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1239', 'timeSlotRef1': 'ts770', 'timeSlotRef2': 'ts771', 'timeStamp1': 696361, 'timeStamp2': 696838, 'text': '!yeah', 'words': [], 'phonemes': []}
!YEAH|!YEAH
YEAH|YEAH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a375.wav
[YEAH	(0.87): [    0,    63), YEAH	(0.78): [   76,    96)] []
['YEAH', 703945, 704404]
['YEAH', 704499, 704645]
[['YEAH', 703945, 704404], ['YEAH', 704499, 704645]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a375', 'timeSlotRef1': 'ts776', 'timeSlotRef2': 'ts779', 'timeStamp1': 703945, 'timeStamp2': 704645, 'text': '!Yeah !yeah', 'words': [], 'phonemes': []}
YEP
YEP
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a1240.wav
[YEP	(0.87): [    0,    58)] []
['YEP', 706476, 706899]
[['YEP', 706476, 706899]]
{'

[UH	(0.91): [    0,    44), OFTEN	(0.81): [   67,   100)] []
['UH', 741154, 741477]
['OFTEN', 741646, 741888]
[['UH', 741154, 741477], ['OFTEN', 741646, 741888]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1246', 'timeSlotRef1': 'ts819', 'timeSlotRef2': 'ts820', 'timeStamp1': 741154, 'timeStamp2': 741939, 'text': 'uh often ', 'words': [], 'phonemes': []}
BUT|IF|YOU|JUST|GO|OUT|WITH|HIS|F~|HM|CHINESE|FRIENDS|THEN|WE|JUST|SPEAK|MANDARIN|!YEAH
BUT|IF|YOU|JUST|GO|OUT|WITH|HIS|F|HM|CHINESE|FRIENDS|THEN|WE|JUST|SPEAK|MANDARIN|YEAH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a1247.wav
[BUT	(0.87): [    0,    52), IF	(0.68): [   89,    96), YOU	(0.89): [  120,   158), JUST	(0.81): [  171,   206), GO	(0.84): [  247,   276), OUT	(0.81): [  298,   314), WITH	(0.85): [  321,   346), HIS	(0.85): [  363,   384), F	(0.00): [  385,   386), HM	(0.79): [  387,   394), CHINESE	(0.83): [  483,   536), FRIENDS	(0.81): [  553,   598), TH

[SO	(0.90): [    0,    21), MAYBE	(0.85): [   28,    94), I	(0.78): [  104,   123), SPEAK	(0.80): [  132,   153), SPEAK	(0.89): [  208,   248), MORE	(0.79): [  274,   299), CHINESE	(0.79): [  305,   359), THAN	(0.80): [  364,   381), BEFORE	(0.86): [  391,   464)] []
['SO', 777466, 777618]
['MAYBE', 777669, 778148]
['I', 778221, 778359]
['SPEAK', 778424, 778577]
['SPEAK', 778976, 779267]
['MORE', 779455, 779637]
['CHINESE', 779681, 780073]
['THAN', 780109, 780232]
['BEFORE', 780305, 780835]
[['SO', 777466, 777618], ['MAYBE', 777669, 778148], ['I', 778221, 778359], ['SPEAK', 778424, 778577], ['SPEAK', 778976, 779267], ['MORE', 779455, 779637], ['CHINESE', 779681, 780073], ['THAN', 780109, 780232], ['BEFORE', 780305, 780835]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1254', 'timeSlotRef1': 'ts856', 'timeSlotRef2': 'ts858', 'timeStamp1': 777466, 'timeStamp2': 780835, 'text': 'So maybe I speak speak more Chinese than before', 'words': [], 'phonemes': []}
YEAH
YEAH
C:/Users/barth/gi

[SOMETHING	(0.80): [    0,    57), LINKED	(0.74): [   67,    90), TO	(0.81): [  101,   119), FAMILY	(0.82): [  135,   184), STYLE	(0.82): [  195,   242)] []
['SOMETHING', 822969, 823384]
['LINKED', 823457, 823624]
['TO', 823704, 823835]
['FAMILY', 823951, 824308]
['STYLE', 824388, 824730]
[['SOMETHING', 822969, 823384], ['LINKED', 823457, 823624], ['TO', 823704, 823835], ['FAMILY', 823951, 824308], ['STYLE', 824388, 824730]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a386', 'timeSlotRef1': 'ts917', 'timeSlotRef2': 'ts918', 'timeStamp1': 822969, 'timeStamp2': 824861, 'text': 'Something linked to family style ', 'words': [], 'phonemes': []}
!THAT|!LIKE|WE|CAN|WATCH|WITH|KIDS
THAT|LIKE|WE|CAN|WATCH|WITH|KIDS
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a1263.wav
[THAT	(0.82): [    0,    24), LIKE	(0.79): [   28,    47), WE	(0.84): [   53,    65), CAN	(0.93): [   72,   110), WATCH	(0.76): [  120,   146), WITH	(0.83): [  

[YEAH	(0.93): [    0,    46), BECAUSE	(0.79): [   81,   121), MY	(0.77): [  127,   142), JOB	(0.81): [  150,   170), IS	(0.79): [  182,   196), QUITE	(0.57): [  200,   222), FLEXIBLE	(0.84): [  223,   325)] []
['YEAH', 854145, 854479]
['BECAUSE', 854734, 855024]
['MY', 855068, 855177]
['JOB', 855235, 855380]
['IS', 855468, 855569]
['QUITE', 855599, 855758]
['FLEXIBLE', 855766, 856507]
[['YEAH', 854145, 854479], ['BECAUSE', 854734, 855024], ['MY', 855068, 855177], ['JOB', 855235, 855380], ['IS', 855468, 855569], ['QUITE', 855599, 855758], ['FLEXIBLE', 855766, 856507]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a392', 'timeSlotRef1': 'ts979', 'timeSlotRef2': 'ts981', 'timeStamp1': 854145, 'timeStamp2': 856507, 'text': 'Yeah because my job is quite flexible', 'words': [], 'phonemes': []}
!YEAH
YEAH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a393.wav
[YEAH	(0.86): [    0,    52)] []
['YEAH', 857053, 857437]
[['YEAH', 8

[DO	(0.88): [    0,    38), SOMETHING	(0.79): [   42,    86), TO	(0.89): [  103,   147)] []
['DO', 893363, 893640]
['SOMETHING', 893669, 893989]
['TO', 894113, 894434]
[['DO', 893363, 893640], ['SOMETHING', 893669, 893989], ['TO', 894113, 894434]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1284', 'timeSlotRef1': 'ts1028', 'timeSlotRef2': 'ts1029', 'timeStamp1': 893363, 'timeStamp2': 894470, 'text': 'do something to ', 'words': [], 'phonemes': []}
TO|LET|OURSELF|FEEL|THAT
TO|LET|OURSELF|FEEL|THAT
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a1285.wav
[TO	(0.93): [    0,    55), LET	(0.80): [  107,   121), OURSELF	(0.84): [  209,   267), FEEL	(0.91): [  293,   374), THAT	(0.87): [  393,   458)] []
['TO', 894753, 895153]
['LET', 895531, 895633]
['OURSELF', 896273, 896695]
['FEEL', 896884, 897473]
['THAT', 897611, 898084]
[['TO', 894753, 895153], ['LET', 895531, 895633], ['OURSELF', 896273, 896695], ['FEEL', 896884, 897

[YOU	(0.87): [    0,    24), MEAN	(0.82): [   29,    49), FROM	(0.83): [   55,    85), CHINA	(0.85): [   95,   134), WHICH	(0.61): [  139,   150), PART	(0.83): [  175,   214), OR	(0.62): [  219,   223), WHAT	(0.86): [  231,   294), S	(0.72): [  298,   306), WHAT	(0.73): [  342,   354), KIND	(0.82): [  365,   400), OF	(0.52): [  410,   414), COUNTRY	(0.86): [  416,   482), BECAUSE	(0.74): [  505,   552), WHEN	(0.69): [  556,   570), I	(0.77): [  579,   589), STUDY	(0.78): [  595,   619), IN	(0.72): [  632,   639), UNI	(0.86): [  708,   761)] []
['YOU', 933383, 933557]
['MEAN', 933594, 933739]
['FROM', 933783, 934001]
['CHINA', 934073, 934357]
['WHICH', 934393, 934473]
['PART', 934655, 934938]
['OR', 934975, 935004]
['WHAT', 935062, 935520]
['S', 935549, 935607]
['WHAT', 935869, 935956]
['KIND', 936036, 936290]
['OF', 936363, 936392]
['COUNTRY', 936407, 936886]
['BECAUSE', 937053, 937394]
['WHEN', 937424, 937526]
['I', 937591, 937664]
['STUDY', 937707, 937882]
['IN', 937976, 938027]
['UN

[YEAH	(0.89): [    0,    66)] []
['YEAH', 967307, 967790]
[['YEAH', 967307, 967790]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1306', 'timeSlotRef1': 'ts1132', 'timeSlotRef2': 'ts1134', 'timeStamp1': 967307, 'timeStamp2': 967790, 'text': 'yeah', 'words': [], 'phonemes': []}
!SOUTHERN|!PART|!OF|!CHINA
SOUTHERN|PART|OF|CHINA
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a403.wav
[SOUTHERN	(0.85): [    0,    78), PART	(0.83): [   83,   117), OF	(0.76): [  123,   140), CHINA	(0.76): [  153,   172)] []
['SOUTHERN', 976830, 977397]
['PART', 977433, 977680]
['OF', 977724, 977846]
['CHINA', 977942, 978080]
[['SOUTHERN', 976830, 977397], ['PART', 977433, 977680], ['OF', 977724, 977846], ['CHINA', 977942, 978080]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a403', 'timeSlotRef1': 'ts1149', 'timeSlotRef2': 'ts1154', 'timeStamp1': 976830, 'timeStamp2': 978080, 'text': '!Southern !part !of !China', 'words': [], 'phonemes': 

[UH	(0.94): [    0,    35), NORTHERN	(0.68): [   48,    91), PART	(0.84): [  109,   139), OF	(0.72): [  147,   158)] []
['UH', 1002805, 1003060]
['NORTHERN', 1003155, 1003468]
['PART', 1003599, 1003817]
['OF', 1003875, 1003955]
[['UH', 1002805, 1003060], ['NORTHERN', 1003155, 1003468], ['PART', 1003599, 1003817], ['OF', 1003875, 1003955]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1319', 'timeSlotRef1': 'ts1189', 'timeSlotRef2': 'ts1190', 'timeStamp1': 1002805, 'timeStamp2': 1004072, 'text': 'uh !northern part of ', 'words': [], 'phonemes': []}
CHINA|PEOPLE|MAY|MAYBE|THE|SAME|FESTIVAL|THEY|WANT|TO|CHOOSE|DUMPLING
CHINA|PEOPLE|MAY|MAYBE|THE|SAME|FESTIVAL|THEY|WANT|TO|CHOOSE|DUMPLING
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a1320.wav
[CHINA	(0.93): [    0,    76), PEOPLE	(0.79): [   91,   128), MAY	(0.95): [  137,   161), MAYBE	(0.85): [  244,   289), THE	(0.58): [  291,   297), SAME	(0.91): [  308,   338), FESTIV

[YEAH	(0.86): [    0,    82), YEAH	(0.94): [   92,   179), YEAH	(0.84): [  182,   211), SURE	(0.80): [  225,   260), ITS	(0.67): [  324,   335), QUITE	(0.82): [  344,   386), SMALL	(0.89): [  387,   467)] []
['YEAH', 1043230, 1043826]
['YEAH', 1043898, 1044530]
['YEAH', 1044552, 1044763]
['SURE', 1044864, 1045118]
['ITS', 1045583, 1045663]
['QUITE', 1045729, 1046034]
['SMALL', 1046041, 1046622]
[['YEAH', 1043230, 1043826], ['YEAH', 1043898, 1044530], ['YEAH', 1044552, 1044763], ['SURE', 1044864, 1045118], ['ITS', 1045583, 1045663], ['QUITE', 1045729, 1046034], ['SMALL', 1046041, 1046622]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a409', 'timeSlotRef1': 'ts1244', 'timeSlotRef2': 'ts1246', 'timeStamp1': 1043230, 'timeStamp2': 1046622, 'text': "!Yeah !yeah !yeah sure It's quite small", 'words': [], 'phonemes': []}
UH|SMALL|SIZE|IN|OUR|HOMETOWN|AS|WELL
UH|SMALL|SIZE|IN|OUR|HOMETOWN|AS|WELL
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CA

[FROM	(0.83): [    0,    32), MY	(0.89): [   34,    72), MOTHERS	(0.81): [   82,   124), SIDES	(0.83): [  135,   200), AND	(0.62): [  206,   212), FATHERS	(0.75): [  263,   299), SIDES	(0.81): [  307,   348), AS	(0.80): [  359,   372), WELL	(0.79): [  381,   407)] []
['FROM', 1083071, 1083304]
['MY', 1083318, 1083595]
['MOTHERS', 1083668, 1083973]
['SIDES', 1084053, 1084526]
['AND', 1084570, 1084614]
['FATHERS', 1084985, 1085247]
['SIDES', 1085305, 1085603]
['AS', 1085683, 1085778]
['WELL', 1085844, 1086033]
[['FROM', 1083071, 1083304], ['MY', 1083318, 1083595], ['MOTHERS', 1083668, 1083973], ['SIDES', 1084053, 1084526], ['AND', 1084570, 1084614], ['FATHERS', 1084985, 1085247], ['SIDES', 1085305, 1085603], ['AS', 1085683, 1085778], ['WELL', 1085844, 1086033]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1342', 'timeSlotRef1': 'ts1293', 'timeSlotRef2': 'ts1297', 'timeStamp1': 1083071, 'timeStamp2': 1086171, 'text': "from my mother's sides and father's sides as well ", 'words': [], 

[AINSH	(0.88): [    0,    53), ANCES	(0.85): [   70,   154)] []
['AINSH', 1115993, 1116380]
['ANCES', 1116504, 1117118]
[['AINSH', 1115993, 1116380], ['ANCES', 1116504, 1117118]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1351', 'timeSlotRef1': 'ts1337', 'timeSlotRef2': 'ts1339', 'timeStamp1': 1115993, 'timeStamp2': 1117118, 'text': '!ainsh~ !ances~', 'words': [], 'phonemes': []}
YEAH|!ANCESTORS|YEAH
YEAH|ANCESTORS|YEAH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a420.wav
[YEAH	(0.87): [    0,    43), ANCESTORS	(0.79): [   77,   154), YEAH	(0.85): [  171,   209)] []
['YEAH', 1117630, 1117944]
['ANCESTORS', 1118192, 1118754]
['YEAH', 1118878, 1119155]
[['YEAH', 1117630, 1117944], ['ANCESTORS', 1118192, 1118754], ['YEAH', 1118878, 1119155]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a420', 'timeSlotRef1': 'ts1340', 'timeSlotRef2': 'ts1342', 'timeStamp1': 1117630, 'timeStamp2': 1119155, 'text': 'Yeah !ancestors 

[BUT	(0.88): [    0,    27), IF	(0.64): [   46,    52), YOU	(0.88): [   66,   100), ARE	(0.90): [  101,   131), A	(0.84): [  217,   224), MALE	(0.89): [  232,   319)] []
['BUT', 1156211, 1156407]
['IF', 1156545, 1156589]
['YOU', 1156690, 1156937]
['ARE', 1156945, 1157162]
['A', 1157787, 1157838]
['MALE', 1157896, 1158528]
[['BUT', 1156211, 1156407], ['IF', 1156545, 1156589], ['YOU', 1156690, 1156937], ['ARE', 1156945, 1157162], ['A', 1157787, 1157838], ['MALE', 1157896, 1158528]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1361', 'timeSlotRef1': 'ts1385', 'timeSlotRef2': 'ts1386', 'timeStamp1': 1156211, 'timeStamp2': 1158528, 'text': 'But if you are a male', 'words': [], 'phonemes': []}
YOU|NEED|TO|BE|THERE
YOU|NEED|TO|BE|THERE
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a1362.wav
[YOU	(0.92): [    0,    32), NEED	(0.74): [   40,    56), TO	(0.81): [   69,    84), BE	(0.80): [   93,   109), THERE	(0.85): [  111,   1

[MIGRATION	(0.87): [    0,   117), MIGRATION	(0.81): [  230,   303), POLICY	(0.87): [  315,   392)] []
['MIGRATION', 1193145, 1193996]
['MIGRATION', 1194817, 1195348]
['POLICY', 1195435, 1195995]
[['MIGRATION', 1193145, 1193996], ['MIGRATION', 1194817, 1195348], ['POLICY', 1195435, 1195995]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1373', 'timeSlotRef1': 'ts1445', 'timeSlotRef2': 'ts1448', 'timeStamp1': 1193145, 'timeStamp2': 1195995, 'text': 'migration migration policy', 'words': [], 'phonemes': []}
SO|FROM|COUPLE|OF|YEARS|AGO
SO|FROM|COUPLE|OF|YEARS|AGO
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a1374.wav
[SO	(0.92): [    0,   129), FROM	(0.80): [  209,   234), COUPLE	(0.86): [  252,   284), OF	(0.50): [  285,   287), YEARS	(0.74): [  323,   348), AGO	(0.92): [  364,   426)] []
['SO', 1196357, 1197296]
['FROM', 1197878, 1198060]
['COUPLE', 1198191, 1198424]
['OF', 1198431, 1198445]
['YEARS', 1198707, 1198889]


[MULTICULTURAL	(0.85): [    0,   129), I	(0.76): [  149,   154), THINK	(0.93): [  157,   219)] []
['MULTICULTURAL', 1224673, 1225611]
['I', 1225756, 1225792]
['THINK', 1225814, 1226265]
[['MULTICULTURAL', 1224673, 1225611], ['I', 1225756, 1225792], ['THINK', 1225814, 1226265]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1382', 'timeSlotRef1': 'ts1480', 'timeSlotRef2': 'ts1481', 'timeStamp1': 1224673, 'timeStamp2': 1226265, 'text': 'multicultural I think', 'words': [], 'phonemes': []}
YEAH
YEAH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a1383.wav
[YEAH	(0.89): [    0,    63)] []
['YEAH', 1226728, 1227194]
[['YEAH', 1226728, 1227194]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1383', 'timeSlotRef1': 'ts1483', 'timeSlotRef2': 'ts1484', 'timeStamp1': 1226728, 'timeStamp2': 1227194, 'text': 'yeah', 'words': [], 'phonemes': []}
!AH|!YEAH
AH|YEAH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_M

[YEAH	(0.84): [    0,    44), LAST	(0.77): [   73,    91), TIME	(0.89): [  103,   139), WE	(0.85): [  158,   177)] []
['YEAH', 1282307, 1282628]
['LAST', 1282840, 1282971]
['TIME', 1283058, 1283321]
['WE', 1283460, 1283598]
[['YEAH', 1282307, 1282628], ['LAST', 1282840, 1282971], ['TIME', 1283058, 1283321], ['WE', 1283460, 1283598]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a431', 'timeSlotRef1': 'ts1540', 'timeSlotRef2': 'ts1544', 'timeStamp1': 1282307, 'timeStamp2': 1283715, 'text': '!Yeah last time we ', 'words': [], 'phonemes': []}
WE|CELEBRATE|THE|CHINESE|LUNAR|NEW|YEAR
WE|CELEBRATE|THE|CHINESE|LUNAR|NEW|YEAR
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a1396.wav
[WE	(0.88): [    0,    24), CELEBRATE	(0.85): [   36,   114), THE	(0.92): [  123,   172), CHINESE	(0.84): [  192,   238), LUNAR	(0.66): [  285,   302), NEW	(0.87): [  332,   378), YEAR	(0.91): [  395,   448)] []
['WE', 1284047, 1284221]
['CELEBRATE', 1

[IN	(0.90): [    0,    45), THE	(0.89): [   58,   128), LILYPOND	(0.67): [  222,   250), WAY	(0.82): [  263,   281)] []
['IN', 1317541, 1317868]
['THE', 1317963, 1318473]
['LILYPOND', 1319157, 1319360]
['WAY', 1319455, 1319586]
[['IN', 1317541, 1317868], ['THE', 1317963, 1318473], ['LILYPOND', 1319157, 1319360], ['WAY', 1319455, 1319586]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1407', 'timeSlotRef1': 'ts1591', 'timeSlotRef2': 'ts1592', 'timeStamp1': 1317541, 'timeStamp2': 1319586, 'text': 'in the [Lilypond Way]', 'words': [], 'phonemes': []}
IN|[CASEY]|AREA|WE|CAN|FIND|THAT|THEY|
IN|CASEY|AREA|WE|CAN|FIND|THAT|THEY|
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a1408.wav
[IN	(0.87): [    0,    23), CASEY	(0.90): [  117,   202), AREA	(0.82): [  205,   229), WE	(0.74): [  230,   237), CAN	(0.88): [  246,   266), FIND	(0.82): [  283,   312), THAT	(0.82): [  316,   339), THEY	(0.00): [  343,   347)] []
['IN', 1320009,

[YEAH	(0.80): [    0,    39)] []
['YEAH', 1345397, 1345688]
[['YEAH', 1345397, 1345688]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1418', 'timeSlotRef1': 'ts1615', 'timeSlotRef2': 'ts1616', 'timeStamp1': 1345397, 'timeStamp2': 1345688, 'text': 'yeah', 'words': [], 'phonemes': []}
NOT|REALLY|BECAUSE|THE|CHILDCARE|SCHOOL|
NOT|REALLY|BECAUSE|THE|CHILDCARE|SCHOOL|
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a434.wav
[NOT	(0.94): [    0,    63), REALLY	(0.88): [   65,   139), BECAUSE	(0.83): [  147,   203), THE	(0.90): [  222,   275), CHILDCARE	(0.88): [  290,   393), SCHOOL	(0.88): [  397,   471)] []
['NOT', 1355830, 1356288]
['REALLY', 1356302, 1356840]
['BECAUSE', 1356898, 1357305]
['THE', 1357443, 1357828]
['CHILDCARE', 1357937, 1358685]
['SCHOOL', 1358714, 1359252]
[['NOT', 1355830, 1356288], ['REALLY', 1356302, 1356840], ['BECAUSE', 1356898, 1357305], ['THE', 1357443, 1357828], ['CHILDCARE', 1357937, 1358685], ['

[UH	(0.92): [    0,    46), YOU	(0.78): [   54,    67), STILL	(0.88): [   87,   137), CAN	(0.93): [  143,   176), TALK	(0.77): [  230,   263), TO	(0.78): [  269,   284), HIM	(0.92): [  295,   332), DIRECTLY	(0.79): [  357,   419), OR	(0.90): [  430,   499)] []
['UH', 1408712, 1409046]
['YOU', 1409104, 1409199]
['STILL', 1409344, 1409707]
['CAN', 1409751, 1409990]
['TALK', 1410382, 1410622]
['TO', 1410666, 1410775]
['HIM', 1410855, 1411123]
['DIRECTLY', 1411305, 1411755]
['OR', 1411835, 1412336]
[['UH', 1408712, 1409046], ['YOU', 1409104, 1409199], ['STILL', 1409344, 1409707], ['CAN', 1409751, 1409990], ['TALK', 1410382, 1410622], ['TO', 1410666, 1410775], ['HIM', 1410855, 1411123], ['DIRECTLY', 1411305, 1411755], ['OR', 1411835, 1412336]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1433', 'timeSlotRef1': 'ts1673', 'timeSlotRef2': 'ts1674', 'timeStamp1': 1408712, 'timeStamp2': 1412358, 'text': 'uh you still can talk to him directly or ', 'words': [], 'phonemes': []}
WHATEVER|EXPRE

[AND	(0.92): [    0,    73), AND	(0.77): [  166,   175), WEAR	(0.88): [  302,   334), THE	(0.76): [  341,   354), SLIPPER	(0.79): [  357,   433)] []
['AND', 1460494, 1461024]
['AND', 1461700, 1461765]
['WEAR', 1462687, 1462920]
['THE', 1462971, 1463065]
['SLIPPER', 1463087, 1463639]
[['AND', 1460494, 1461024], ['AND', 1461700, 1461765], ['WEAR', 1462687, 1462920], ['THE', 1462971, 1463065], ['SLIPPER', 1463087, 1463639]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1445', 'timeSlotRef1': 'ts1712', 'timeSlotRef2': 'ts1714', 'timeStamp1': 1460494, 'timeStamp2': 1463639, 'text': 'and and wear the slipper', 'words': [], 'phonemes': []}
AND|AND|SHORT|TROU~|UH|SHORT|PANTS|
AND|AND|SHORT|TROU|UH|SHORT|PANTS|
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a1447.wav
[AND	(0.94): [    0,    53), AND	(0.49): [  102,   109), SHORT	(0.74): [  142,   167), TROU	(0.80): [  181,   214), UH	(0.71): [  237,   246), SHORT	(0.76): [  260, 

[SOME	(0.89): [    0,    41), KIND	(0.82): [   49,    79), OF	(0.67): [   86,    96), JOKE	(0.89): [  115,   154), YOU	(0.87): [  155,   188), CANNOT	(0.87): [  197,   253), LIKE	(0.85): [  324,   354), LIKE	(0.89): [  355,   417), MAKING	(0.82): [  443,   479), THE	(0.76): [  483,   496), JOKE	(0.83): [  505,   533), WITH	(0.71): [  541,   556), CHINESE	(0.86): [  577,   642), JUST	(0.78): [  652,   682), YOU	(0.86): [  699,   714), FEEL	(0.84): [  758,   798), THE	(0.78): [  804,   816), SAME	(0.88): [  833,   867), WAY	(0.89): [  875,   916), AS	(0.75): [  917,   929), AUSTRALIAN	(0.85): [  941,  1022), STYLE	(0.86): [ 1026,  1077)] []
['SOME', 1489904, 1490202]
['KIND', 1490260, 1490478]
['OF', 1490528, 1490601]
['JOKE', 1490739, 1491022]
['YOU', 1491030, 1491269]
['CANNOT', 1491335, 1491741]
['LIKE', 1492257, 1492475]
['LIKE', 1492482, 1492932]
['MAKING', 1493121, 1493382]
['THE', 1493411, 1493506]
['JOKE', 1493571, 1493774]
['WITH', 1493833, 1493941]
['CHINESE', 1494094, 1494566]

[YEAH	(0.81): [    0,    44), ITS	(0.72): [  100,   111), NOT	(0.76): [  118,   137), RESPECTS	(0.83): [  145,   207), PEOPLE	(0.80): [  227,   275)] []
['YEAH', 1515329, 1515649]
['ITS', 1516057, 1516137]
['NOT', 1516188, 1516326]
['RESPECTS', 1516384, 1516835]
['PEOPLE', 1516981, 1517330]
[['YEAH', 1515329, 1515649], ['ITS', 1516057, 1516137], ['NOT', 1516188, 1516326], ['RESPECTS', 1516384, 1516835], ['PEOPLE', 1516981, 1517330]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1472', 'timeSlotRef1': 'ts1781', 'timeSlotRef2': 'ts1782', 'timeStamp1': 1515329, 'timeStamp2': 1517483, 'text': "yeah it's not respects people ", 'words': [], 'phonemes': []}
!YEAH
YEAH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a1473.wav
[YEAH	(0.90): [    0,    74)] []
['YEAH', 1518070, 1518615]
[['YEAH', 1518070, 1518615]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1473', 'timeSlotRef1': 'ts1783', 'timeSlotRef2': 'ts1786', 'timeStam

[WILL	(0.91): [    0,    52), HURT	(0.75): [   70,    86), YOUR	(0.86): [   87,   117), PRIVACY	(0.80): [  136,   183), PART	(0.87): [  212,   250), OR	(0.83): [  256,   265), THINGS	(0.70): [  279,   302), LIKE	(0.62): [  312,   323), THIS	(0.89): [  326,   380)] []
['WILL', 1549246, 1549624]
['HURT', 1549755, 1549871]
['YOUR', 1549879, 1550097]
['PRIVACY', 1550235, 1550577]
['PART', 1550787, 1551064]
['OR', 1551107, 1551173]
['THINGS', 1551275, 1551442]
['LIKE', 1551515, 1551595]
['THIS', 1551616, 1552009]
[['WILL', 1549246, 1549624], ['HURT', 1549755, 1549871], ['YOUR', 1549879, 1550097], ['PRIVACY', 1550235, 1550577], ['PART', 1550787, 1551064], ['OR', 1551107, 1551173], ['THINGS', 1551275, 1551442], ['LIKE', 1551515, 1551595], ['THIS', 1551616, 1552009]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1483', 'timeSlotRef1': 'ts1809', 'timeSlotRef2': 'ts1812', 'timeStamp1': 1549246, 'timeStamp2': 1552009, 'text': 'will hurt your privacy part or things !like !this', 'words': [], '

[YEAH	(0.91): [    0,    73)] []
['YEAH', 1593857, 1594393]
[['YEAH', 1593857, 1594393]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1496', 'timeSlotRef1': 'ts1848', 'timeSlotRef2': 'ts1850', 'timeStamp1': 1593857, 'timeStamp2': 1594393, 'text': '!yeah', 'words': [], 'phonemes': []}
IF|HE|CANNOT|SPEAK|OR|UNDERSTAND|TIANZHUNESE|
IF|HE|CANNOT|SPEAK|OR|UNDERSTAND|TIANZHUNESE|
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a443.wav
[IF	(0.91): [    0,    32), HE	(0.91): [   43,    63), CANNOT	(0.86): [   72,   121), SPEAK	(0.94): [  122,   233), OR	(0.01): [  256,   258), UNDERSTAND	(0.86): [  259,   329), TIANZHUNESE	(0.87): [  341,   453)] []
['IF', 1594577, 1594810]
['HE', 1594890, 1595035]
['CANNOT', 1595101, 1595457]
['SPEAK', 1595464, 1596271]
['OR', 1596439, 1596453]
['UNDERSTAND', 1596461, 1596970]
['TIANZHUNESE', 1597057, 1597871]
[['IF', 1594577, 1594810], ['HE', 1594890, 1595035], ['CANNOT', 1595101, 1595457], [

[YEAH	(0.97): [    0,    50), QUITE	(0.63): [   65,    83), OFTEN	(0.75): [  100,   128)] []
['YEAH', 1624121, 1624486]
['QUITE', 1624596, 1624727]
['OFTEN', 1624851, 1625056]
[['YEAH', 1624121, 1624486], ['QUITE', 1624596, 1624727], ['OFTEN', 1624851, 1625056]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a444', 'timeSlotRef1': 'ts1875', 'timeSlotRef2': 'ts1876', 'timeStamp1': 1624121, 'timeStamp2': 1625275, 'text': 'Yeah quite often ', 'words': [], 'phonemes': []}
!YEAH|MAYBE|W~
YEAH|MAYBE|W
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a1503.wav
[YEAH	(0.88): [    0,    95), MAYBE	(0.81): [   99,   123), W	(1.00): [  126,   152)] []
['YEAH', 1625675, 1626368]
['MAYBE', 1626397, 1626572]
['W', 1626594, 1626784]
[['YEAH', 1625675, 1626368], ['MAYBE', 1626397, 1626572], ['W', 1626594, 1626784]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1503', 'timeSlotRef1': 'ts1878', 'timeSlotRef2': 'ts1881', 'timeStamp1': 1625

[YEAH	(0.81): [    0,    52)] []
['YEAH', 1648830, 1649212]
[['YEAH', 1648830, 1649212]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1513', 'timeSlotRef1': 'ts1908', 'timeSlotRef2': 'ts1910', 'timeStamp1': 1648830, 'timeStamp2': 1649212, 'text': 'yeah', 'words': [], 'phonemes': []}
YEP
YEP
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a446.wav
[YEP	(0.87): [    0,    44)] []
['YEP', 1655394, 1655721]
[['YEP', 1655394, 1655721]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a446', 'timeSlotRef1': 'ts1917', 'timeSlotRef2': 'ts1918', 'timeStamp1': 1655394, 'timeStamp2': 1655721, 'text': 'yep', 'words': [], 'phonemes': []}
I|THINK|MORE|AUSTRALIAN|STYLE|
I|THINK|MORE|AUSTRALIAN|STYLE|
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a447.wav
[I	(0.92): [    0,    27), THINK	(0.80): [   35,    65), MORE	(0.85): [   68,   108), AUSTRALIAN	(0.83): [  123,   197), 

[YOU	(0.90): [    0,    28), JUST	(0.84): [   34,    61), DO	(0.93): [   72,   107), WHATEVER	(0.77): [  127,   173), YOU	(0.71): [  177,   191), THINK	(0.69): [  195,   209), IS	(0.83): [  224,   243), RIGHT	(0.86): [  252,   292), THEN	(0.64): [  347,   359), YOU	(0.72): [  362,   375), JUST	(0.72): [  378,   393), DO	(0.81): [  410,   431), THAT	(0.87): [  433,   455), BUT	(0.72): [  522,   537), IN	(0.70): [  560,   567), CHINA	(0.87): [  578,   632), IF	(0.67): [  646,   652), YOU	(0.91): [  666,   718), THINK	(0.79): [  726,   749), YOURE	(0.76): [  757,   779), RIGHT	(0.81): [  788,   818), AND	(0.91): [  849,   854), YOU	(0.70): [  857,   868), FOLLOW	(0.82): [  875,   907), THE	(0.82): [  912,   922), RIGHT	(0.82): [  938,   969), RULE	(0.87): [  981,  1017)] []
['YOU', 1680761, 1680964]
['JUST', 1681008, 1681204]
['DO', 1681284, 1681538]
['WHATEVER', 1681683, 1682017]
['YOU', 1682046, 1682148]
['THINK', 1682177, 1682279]
['IS', 1682388, 1682526]
['RIGHT', 1682591, 1682882]
['

[PEOPLE	(0.80): [    0,    42), LIKE	(0.80): [   46,    67), TO	(0.89): [   70,    83), VISIT	(0.78): [   97,   134), RELATIVES	(0.82): [  140,   205), VERY	(0.82): [  228,   257), OFTEN	(0.91): [  264,   350)] []
['PEOPLE', 1714354, 1714659]
['LIKE', 1714688, 1714841]
['TO', 1714863, 1714958]
['VISIT', 1715059, 1715328]
['RELATIVES', 1715372, 1715845]
['VERY', 1716012, 1716223]
['OFTEN', 1716274, 1716899]
[['PEOPLE', 1714354, 1714659], ['LIKE', 1714688, 1714841], ['TO', 1714863, 1714958], ['VISIT', 1715059, 1715328], ['RELATIVES', 1715372, 1715845], ['VERY', 1716012, 1716223], ['OFTEN', 1716274, 1716899]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1533', 'timeSlotRef1': 'ts1973', 'timeSlotRef2': 'ts1974', 'timeStamp1': 1714354, 'timeStamp2': 1716899, 'text': 'people like to visit relatives very often', 'words': [], 'phonemes': []}
!YEAH|!AND|
YEAH|AND|
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a450.wav
[YEAH	(0.

[YEAH	(0.92): [    0,    72)] []
['YEAH', 1743838, 1744366]
[['YEAH', 1743838, 1744366]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a1541', 'timeSlotRef1': 'ts1998', 'timeSlotRef2': 'ts1999', 'timeStamp1': 1743838, 'timeStamp2': 1744366, 'text': 'yeah', 'words': [], 'phonemes': []}
YEAH|YEAH
YEAH|YEAH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a451.wav
[YEAH	(0.91): [    0,    53), YEAH	(0.81): [   66,    88)] []
['YEAH', 1754430, 1754819]
['YEAH', 1754914, 1755076]
[['YEAH', 1754430, 1754819], ['YEAH', 1754914, 1755076]]
{'tiername': 'LaBB-CAT_PNT_ Stella ', 'annoID': 'a451', 'timeSlotRef1': 'ts2009', 'timeSlotRef2': 'ts2010', 'timeStamp1': 1754430, 'timeStamp2': 1755076, 'text': 'Yeah yeah', 'words': [], 'phonemes': []}
!CHANGED|!A|!LOT
CHANGED|A|LOT
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\LaBB-CAT_PNT__Stella__a1543.wav
[CHANGED	(0.84): [    0,    42), A	(0.84): [  

[IF	(0.86): [    0,    22), YOU	(0.82): [   26,    42), STAY	(0.84): [   44,    69), IN	(0.79): [   79,    87), CHINA	(0.85): [  106,   157), A	(0.85): [  158,   162), LOT	(0.69): [  169,   179), OF	(0.55): [  180,   185), TIME	(0.72): [  191,   209), YOU	(0.93): [  219,   258), NEED	(0.60): [  270,   279), TO	(0.86): [  284,   296), SPEND	(0.88): [  297,   346), TO	(0.84): [  358,   373)] []
['IF', 1791777, 1791937]
['YOU', 1791966, 1792082]
['STAY', 1792097, 1792278]
['IN', 1792351, 1792409]
['CHINA', 1792547, 1792918]
['A', 1792925, 1792954]
['LOT', 1793005, 1793078]
['OF', 1793085, 1793121]
['TIME', 1793165, 1793296]
['YOU', 1793369, 1793652]
['NEED', 1793739, 1793805]
['TO', 1793841, 1793928]
['SPEND', 1793935, 1794292]
['TO', 1794379, 1794488]
[['IF', 1791777, 1791937], ['YOU', 1791966, 1792082], ['STAY', 1792097, 1792278], ['IN', 1792351, 1792409], ['CHINA', 1792547, 1792918], ['A', 1792925, 1792954], ['LOT', 1793005, 1793078], ['OF', 1793085, 1793121], ['TIME', 1793165, 1793296

[YEAH	(0.92): [    0,    68)] []
['YEAH', 25170, 25670]
[['YEAH', 25170, 25670]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1053', 'timeSlotRef1': 'ts23', 'timeSlotRef2': 'ts26', 'timeStamp1': 25170, 'timeStamp2': 25670, 'text': 'yeah', 'words': [], 'phonemes': []}
YEAH|WHERE|I|COME|FROM
YEAH|WHERE|I|COME|FROM
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a809.wav
[YEAH	(0.93): [    0,    35), WHERE	(0.74): [   88,   113), I	(0.79): [  118,   122), COME	(0.73): [  126,   147), FROM	(0.92): [  151,   198)] []
['YEAH', 28360, 28615]
['WHERE', 29000, 29182]
['I', 29218, 29247]
['COME', 29276, 29429]
['FROM', 29458, 29800]
[['YEAH', 28360, 28615], ['WHERE', 29000, 29182], ['I', 29218, 29247], ['COME', 29276, 29429], ['FROM', 29458, 29800]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a809', 'timeSlotRef1': 'ts30', 'timeSlotRef2': 'ts31', 'timeStamp1': 28360, 'timeStamp2': 29800, 'text': 'Yeah where I come from', 'words': [], 'phonemes': []}
YEAH|

[SO	(0.90): [    0,    43), SO	(0.71): [   84,   102), WHAT	(0.85): [  108,   183), DID	(0.75): [  199,   222), YOU	(0.78): [  232,   247), DO	(0.91): [  248,   318)] []
['SO', 131030, 131342]
['SO', 131640, 131771]
['WHAT', 131815, 132359]
['DID', 132476, 132643]
['YOU', 132715, 132824]
['DO', 132831, 133340]
[['SO', 131030, 131342], ['SO', 131640, 131771], ['WHAT', 131815, 132359], ['DID', 132476, 132643], ['YOU', 132715, 132824], ['DO', 132831, 133340]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a815', 'timeSlotRef1': 'ts136', 'timeSlotRef2': 'ts140', 'timeStamp1': 131030, 'timeStamp2': 133340, 'text': 'So so what did you do', 'words': [], 'phonemes': []}
YEAH
YEAH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a1081.wav
[YEAH	(0.84): [    0,    94)] []
['YEAH', 144460, 145150]
[['YEAH', 144460, 145150]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1081', 'timeSlotRef1': 'ts145', 'timeSlotRef2': 'ts146', 'timeStamp1': 144460, 'timeStamp2': 

[LIKE	(0.75): [    0,    24), FIRST	(0.81): [   35,    65), GET	(0.75): [   72,    86), OFF	(0.75): [   90,   101), THE	(0.74): [  106,   115), PLANE	(0.88): [  122,   192)] []
['LIKE', 246430, 246605]
['FIRST', 246685, 246904]
['GET', 246955, 247057]
['OFF', 247086, 247166]
['THE', 247203, 247269]
['PLANE', 247320, 247830]
[['LIKE', 246430, 246605], ['FIRST', 246685, 246904], ['GET', 246955, 247057], ['OFF', 247086, 247166], ['THE', 247203, 247269], ['PLANE', 247320, 247830]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1110', 'timeSlotRef1': 'ts248', 'timeSlotRef2': 'ts251', 'timeStamp1': 246430, 'timeStamp2': 247830, 'text': 'like first get off the plane', 'words': [], 'phonemes': []}
YEAH
YEAH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a1120.wav
[YEAH	(0.90): [    0,    59)] []
['YEAH', 256195, 256625]
[['YEAH', 256195, 256625]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1120', 'timeSlotRef1': 'ts262', 'timeSlotRef2': 'ts263', 'timeSt

[SO	(0.81): [    0,   138)] []
['SO', 336751, 337759]
[['SO', 336751, 337759]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1142', 'timeSlotRef1': 'ts346', 'timeSlotRef2': 'ts347', 'timeStamp1': 336751, 'timeStamp2': 337759, 'text': 'so', 'words': [], 'phonemes': []}
ALMOST|TWEL~|YEA~|TWELVE|YEARS
ALMOST|TWEL|YEA|TWELVE|YEARS
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a823.wav
[ALMOST	(0.84): [    0,    36), TWEL	(0.81): [   43,    68), YEA	(0.84): [   71,    90), TWELVE	(0.76): [  118,   145), YEARS	(0.88): [  146,   197)] []
['ALMOST', 338255, 338518]
['TWEL', 338569, 338751]
['YEA', 338773, 338912]
['TWELVE', 339116, 339313]
['YEARS', 339321, 339693]
[['ALMOST', 338255, 338518], ['TWEL', 338569, 338751], ['YEA', 338773, 338912], ['TWELVE', 339116, 339313], ['YEARS', 339321, 339693]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a823', 'timeSlotRef1': 'ts349', 'timeSlotRef2': 'ts351', 'timeStamp1': 338255, 'timeStamp2': 339693, 'text': 'Alm

[EVEN	(0.85): [    0,    35), NOW	(0.84): [   41,    72), EVEN	(0.95): [   73,   170), NOW	(0.85): [  175,   195), YOU	(0.73): [  196,   208), SPEND	(0.83): [  218,   253), EQUAL	(0.83): [  275,   327), AMOUNT	(0.78): [  328,   355), OF	(0.65): [  360,   367), TIME	(0.82): [  373,   405), IN	(0.69): [  482,   489), BOTH	(0.83): [  492,   548), CITIES	(0.72): [  554,   580)] []
['EVEN', 381851, 382105]
['NOW', 382149, 382374]
['EVEN', 382382, 383087]
['NOW', 383123, 383268]
['YOU', 383276, 383363]
['SPEND', 383436, 383690]
['EQUAL', 383850, 384228]
['AMOUNT', 384235, 384431]
['OF', 384468, 384519]
['TIME', 384562, 384795]
['IN', 385355, 385406]
['BOTH', 385427, 385834]
['CITIES', 385878, 386067]
[['EVEN', 381851, 382105], ['NOW', 382149, 382374], ['EVEN', 382382, 383087], ['NOW', 383123, 383268], ['YOU', 383276, 383363], ['SPEND', 383436, 383690], ['EQUAL', 383850, 384228], ['AMOUNT', 384235, 384431], ['OF', 384468, 384519], ['TIME', 384562, 384795], ['IN', 385355, 385406], ['BOTH', 385

[YEAH	(0.90): [    0,    51)] []
['YEAH', 443539, 443916]
[['YEAH', 443539, 443916]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1172', 'timeSlotRef1': 'ts458', 'timeSlotRef2': 'ts460', 'timeStamp1': 443539, 'timeStamp2': 443916, 'text': 'yeah', 'words': [], 'phonemes': []}
YEAH
YEAH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a1175.wav
[YEAH	(0.88): [    0,    46)] []
['YEAH', 451108, 451446]
[['YEAH', 451108, 451446]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1175', 'timeSlotRef1': 'ts467', 'timeSlotRef2': 'ts469', 'timeStamp1': 451108, 'timeStamp2': 451446, 'text': 'yeah', 'words': [], 'phonemes': []}
YEAH
YEAH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a1178.wav
[YEAH	(0.86): [    0,    54)] []
['YEAH', 453038, 453438]
[['YEAH', 453038, 453438]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1178', 'timeSlotRef1': 'ts471', 'timeSlotRef2': 'ts472', 'timeStamp1': 453038, 'timeStamp2': 45

[WHEN	(0.82): [    0,    36), UM	(0.87): [   75,   124), I	(0.86): [  181,   192), STARTED	(0.55): [  196,   215), LEARNING	(0.88): [  216,   279), ENGLISH	(0.93): [  280,   389), YEAH	(0.90): [  394,   429)] []
['WHEN', 526609, 526870]
['UM', 527154, 527509]
['I', 527923, 528003]
['STARTED', 528032, 528170]
['LEARNING', 528177, 528634]
['ENGLISH', 528642, 529434]
['YEAH', 529470, 529724]
[['WHEN', 526609, 526870], ['UM', 527154, 527509], ['I', 527923, 528003], ['STARTED', 528032, 528170], ['LEARNING', 528177, 528634], ['ENGLISH', 528642, 529434], ['YEAH', 529470, 529724]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a838', 'timeSlotRef1': 'ts569', 'timeSlotRef2': 'ts578', 'timeStamp1': 526609, 'timeStamp2': 529724, 'text': 'when um I started learning English yeah', 'words': [], 'phonemes': []}
IT'S|THE|SAME|OH|OKAY|YEAH
ITS|THE|SAME|OH|OKAY|YEAH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a839.wav
[ITS	(0.81): [    0,    35), THE	(0.32)

[THAN	(0.83): [    0,    29), TIANZHUNESE	(0.91): [   86,   266), OR	(0.78): [  277,   293), ABOUT	(0.70): [  303,   325), THE	(0.81): [  329,   370), EMO	(0.83): [  374,   406), EQUAL	(0.75): [  415,   446), AMOUNT	(0.88): [  453,   513)] []
['THAN', 575681, 575892]
['TIANZHUNESE', 576306, 577616]
['OR', 577696, 577812]
['ABOUT', 577885, 578045]
['THE', 578074, 578372]
['EMO', 578401, 578634]
['EQUAL', 578699, 578925]
['AMOUNT', 578976, 579412]
[['THAN', 575681, 575892], ['TIANZHUNESE', 576306, 577616], ['OR', 577696, 577812], ['ABOUT', 577885, 578045], ['THE', 578074, 578372], ['EMO', 578401, 578634], ['EQUAL', 578699, 578925], ['AMOUNT', 578976, 579412]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1209', 'timeSlotRef1': 'ts622', 'timeSlotRef2': 'ts627', 'timeStamp1': 575681, 'timeStamp2': 579412, 'text': 'than Tianzhunese Or about the emo~ equal amount', 'words': [], 'phonemes': []}
ALMOST|MANDARIN
ALMOST|MANDARIN
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_00

[YEAH	(0.95): [    0,    45), ITS	(0.82): [   46,    61), VERY	(0.69): [   71,    84), DIFFERENT	(0.74): [   86,   127), FROM	(0.84): [  131,   168), THE	(0.75): [  178,   190), LANGUAGE	(0.77): [  194,   222), THAT	(0.79): [  231,   268), I	(0.51): [  269,   270), LEARN	(0.92): [  274,   333)] []
['YEAH', 652032, 652359]
['ITS', 652367, 652476]
['VERY', 652549, 652643]
['DIFFERENT', 652658, 652956]
['FROM', 652985, 653254]
['THE', 653327, 653414]
['LANGUAGE', 653444, 653647]
['THAT', 653713, 653982]
['I', 653989, 653997]
['LEARN', 654026, 654455]
[['YEAH', 652032, 652359], ['ITS', 652367, 652476], ['VERY', 652549, 652643], ['DIFFERENT', 652658, 652956], ['FROM', 652985, 653254], ['THE', 653327, 653414], ['LANGUAGE', 653444, 653647], ['THAT', 653713, 653982], ['I', 653989, 653997], ['LEARN', 654026, 654455]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a845', 'timeSlotRef1': 'ts721', 'timeSlotRef2': 'ts727', 'timeStamp1': 652032, 'timeStamp2': 654455, 'text': "Yeah it's very different from t

[SO	(0.75): [    0,    37), WITH	(0.74): [   45,    60), YOUR	(0.90): [   69,   135), PARENTS	(0.84): [  210,   259), YOU	(0.69): [  271,   289), TEND	(0.84): [  291,   321), TO	(0.76): [  328,   337), SPEAK	(0.88): [  343,   380), MORE	(0.89): [  435,   492), TIANZHUNESE	(0.90): [  502,   643), WITH	(0.74): [  651,   665), YOUR	(0.68): [  679,   690), HUSBAND	(0.79): [  694,   725), YOU	(0.73): [  726,   738), SPEAK	(0.72): [  741,   764), MANDARIN	(0.87): [  859,   936)] []
['SO', 699728, 699997]
['WITH', 700055, 700164]
['YOUR', 700229, 700709]
['PARENTS', 701254, 701610]
['YOU', 701697, 701828]
['TEND', 701842, 702060]
['TO', 702111, 702176]
['SPEAK', 702220, 702489]
['MORE', 702888, 703302]
['TIANZHUNESE', 703375, 704399]
['WITH', 704457, 704559]
['YOUR', 704661, 704741]
['HUSBAND', 704770, 704995]
['YOU', 705002, 705090]
['SPEAK', 705111, 705278]
['MANDARIN', 705969, 706528]
[['SO', 699728, 699997], ['WITH', 700055, 700164], ['YOUR', 700229, 700709], ['PARENTS', 701254, 701610], 

[YEAH	(0.85): [    0,    50)] []
['YEAH', 768107, 768477]
[['YEAH', 768107, 768477]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1253', 'timeSlotRef1': 'ts838', 'timeSlotRef2': 'ts841', 'timeStamp1': 768107, 'timeStamp2': 768477, 'text': 'yeah', 'words': [], 'phonemes': []}
BUT|WHEN|YOU'RE|COMMUNICATING|WITH|GOVERNMENT|AGENCIES|CAUSE|YOU|DO|YOU|HAVE|TO|DO|YEAH|CAUSE|YOU|HAVE|TO
BUT|WHEN|YOURE|COMMUNICATING|WITH|GOVERNMENT|AGENCIES|CAUSE|YOU|DO|YOU|HAVE|TO|DO|YEAH|CAUSE|YOU|HAVE|TO
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a858.wav
[BUT	(0.83): [    0,    19), WHEN	(0.85): [   28,    43), YOURE	(0.85): [   55,    90), COMMUNICATING	(0.80): [   92,   175), WITH	(0.65): [  178,   189), GOVERNMENT	(0.79): [  207,   267), AGENCIES	(0.76): [  293,   334), CAUSE	(0.81): [  401,   436), YOU	(0.84): [  441,   477), DO	(0.87): [  481,   498), YOU	(0.54): [  502,   508), HAVE	(0.53): [  509,   518), TO	(0.81): [  521,   533), DO	(0.95): [  541,

[COMEDY	(0.85): [    0,    83)] []
['COMEDY', 820570, 821178]
[['COMEDY', 820570, 821178]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1262', 'timeSlotRef1': 'ts914', 'timeSlotRef2': 'ts915', 'timeStamp1': 820570, 'timeStamp2': 821178, 'text': 'comedy', 'words': [], 'phonemes': []}
OH|OKAY
OH|OKAY
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a1267.wav
[OH	(0.78): [    0,    22), OKAY	(0.78): [   47,    74)] []
['OH', 824924, 825086]
['OKAY', 825271, 825470]
[['OH', 824924, 825086], ['OKAY', 825271, 825470]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1267', 'timeSlotRef1': 'ts919', 'timeSlotRef2': 'ts921', 'timeStamp1': 824924, 'timeStamp2': 825470, 'text': 'oh okay', 'words': [], 'phonemes': []}
YEAH|DO|YOU|WATCH|CHINESE|CARTOON|WITH|HIM
YEAH|DO|YOU|WATCH|CHINESE|CARTOON|WITH|HIM
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a1268.wav
[YEAH	(0.84): [    0,    28), DO	(0.90): [   81,   10

[I	(0.92): [    0,    73), THINK	(0.83): [   74,   129), ITS	(0.66): [  130,   141), LAST	(0.60): [  144,   154), NIGHT	(0.59): [  158,   172), YEAH	(0.01): [  173,   177), YEAH	(0.64): [  179,   191), YEAH	(0.39): [  239,   245), LAST	(0.73): [  253,   272), NIGHT	(0.85): [  283,   318)] []
['I', 866093, 866625]
['THINK', 866632, 867033]
['ITS', 867040, 867120]
['LAST', 867142, 867215]
['NIGHT', 867244, 867346]
['YEAH', 867353, 867382]
['YEAH', 867397, 867484]
['YEAH', 867834, 867877]
['LAST', 867936, 868074]
['NIGHT', 868154, 868409]
[['I', 866093, 866625], ['THINK', 866632, 867033], ['ITS', 867040, 867120], ['LAST', 867142, 867215], ['NIGHT', 867244, 867346], ['YEAH', 867353, 867382], ['YEAH', 867397, 867484], ['YEAH', 867834, 867877], ['LAST', 867936, 868074], ['NIGHT', 868154, 868409]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a868', 'timeSlotRef1': 'ts992', 'timeSlotRef2': 'ts997', 'timeStamp1': 866093, 'timeStamp2': 868409, 'text': "I think it's last night yeah yeah yeah last night

[YEAH	(0.89): [    0,    47)] []
['YEAH', 932031, 932377]
[['YEAH', 932031, 932377]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1296', 'timeSlotRef1': 'ts1087', 'timeSlotRef2': 'ts1088', 'timeStamp1': 932031, 'timeStamp2': 932377, 'text': 'yeah', 'words': [], 'phonemes': []}
YEAH
YEAH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a874.wav
[YEAH	(0.89): [    0,    64)] []
['YEAH', 944447, 944916]
[['YEAH', 944447, 944916]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a874', 'timeSlotRef1': 'ts1100', 'timeSlotRef2': 'ts1103', 'timeStamp1': 944447, 'timeStamp2': 944916, 'text': 'yeah', 'words': [], 'phonemes': []}
LIKE|FRIENDS|IN|GENERAL
LIKE|FRIENDS|IN|GENERAL
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a875.wav
[LIKE	(0.89): [    0,    45), FRIENDS	(0.78): [   57,    95), IN	(0.78): [  102,   112), GENERAL	(0.85): [  118,   197)] []
['LIKE', 951401, 951729]
['FRIENDS', 951817, 952094]
['IN

[YEAH	(0.89): [    0,    56)] []
['YEAH', 1012895, 1013304]
[['YEAH', 1012895, 1013304]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1321', 'timeSlotRef1': 'ts1200', 'timeSlotRef2': 'ts1203', 'timeStamp1': 1012895, 'timeStamp2': 1013304, 'text': 'yeah', 'words': [], 'phonemes': []}
WELL|I|I|EAT|BOTH
WELL|I|I|EAT|BOTH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a880.wav
[WELL	(0.90): [    0,    51), I	(0.47): [   56,    58), I	(0.88): [   59,    62), EAT	(0.63): [   92,    98), BOTH	(0.91): [  122,   206)] []
['WELL', 1013779, 1014150]
['I', 1014187, 1014201]
['I', 1014209, 1014230]
['EAT', 1014449, 1014493]
['BOTH', 1014667, 1015279]
[['WELL', 1013779, 1014150], ['I', 1014187, 1014201], ['I', 1014209, 1014230], ['EAT', 1014449, 1014493], ['BOTH', 1014667, 1015279]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a880', 'timeSlotRef1': 'ts1205', 'timeSlotRef2': 'ts1206', 'timeStamp1': 1013779, 'timeStamp2': 1015279, 'text': 'Well I I eat both', 

[OH	(0.81): [    0,    20), RELATIVES	(0.85): [   24,   128)] []
['OH', 1064204, 1064350]
['RELATIVES', 1064379, 1065137]
[['OH', 1064204, 1064350], ['RELATIVES', 1064379, 1065137]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1338', 'timeSlotRef1': 'ts1263', 'timeSlotRef2': 'ts1265', 'timeStamp1': 1064204, 'timeStamp2': 1065137, 'text': 'oh relatives', 'words': [], 'phonemes': []}
MOST|OF|THEM|ARE|IN|CHINA
MOST|OF|THEM|ARE|IN|CHINA
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a885.wav
[MOST	(0.84): [    0,    33), OF	(0.64): [   39,    47), THEM	(0.72): [   49,    61), ARE	(0.70): [   80,    90), IN	(0.32): [  123,   126), CHINA	(0.64): [  127,   137)] []
['MOST', 1066795, 1067036]
['OF', 1067080, 1067138]
['THEM', 1067153, 1067240]
['ARE', 1067379, 1067452]
['IN', 1067693, 1067715]
['CHINA', 1067722, 1067795]
[['MOST', 1066795, 1067036], ['OF', 1067080, 1067138], ['THEM', 1067153, 1067240], ['ARE', 1067379, 1067452], ['IN', 1067693, 10

[CAUSE	(0.80): [    0,    32), I	(0.59): [   33,    34), HAVENT	(0.80): [   39,    69), DONE	(0.79): [   76,    95), THAT	(0.69): [  100,   112), IN	(0.67): [  130,   136), FOR	(0.88): [  140,   162), MANY	(0.76): [  251,   266), YEARS	(0.87): [  272,   324), ALREADY	(0.86): [  404,   446)] []
['CAUSE', 1125370, 1125603]
['I', 1125610, 1125617]
['HAVENT', 1125653, 1125872]
['DONE', 1125922, 1126061]
['THAT', 1126097, 1126184]
['IN', 1126315, 1126359]
['FOR', 1126388, 1126548]
['MANY', 1127195, 1127304]
['YEARS', 1127347, 1127725]
['ALREADY', 1128307, 1128612]
[['CAUSE', 1125370, 1125603], ['I', 1125610, 1125617], ['HAVENT', 1125653, 1125872], ['DONE', 1125922, 1126061], ['THAT', 1126097, 1126184], ['IN', 1126315, 1126359], ['FOR', 1126388, 1126548], ['MANY', 1127195, 1127304], ['YEARS', 1127347, 1127725], ['ALREADY', 1128307, 1128612]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a891', 'timeSlotRef1': 'ts1352', 'timeSlotRef2': 'ts1355', 'timeStamp1': 1125370, 'timeStamp2': 1128612, 'text': 

[YEAH	(0.91): [    0,    40), AND	(0.40): [  118,   123), IT	(0.71): [  135,   141), REFLECTS	(0.85): [  162,   217), IN	(0.70): [  233,   240), THE	(0.77): [  247,   261), SCHOOLS	(0.85): [  264,   326), HERE	(0.90): [  345,   412), AS	(0.71): [  416,   424), WELL	(0.92): [  428,   517)] []
['YEAH', 1227312, 1227603]
['AND', 1228170, 1228206]
['IT', 1228293, 1228337]
['REFLECTS', 1228490, 1228889]
['IN', 1229006, 1229057]
['THE', 1229107, 1229209]
['SCHOOLS', 1229231, 1229682]
['HERE', 1229820, 1230307]
['AS', 1230336, 1230394]
['WELL', 1230423, 1231070]
[['YEAH', 1227312, 1227603], ['AND', 1228170, 1228206], ['IT', 1228293, 1228337], ['REFLECTS', 1228490, 1228889], ['IN', 1229006, 1229057], ['THE', 1229107, 1229209], ['SCHOOLS', 1229231, 1229682], ['HERE', 1229820, 1230307], ['AS', 1230336, 1230394], ['WELL', 1230423, 1231070]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a897', 'timeSlotRef1': 'ts1485', 'timeSlotRef2': 'ts1492', 'timeStamp1': 1227312, 'timeStamp2': 1231070, 'text': 'Yeah 

[YEAH	(0.90): [    0,    44)] []
['YEAH', 1287323, 1287651]
[['YEAH', 1287323, 1287651]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1397', 'timeSlotRef1': 'ts1549', 'timeSlotRef2': 'ts1551', 'timeStamp1': 1287323, 'timeStamp2': 1287651, 'text': 'yeah', 'words': [], 'phonemes': []}
OH|DID|YOU
OH|DID|YOU
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a1401.wav
[OH	(0.81): [    0,    27), DID	(0.68): [   31,    39), YOU	(0.85): [   48,    93)] []
['OH', 1293791, 1293989]
['DID', 1294018, 1294077]
['YOU', 1294143, 1294473]
[['OH', 1293791, 1293989], ['DID', 1294018, 1294077], ['YOU', 1294143, 1294473]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1401', 'timeSlotRef1': 'ts1561', 'timeSlotRef2': 'ts1563', 'timeStamp1': 1293791, 'timeStamp2': 1294473, 'text': 'oh did you', 'words': [], 'phonemes': []}
THAT'S|VERY|SWEET|AS|AN|EMPLOYER|
THATS|VERY|SWEET|AS|AN|EMPLOYER|
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_

[OH	(0.90): [    0,    49), OKAY	(0.79): [   80,   121)] []
['OH', 1361478, 1361835]
['OKAY', 1362060, 1362359]
[['OH', 1361478, 1361835], ['OKAY', 1362060, 1362359]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1424', 'timeSlotRef1': 'ts1630', 'timeSlotRef2': 'ts1633', 'timeStamp1': 1361478, 'timeStamp2': 1362359, 'text': 'oh okay', 'words': [], 'phonemes': []}
YEAH|RIGHT
YEAH|RIGHT
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a1425.wav
[YEAH	(0.89): [    0,    44), RIGHT	(0.86): [  129,   172)] []
['YEAH', 1362987, 1363308]
['RIGHT', 1363927, 1364241]
[['YEAH', 1362987, 1363308], ['RIGHT', 1363927, 1364241]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1425', 'timeSlotRef1': 'ts1635', 'timeSlotRef2': 'ts1637', 'timeStamp1': 1362987, 'timeStamp2': 1364241, 'text': 'yeah right', 'words': [], 'phonemes': []}
YEAH|CAUSE|WE|ARE|PAYING|A|LOT|ALREADY
YEAH|CAUSE|WE|ARE|PAYING|A|LOT|ALREADY
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippet

[LIKE	(0.80): [    0,    32), S	(0.78): [   33,    34), VERY	(0.86): [   84,   113), LIKE	(0.57): [  255,   264), TRUE	(0.87): [  277,   317), BLUE	(0.83): [  321,   359), AUSSIES	(0.81): [  374,   431), WHAT	(0.73): [  524,   540), WOULD	(0.68): [  543,   561), YOU	(0.65): [  562,   570), COME	(0.82): [  577,   602), TO	(0.36): [  605,   608), YOUR	(0.69): [  624,   637), MIND	(0.91): [  646,   692)] []
['LIKE', 1436577, 1436809]
['S', 1436817, 1436824]
['VERY', 1437187, 1437398]
['LIKE', 1438430, 1438495]
['TRUE', 1438590, 1438880]
['BLUE', 1438909, 1439185]
['AUSSIES', 1439294, 1439709]
['WHAT', 1440384, 1440501]
['WOULD', 1440522, 1440652]
['YOU', 1440660, 1440719]
['COME', 1440769, 1440951]
['TO', 1440973, 1440995]
['YOUR', 1441111, 1441205]
['MIND', 1441271, 1441605]
[['LIKE', 1436577, 1436809], ['S', 1436817, 1436824], ['VERY', 1437187, 1437398], ['LIKE', 1438430, 1438495], ['TRUE', 1438590, 1438880], ['BLUE', 1438909, 1439185], ['AUSSIES', 1439294, 1439709], ['WHAT', 1440384, 1

[YEAH	(0.93): [    0,    74)] []
['YEAH', 1518110, 1518655]
[['YEAH', 1518110, 1518655]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1475', 'timeSlotRef1': 'ts1784', 'timeSlotRef2': 'ts1787', 'timeStamp1': 1518110, 'timeStamp2': 1518655, 'text': 'yeah', 'words': [], 'phonemes': []}
HAVE|YOU|EVER|BEEN|OFFENDED|BY|PEOPLE'S|TELLING|UH|LIKE|JOKES|YOU|DON'T|FIND|APPROPRIATE
HAVE|YOU|EVER|BEEN|OFFENDED|BY|PEOPLES|TELLING|UH|LIKE|JOKES|YOU|DONT|FIND|APPROPRIATE
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a908.wav
[HAVE	(0.80): [    0,    19), YOU	(0.83): [   24,    44), EVER	(0.79): [   55,    82), BEEN	(0.76): [   89,   106), OFFENDED	(0.84): [  119,   197), BY	(0.85): [  225,   267), PEOPLES	(0.73): [  305,   340), TELLING	(0.85): [  360,   417), UH	(0.86): [  530,   539), LIKE	(0.69): [  552,   577), JOKES	(0.84): [  674,   732), YOU	(0.83): [  733,   766), DONT	(0.87): [  771,   796), FIND	(0.68): [  811,   838), APPROPRIATE	(0.88): [  84

[YES	(0.91): [    0,    87)] []
['YES', 1593361, 1593998]
[['YES', 1593361, 1593998]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a910', 'timeSlotRef1': 'ts1847', 'timeSlotRef2': 'ts1849', 'timeStamp1': 1593361, 'timeStamp2': 1593998, 'text': 'Yes', 'words': [], 'phonemes': []}
YEAH|DO|THEY|VISIT|HIM|VERY|OFTEN
YEAH|DO|THEY|VISIT|HIM|VERY|OFTEN
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a911.wav
[YEAH	(0.96): [    0,   162), DO	(0.80): [  173,   186), THEY	(0.05): [  187,   191), VISIT	(0.58): [  192,   203), HIM	(0.89): [  213,   238), VERY	(0.66): [  273,   285), OFTEN	(0.87): [  296,   370)] []
['YEAH', 1621016, 1622194]
['DO', 1622274, 1622369]
['THEY', 1622376, 1622405]
['VISIT', 1622412, 1622492]
['HIM', 1622565, 1622747]
['VERY', 1623002, 1623089]
['OFTEN', 1623169, 1623707]
[['YEAH', 1621016, 1622194], ['DO', 1622274, 1622369], ['THEY', 1622376, 1622405], ['VISIT', 1622412, 1622492], ['HIM', 1622565, 1622747], ['VERY', 1623002,

[TRAFFIC	(0.88): [    0,    79)] []
['TRAFFIC', 1671028, 1671610]
[['TRAFFIC', 1671028, 1671610]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1523', 'timeSlotRef1': 'ts1939', 'timeSlotRef2': 'ts1941', 'timeStamp1': 1671028, 'timeStamp2': 1671610, 'text': 'traffic', 'words': [], 'phonemes': []}
YES
YES
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a1529.wav
[YES	(0.92): [    0,    62)] []
['YES', 1691781, 1692236]
[['YES', 1691781, 1692236]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1529', 'timeSlotRef1': 'ts1949', 'timeSlotRef2': 'ts1951', 'timeStamp1': 1691781, 'timeStamp2': 1692236, 'text': 'yes', 'words': [], 'phonemes': []}
YEAH
YEAH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a1530.wav
[YEAH	(0.94): [    0,   163)] []
['YEAH', 1696650, 1697841]
[['YEAH', 1696650, 1697841]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1530', 'timeSlotRef1': 'ts1958', 'timeSlotRef2': 'ts1960', 'timeStamp

[LIKE	(0.90): [    0,    37), Y	(0.71): [   39,    45), YOU	(0.56): [  105,   112), YOU	(0.87): [  116,   140), CANT	(0.74): [  153,   168), REALLY	(0.87): [  169,   217)] []
['LIKE', 1758418, 1758688]
['Y', 1758702, 1758746]
['YOU', 1759183, 1759235]
['YOU', 1759264, 1759438]
['CANT', 1759533, 1759643]
['REALLY', 1759650, 1760000]
[['LIKE', 1758418, 1758688], ['Y', 1758702, 1758746], ['YOU', 1759183, 1759235], ['YOU', 1759264, 1759438], ['CANT', 1759533, 1759643], ['REALLY', 1759650, 1760000]]
{'tiername': 'INT_ Sylvie', 'annoID': 'a1544', 'timeSlotRef1': 'ts2015', 'timeSlotRef2': 'ts2018', 'timeStamp1': 1758418, 'timeStamp2': 1760000, 'text': "like y~ you you can't really", 'words': [], 'phonemes': []}
GET|ACCUSTOMED|TO|UH
GET|ACCUSTOMED|TO|UH
C:/Users/barth/gits/pytorch_wav2vec/test_data/media_snippets/AusESL_MF_002_Stella_anon\INT__Sylvie_a1545.wav
[GET	(0.84): [    0,    31), ACCUSTOMED	(0.85): [   81,   156), TO	(0.94): [  189,   239), UH	(0.93): [  268,   302)] []
['GET', 176069

 +++ DONE +++ 


In [18]:
def display_segment(i):
    ratio = waveform.size(1) / trellis.size(0)
    word = word_segments[i]
    x0 = int(ratio * word.start)
    x1 = int(ratio * word.end)
    print(f"{word.label} ({word.score:.2f}): {x0 / 44100:.3f} - {x1 / 44100:.3f} sec")
    segment = waveform[:, x0:x1]

    return IPython.display.Audio(segment.numpy(), rate=44100)

display_segment(1)

TAMAT (0.78): 0.618 - 0.851 sec


In [8]:
IPython.display.Audio(SPEECH_FILE, rate=44100)

In [None]:
"C:\Users\barth\gits\pytorch_wav2vec\test_data\media_snippets\Cathy_Samun_Wiliang_a1.wav"
"C:\Users\barth\gits\pytorch_wav2vec\test_data\media_snippets\Cathy Samun Wiliang_a1.wav"

"C:\Users\barth\gits\pytorch_wav2vec\test_data\media_snippets\Cathy_Samun_Wiliang_a53.wav"