In [None]:
import os
import whisper
from pyannote.audio import Pipeline
import datetime
import itertools
import numpy as np
from pydub.playback import play
from pydub import AudioSegment
import pandas as pd
import torch
import yaml

class TranscriptLine:
    def __init__(self, start = None, end = None, speaker_dict = {}, speaker_id = None, transcription=None, translation=None):
        self.start = start
        self.end = end
        self.speaker_id = speaker_id
        self.speaker_dict = speaker_dict
        self.transcription = transcription
        self.translation = translation

    def set_speaker_id(self, speaker_id):
        if speaker_id not in self.speaker_dict.keys():
            self.speaker_dict[speaker_id]= speaker_id
        self.speaker_id = speaker_id
            
    
    def set_end(self, end):
        self.end = end
    
    def get_end(self):
        return self.end
    
    def get_start(self):
        return self.start
    
    def get_transcription(self):
        return self.transcription
    
    def get_translation(self):
        return self.translation
    
    def get_speaker(self):
        return self.speaker_dict[self.speaker_id] if self.speaker_id in self.speaker_dict.keys() else None
    
    def append_translation(self, additional_text):
        self.translation += additional_text

    def append_transcription(self, additional_text):
        self.transcription += additional_text

    def to_df(self):
        return pd.DataFrame({
            'start': [self.start],
            'end': [self.end],
            'speaker': [self.get_speaker()],
            'transcription': [self.transcription],
            'translation': [self.translation],
            })

    def __str__(self):
        start = self.start
        end = self.end
        speaker = self.get_speaker()
        transcription = self.transcription
        translation = self.translation
        string_value = f'[{start} ---> {end}] {speaker}: \n Transcription: {transcription} \n Translation: {translation} \n'
        print(string_value)
        return string_value
    
audio_files = [
            #    "De_Dienst_promo",
               "De_Dienst_ep_1",
               "De_Dienst_ep_2",
               "De_Dienst_ep_3",
               "De_Dienst_ep_4",
               "De_Dienst_ep_5",
               "De_Dienst_ep_6"
]

def insert_interval(interval_list, new_interval, speaker_dict, max_interruption=datetime.timedelta(seconds=1.5)):
    """Check whether a new interval has to be created, of if the current interval should be merged with an existing one.

    Args:
        interval_list (_List_): A list of all speaker intervals
        new_interval (_Dict_): The new interval that is to be inserted into the list
        speaker_dict (_Dict_): A dictionary that maps speaker labels to speaker names
        max_interruption (_datetime.timedelta_, optional): The max duration between intervals. If this time is exceeded, the it will be split into two separate intervals. Defaults to datetime.timedelta(seconds=1.5).
    """
    if speaker_dict[new_interval['label']] != None: # if the speaker is known, we can potentially merge the interval with an existing one
        nearest_intervals = list(filter(
            lambda item: item[1]['end'] > new_interval['start'] - max_interruption, enumerate(interval_list)))
        
        mergeable_interval = list(filter(
            lambda item: item[1]['label'] == new_interval['label'] or speaker_dict[item[1]['label']] == None , nearest_intervals))
        
        # print(mergeable_interval)
        
        if len(mergeable_interval) > 0: # if the length of the mergeable intervals is greater than 0, we can merge the new interval with an existing one
            assert len(mergeable_interval) == 1
            idx, _ = mergeable_interval[0]
            interval_list[idx]['end'] = new_interval['end']
        else: # otherwise, we have to create a new interval
            interval_list.append(new_interval)
    else: # if the speaker is not known, we have to create a new interval
        idx, last_interval = max( enumerate(interval_list), 
            key = lambda item: item[1]['end']
        )
        interval_list[idx]['end'] = last_interval['end']


def refine_speaker_intervals(diarization, audio_file, speaker_dict, min_frag_length = 0.5):
    """The speaker intervals produced by the diarization pipeline are somewhat noisy, with many short segments that are not useful for the transcription. 
    This function plays the audio segments and asks the user to identify the speaker if the speaker is not known yet. It also merges segments that are close together and have the same speaker.

    Args:
        diarization (_type_): The diarization produced by the pyannote pipeline.
        audio_file (_type_): the mp3 file used in the diarization.
        speaker_dict (_type_): The mapping between the indices of the distinct speakers and their names. 
        min_frag_length (float, optional): The minimum length of a speaker interval in seconds. Defaults to 0.5.

    Returns:
        _type_: A list of speaker intervals, where each interval is a dictionary with the keys 'start', 'end', and 'label'.
    """
    speaker_interval_list = []
    audio = AudioSegment.from_mp3(audio_file)

    for seg in diarization.itersegments(): # iterate over all segments in the diarization
        assert len(diarization.get_labels(seg)) == 1 # each segment should have exactly one label
        interval = {
            'start': datetime.timedelta(seconds=seg.start),
            'end': datetime.timedelta(seconds=seg.end),
            'label': diarization.get_labels(seg).pop()
        }
        if speaker_dict[interval['label']] == None and seg.end-seg.start > min_frag_length: # if the speaker is not known and the segment is long enough, we ask the user to identify the speaker
            audio_seg = audio[seg.start*1000:seg.end*1000]
            play(audio_seg)
            name = input('Who is this?').strip() or None
            speaker_dict[interval['label']] = name
        if len(speaker_interval_list) == 0: # if the speaker interval list is still empty, we can just add the interval
            speaker_interval_list.append(interval)
        else: # otherwise, we have to check whether the interval can be merged with an existing one
            insert_interval(speaker_interval_list, interval, speaker_dict)
    return speaker_interval_list

def transcribe_and_translate(audio, language):
    """Function to transcribe and translate audio using the Whisper model. Based on https://github.com/openai/whisper/blob/main/notebooks/Multilingual_ASR.ipynb, commit f0083e7eb20d032390e42f6f6039947fa8669c93
    

    Args:
        audio (_String_): location of the audio file to be transcribed and translated
        language (_String_): Language of the speech contained within the audio file, e.g. 'Dutch' or 'Spanish'.

    Returns:
        (Dict, Dict): Two dictionaries, one for the transcription and one for the translation. Each dictionary contains, among other things, a list of segments.
    """
    model = whisper.load_model("medium", device = 'cuda:0')
    print(
        f"Model is {'multilingual' if model.is_multilingual else 'English-only'} "
        f"and has {sum(np.prod(p.shape) for p in model.parameters()):,} parameters."
    )

    options = dict(language=language, beam_size=5, best_of=5, verbose=False)
    transcribe_options = dict(task="transcribe", **options)
    translate_options = dict(task="translate", **options)

    print("---- starting transcription ----")
    transcription = model.transcribe(audio, **transcribe_options)
    print("---- starting translation ----")
    translation = model.transcribe(audio, **translate_options)
    return transcription, translation

def find_speaker_id(speaker_interval_list, line, audio, error_margin = datetime.timedelta(seconds=2)):
    """The intervals produced by the diarization model and the transcriptions/translations are not always perfectly aligned.
    This function tries to find which speaker identified by the diarization model is speaker at the time of the transcription/translation line.

    Args:
        speaker_interval_list (_type_): The speaker intervals as identified by the diarization model.
        line (_type_): The current transcription line for which we want to find the speaker.
        audio (_type_): the audio file used in the diarization.
        error_margin (_type_, optional): The beginning and end points of the transcription and translation lines are do not always match with those of the speaker interval list. Some error margin is therefore needed.  Defaults to datetime.timedelta(seconds=2).

    Returns:
        _type_: _description_
    """
    start = line.get_start()
    end = line.get_end()
    transcript = line.get_transcription()
    overlapping_intervals = list(filter(
        lambda interval: (interval['start'] < start + error_margin and interval['end'] > end - error_margin), speaker_interval_list))
    if len(overlapping_intervals) == 0: # if there are no overlapping intervals, we cannot find a speaker automatically so the user has to identify the speaker manually
        print(f'Segment [{start} ---> {end}] could not be matched with a single speaker \n Transcript: {transcript}')
        audio_seg = audio[start.total_seconds()*1000:end.total_seconds()*1000]
        play(audio_seg)
        name = input('Could not identify the speaker. Please enter manually').strip() or None
        return name if name != None else find_speaker_id(speaker_interval_list, line, audio, error_margin)
    elif len(overlapping_intervals) == 1: # if there is only one overlapping interval, we can return the label of that interval
        return overlapping_intervals[0]['label']
    else: # if there are multiple overlapping intervals, we return the shortest one
        shortest_interval = min(overlapping_intervals, key=lambda interval: interval['end'] - interval['start'])
        return shortest_interval['label']

def merge_transcription_and_translation(transcription, translation, speaker_interval_list, audio_file, speaker_dict):
    """Unfortunately, the transcription and translation segments do not always match perfectly.
    This function tries to merge the two segments into one list of TranscriptLine objects, which contain both the transcription and translation text, as well as the speaker information.

    Args:
        transcription (_type_): The transcription produced by the Whisper model.
        translation (_type_): The translation produced by the Whisper model.
        speaker_interval_list (_type_): The speaker intervals as identified by the diarization model, and cleaned up by the refine_speaker_intervals function.
        audio_file (_type_): The destination of used the audio file.
        speaker_dict (_type_): A dictionary matching the indices in the speaker_interval_list to the speaker names.

    Raises:
        Exception: If somehow during the previous iteration of the while loop no 'transcription', translation' or 'both' lines were popped, variables are not set correctly, which should not be possible.

    Returns:
        _type_: _description_
    """
    transcr_iter = iter(transcription['segments'])
    transl_iter = iter(translation['segments'])
    transcr_line = next(transcr_iter)
    transl_line = next(transl_iter)
    
    audio = AudioSegment.from_mp3(audio_file)
    lines = []

    prev_pop = None

    while transcr_line != None and transl_line != None: # Iterate over both transcription and translation segments until both are empty
        if transcr_line == None or transl_line == None: # If one of the segments is empty, something is wrong and we should investigate
            print(f'help! transcr: {transcr_line}, transl:{transl_line}')
        if prev_pop == 'both' or prev_pop == None: # If during the previous iteration both a transcript and a translation line is popped, we create a new TranscriptLine object
            if abs(transcr_line['start']-transl_line['start']) > 1: # if the starting times of the transcription and translation do not match, we print a warning
                print(f'Starting times are not matching. Check the following lines ---- \n Transcription: {transcr_line["text"]} \n Translation: {transl_line["text"]}')
            line = TranscriptLine(start=datetime.timedelta(seconds=transcr_line['start']), 
                                transcription=transcr_line['text'], 
                                translation=transl_line['text'],
                                speaker_dict=speaker_dict)
        elif prev_pop == 'transcr': # If during the previous iteration only a transcription line was popped, we append the translation text to the current TranscriptLine object
            line.append_transcription(transcr_line['text'])
        elif prev_pop == 'transl': # If during the previous iteration only a translation line was popped, we append the transcription text to the current TranscriptLine object
            line.append_translation(transl_line['text'])
        else:
            raise Exception('This should not be possible')
        
        if abs(transcr_line['end']-transl_line['end']) <= 1: # If the ending times of the transcription and translation match sufficiently, we can set the end time of the TranscriptLine object and pop both a transcript line and a translation line
            line.set_end(datetime.timedelta(seconds=transcr_line['end']))
            speaker_id = find_speaker_id(speaker_interval_list, line, audio)
            line.set_speaker_id(speaker_id)
            lines.append(line)
            try:
                transcr_line = next(transcr_iter)
                transl_line = next(transl_iter)
                prev_pop = 'both'
            except StopIteration:
                break
        elif transcr_line['end'] < transl_line['end']: # If the end time of the transcription line is before the end time of the translation line, we pop another transcription line
            try:
                transcr_line = next(transcr_iter)
                prev_pop = 'transcr'
            except StopIteration:
                break
        else: # If the end time of the translation line is before the end time of the transcription line, we pop another translation line
            try:
                transl_line = next(transl_iter)
                prev_pop = 'transl'
            except StopIteration:
                break
    return lines

def concat_speaker_segments(lines):
    """If the time between two speaker segments with the same speaker is less than 2 seconds, we merge the two segments into one.

    Args:
        lines (List): the speaker segments that are to be concatenated, produced by the merge_transcription_and_translation function.

    Returns:
        panda.Dataframe: A dataframe containing the concatenated speaker segments.
    """
    segments = []
    segment = lines[0].to_df()

    for l in lines[1:]:
        print(segment)
        if segment.at[0, 'speaker'] == l.get_speaker() and (segment.at[0, 'end'] - l.get_start()) < datetime.timedelta(seconds=2):
            print(f'merging segment:')
            print(f'{segment}')
            print('with:')
            print(f'{l.to_df()}')
            print(' ')
            segment.at[0, 'transcription'] = segment.at[0, 'transcription'] + l.get_transcription()
            segment.at[0, 'translation'] = segment.at[0, 'translation'] + l.get_translation()
            segment.at[0, 'end'] = l.get_end()
        else:
            segments.append(segment)
            segment = l.to_df()
            

    segments.append(segment)
    return pd.concat(segments)

def add_seg_id(df, file_name):
    """Function to add a segment ID to each row of the dataframe. 

    Args:
        df (pandas.DataFrame): The dataframe to which the segment ID is to be added.
        file_name (String): The name of the audio file, which is used to create the segment ID.

    Returns:
        pandas.DataFrame: A dataframe with an additional column 'seg_id', which contains the segment ID for each row.q
    """
    ids = [file_name+'_seg_'+str(i) for i in range(df.shape[0])]
    df.loc[:, 'seg_id'] = ids
    col_names = df.columns.tolist()
    new_order = col_names[-1:] + col_names[:-1] 
    df = df[new_order]
    return df


def change_time_to_abs_values(df, ep_nr):
    """In order to preserve the chronological order of the speaker intervals across multiple episodes, we add a full day to the start and end times with each consecutive episode.

    Args:
        df (pandas.dataframe): The dataframe containing the speaker intervals.
        ep_nr (Integer): the number of the episode that is being processed. 

    Returns:
        pandas.Dataframe: A dataframe with speaker intervals where the start and and times are updated. 
    """
    if ep_nr == 'promo':
        start_time = datetime.date(2024, 12, 31)
    else:
        start_time = datetime.date(2025, 1, int(ep_nr))
    print(df['start'])
    df['start'] = df['start'].apply(lambda x:  datetime.datetime.combine(start_time, (datetime.datetime.min + x).time()))
    df['end'] = df['end'].apply(lambda x: datetime.datetime.combine(start_time, (datetime.datetime.min + x).time()))
    return df

            

def print_df(df, file):
    """Function to print a dataframe to a csv file. The file is saved in the transcriptions/dataframes directory.

    Args:
        df (pandas.Dataframe): The dataframe to be printed.
        file (String): The name of the file to which the dataframe is to be printed, without the .csv extension.
    """
    full_path = "./transcriptions/dataframes/" + file + '.csv'
    df.to_csv(full_path, sep=';', index=False, header=True)


def print_lines(lines, file):
    """Function to print a list of TranscriptLine objects to a text file. The file is saved in the transcriptions/text directory.

    Args:
        lines (List): A list of TranscriptLine objects to be printed.
        file (String): The name of the file to which the lines are to be printed, without the .txt extension.
    """
    full_path = "./transcriptions/text/" + file + '.txt'
    with open(full_path, "w") as text_file:
        for line in lines:
            text_file.write(str(line))

def print_convo(convo, file):
    """Function to print a dataframe is the form of a 'conversation', meaning that each row contains the start and end time, the speaker, the transcription and the translation of a segment. The file is saved in the transcriptions/text directory.

    Args:
        convo (_type_): A dataframe containing the conversation to be printed. The dataframe should contain the columns 'start', 'end', 'speaker', 'transcription' and 'translation'.
        file (_type_): The name of the file to which the conversation is to be printed, without the .txt extension.
    """
    full_path = "./transcriptions/text/" + file + '.txt'
    with open(full_path, "w") as text_file:
        for idx, row in convo.iterrows():
            text = f'[{row["start"]} ---> {row["end"]}] {row["speaker"]}: \n Transcription: {row["transcription"]} \n Translation: {row["translation"]} \n'
            text_file.write(text)   

# Load secrets from a yaml file
secrets = yaml.load(open('./secrets.yaml'), Loader=yaml.FullLoader)      



In [32]:
# file = audio_files[0]
for file in audio_files:
    print(f"processing {file}")

    audio_file = f"./audio_files/uncut/{file}.mp3"
    # instantiate the pipeline for diarizing audio fragment
    pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=secrets['hugging_face']['use_auth_token'])
    pipeline = pipeline.to(torch.device('cuda:0'))

    # run the pipeline on an audio file
    max_speakers = 5

    print('---- diarizing audio fragment ----')
    diarization = pipeline(audio_file, min_speakers=1, max_speakers=max_speakers)

    speaker_dict = {
    f'SPEAKER_0{n}':None for n in range(max_speakers)
    }
    speaker_dict['UNKNOWN'] = 'unknown'

    print('---- turning fragments into speaker intervals ----')
    speaker_interval_list = refine_speaker_intervals(diarization, audio_file, speaker_dict)


    language = "Dutch"

    print('---- transcribing and translating audio fragment ----')
    transcription, translation = transcribe_and_translate(audio_file, language)

    print('---- merging everything ----')
    lines = merge_transcription_and_translation(transcription, translation, speaker_interval_list, audio_file, speaker_dict)

    print('---- concatenating speaker segments')
    convo  = concat_speaker_segments(lines)

    ep_nr = file.split('_')[-1] 
    convo = change_time_to_abs_values(convo, ep_nr)
    convo = add_seg_id(convo, file)

    print('---- exporting results ----')

    print_df(convo, file)
    print_convo(convo, file)



# with open("./transcriptions/"+file+".txt", "w") as text_file:
#     for line in result['segments']:
#         text_file.write(str(line['text']))
#         text_file.write('\n')

processing De_Dienst_ep_1
---- diarizing audio fragment ----
---- turning fragments into speaker intervals ----


Input #0, wav, from '/tmp/tmp36m9sql5.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:08.66, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   8.59 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpet3txhfz.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:05.67, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   5.61 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpvw8sf7qg.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:00.68, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   0.61 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmp1ysv9lqz.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:00.86, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   0.78 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmp7uzlve6l.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:00.96, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   0.88 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpdkh25pgx.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:00.81, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   0.74 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpslhaex9k.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:06.24, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   6.19 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


---- transcribing and translating audio fragment ----
Model is multilingual and has 762,321,920 parameters.
---- starting transcription ----


100%|██████████| 177930/177930 [30:44<00:00, 96.44frames/s] 


---- starting translation ----


100%|██████████| 177930/177930 [08:54<00:00, 332.79frames/s]


---- merging everything ----
Starting times are not matching. Check the following lines ---- 
 Transcription:  Ik vond het schokkend om opeens de privacy van mensen te schenden. 
 Translation:  What I ...
Segment [0:08:04 ---> 0:08:20] could not be matched with a single speaker 
 Transcript:  Ik vond het schokkend om opeens de privacy van mensen te schenden.


Input #0, wav, from '/tmp/tmpy2d6pahu.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:16.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  15.93 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Dat ik dat hoorde. 
 Translation:  That I heard that.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Wat ik heel bijzonder vond. 
 Translation:  I found it ...
Starting times are not matching. Check the following lines ---- 
 Transcription:  En daar kun je niet over praten. 
 Translation:  And you can't talk about that.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Dat is wel een van de zaken die we binnen het dienst echt goed voor elkaar hebben. 
 Translation:  No, that's really in there.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Het is even te denken of ik een voorbeeld kan noemen. 
 Translation:  Just to think if I can give an example.
Segment [0:11:51 ---> 0:11:55] could not be matched with a single speaker 
 Transcript:  Het is even te denken of ik een voorbeeld kan noemen.


Input #0, wav, from '/tmp/tmpcl7u2dc9.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:04.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   3.95 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:11:51 ---> 0:11:55] could not be matched with a single speaker 
 Transcript:  Het is even te denken of ik een voorbeeld kan noemen.


Input #0, wav, from '/tmp/tmpjtqqkh4w.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:04.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   3.95 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:11:51 ---> 0:11:55] could not be matched with a single speaker 
 Transcript:  Het is even te denken of ik een voorbeeld kan noemen.


Input #0, wav, from '/tmp/tmprt1u84h6.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:04.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   3.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Er werden mensen geëxecuteerd. 
 Translation:  Colleagues of mine had to keep that up every day.
Segment [0:12:53 ---> 0:12:59] could not be matched with a single speaker 
 Transcript:  Hoe ziet een gemiddelde werkdag voor Bart er eigenlijk uit? Dat is behoorlijk saai denk ik als ik het ga vertellen.


Input #0, wav, from '/tmp/tmpl7pub0tq.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:06.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   5.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:12:53 ---> 0:12:59] could not be matched with a single speaker 
 Transcript:  Hoe ziet een gemiddelde werkdag voor Bart er eigenlijk uit? Dat is behoorlijk saai denk ik als ik het ga vertellen.


Input #0, wav, from '/tmp/tmp_g9kdyw_.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:06.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   5.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Dan dat ik er wel bij betrokken was. 
 Translation:  Back to the case about the student and explosives.
Segment [0:15:15 ---> 0:15:23] could not be matched with a single speaker 
 Transcript:  Dan dat ik er wel bij betrokken was. Terug naar de case over de student en explosieven.


Input #0, wav, from '/tmp/tmp9hvdcv1m.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:08.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   7.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Oké, en toen was ik dus aan de beurt. 
 Translation:  This is everything there is.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Ik kan me voorstellen dat dit wel iets is dat reden tot zorgen gaat geven. 
 Translation:  That will give reason for concern.
Segment [0:18:13 ---> 0:18:25] could not be matched with a single speaker 
 Transcript:  Of hangt er daadwerkelijk iets onder waar iets aan gedaan moet worden? Wat zou je inzetten?


Input #0, wav, from '/tmp/tmpo10nlyuz.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:12.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  11.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:19:19 ---> 0:19:23] could not be matched with a single speaker 
 Transcript:  Ja. Wat zijn mijn opties daarin?


Input #0, wav, from '/tmp/tmpg8utoeh5.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:04.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   3.95 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Oké, dus er is een Sander van der Land over wie ik meer te weten wil komen. 
 Translation:  What are my options in there?
Starting times are not matching. Check the following lines ---- 
 Transcription:  Om te weten wat ik kan en vooral mag... 
 Translation:  ... about whom I want to know more.
Starting times are not matching. Check the following lines ---- 
 Transcription:  In 2018 is een nieuwe wet in werking getreden... 
 Translation:  Yes, exactly.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Maar ik zit hier natuurlijk niet voor niets... 
 Translation:  ... because I want to know what my options are ...
Starting times are not matching. Check the following lines ---- 
 Transcription:  Ik mag als burger liefst bed meer? 
 Translation:  ... from three years ago, for example ...
Starting times are not matching. Check the following lines ---- 
 Transcription:  Een goe

Input #0, wav, from '/tmp/tmph16qars9.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:07.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   6.92 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Ik zou eigenlijk zeggen, blijf je vooral afvragen wat je met de inzet van een bouwmiddel wil bereiken. 
 Translation:  ... does Ton have one last tip for me?
Starting times are not matching. Check the following lines ---- 
 Transcription:  MUZIEK 
 Translation:  ... and what you want to get out of it.
Segment [0:28:10 ---> 0:28:19] could not be matched with a single speaker 
 Transcript:  MUZIEK


Input #0, wav, from '/tmp/tmpiadehi3x.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:09.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   8.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  MUZIEK 
 Translation:  This was the first episode of The Dienst ...
Segment [0:28:42 ---> 0:28:54] could not be matched with a single speaker 
 Transcript:  MUZIEK Dit was de eerste aflevering van De Dienst, een podcast van de AIVD.


Input #0, wav, from '/tmp/tmp_jw7ii0g.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:12.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  11.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  MUZIEK 
 Translation:  Thank you for watching!
---- concatenating speaker segments
   start             end speaker  \
0 0 days 0 days 00:00:11    Bart   

                                       transcription  \
0   De eerste keer dat ik een telefoon tap, dat i...   

                                         translation  
0   The first time I heard a phone call, that I c...  
merging segment:
   start             end speaker  \
0 0 days 0 days 00:00:11    Bart   

                                       transcription  \
0   De eerste keer dat ik een telefoon tap, dat i...   

                                         translation  
0   The first time I heard a phone call, that I c...  
with:
            start             end speaker  \
0 0 days 00:00:11 0 days 00:00:18    Bart   

                                       transcription  \
0   Vond ik echt schokkend. Ik dacht van hé, ik z...   

               

Input #0, wav, from '/tmp/tmp4ijozvj3.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:01.84, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   1.77 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpgg50o536.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:03.17, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   3.11 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmp3aaaqh_4.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:09.08, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   9.00 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmp2xsz31nr.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:00.76, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   0.70 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpgl8rtq6n.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:18.63, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  18.58 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


---- transcribing and translating audio fragment ----
Model is multilingual and has 762,321,920 parameters.
---- starting transcription ----


100%|██████████| 157505/157505 [08:43<00:00, 300.63frames/s]


---- starting translation ----


100%|██████████| 157505/157505 [07:38<00:00, 343.66frames/s]


---- merging everything ----
Starting times are not matching. Check the following lines ---- 
 Transcription:  Zij is namelijk ooit als referent opgetreden voor een collega. 
 Translation:  Is known to us.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Maar als we even op een rijtje zitten, wat hebben we nu? 
 Translation:  That's a while ago.
Starting times are not matching. Check the following lines ---- 
 Transcription:  De meeste bekeerlingen hoeven niet radicaal te zijn. 
 Translation:  Because that's how you know him.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Ik zou wel willen weten wat zijn verhouding is tot die radicale prediker. 
 Translation:  Always discuss with your colleague.
Starting times are not matching. Check the following lines ---- 
 Transcription:  We willen dus praten met de docent en Linda. 
 Translation:  Yes.
Segment [0:08:49 ---> 0:08:56] could not be matched with a single speaker 
 Tra

Input #0, wav, from '/tmp/tmpf88k2zv_.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:07.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   6.91 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:08:49 ---> 0:08:56] could not be matched with a single speaker 
 Transcript:  We willen dus praten met de docent en Linda. Maar dat ga ik niet zelf doen.


Input #0, wav, from '/tmp/tmpikpl5gkh.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:07.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   6.91 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Dit is analist Leonie. 
 Translation:  I have to see the big picture of all those investigations.
Segment [0:13:48 ---> 0:13:53] could not be matched with a single speaker 
 Transcript:  Oké, en nu is dit dan het punt waarop de alarmbellen gaan rinkelen?


Input #0, wav, from '/tmp/tmpu59830zi.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:05.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   4.94 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Dus ga op zoek naar wat je niet weet. 
 Translation:  So go looking for what you do not know.
Segment [0:16:10 ---> 0:16:19] could not be matched with a single speaker 
 Transcript:  Dus ga op zoek naar wat je niet weet. Die vragen worden gesteld door een acquisiteur.


Input #0, wav, from '/tmp/tmpla_wux41.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:09.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   8.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Je moet je voorstellen dat de dienst een interesse heeft in een persoon of een organisatie. 
 Translation:  You have to imagine that ...
Starting times are not matching. Check the following lines ---- 
 Transcription:  Ik had er ook mijn studie op aangepast. 
 Translation:  Yes?
Starting times are not matching. Check the following lines ---- 
 Transcription:  Maar meestal zeg je wel waar je vandaan komt. 
 Translation:  you do tell where you come from.
Starting times are not matching. Check the following lines ---- 
 Transcription:  En probeer toch altijd wel een manier te vinden om... 
 Translation:  I am really busy with the assignment.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Je moet ook nieuwsgierig zijn. 
 Translation:  Yes, and you also have to be curious.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Hoe gaat zo'n gesprek

Input #0, wav, from '/tmp/tmpxj41d2b1.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:07.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   6.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  omdat er toch nog wat interessants gekomen is... 
 Translation:  Hey Liesbeth, this is Bart.
Segment [0:25:07 ---> 0:25:15] could not be matched with a single speaker 
 Transcript:  omdat er toch nog wat interessants gekomen is... uit de gesprekken van vandaag. Er is een flart van een ISIS-vlag gezien.


Input #0, wav, from '/tmp/tmpzy_gcj0z.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:08.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   7.95 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


---- concatenating speaker segments
   start             end speaker  \
0 0 days 0 days 00:00:04  Andrea   

                                       transcription  \
0   Ja, dat kan wel spannend zijn. Ben je weleens...   

                                         translation  
0   Yes, that can be exciting. Have you ever been...  
merging segment:
   start             end speaker  \
0 0 days 0 days 00:00:04  Andrea   

                                       transcription  \
0   Ja, dat kan wel spannend zijn. Ben je weleens...   

                                         translation  
0   Yes, that can be exciting. Have you ever been...  
with:
            start             end speaker transcription translation
0 0 days 00:00:04 0 days 00:00:05  Andrea           Ja.        Yes.
 
   start             end speaker  \
0 0 days 0 days 00:00:05  Andrea   

                                       transcription  \
0   Ja, dat kan wel spannend zijn. Ben je weleens...   

                        

Input #0, wav, from '/tmp/tmpjryc805l.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:04.07, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   3.99 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpp7rjte09.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:03.27, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   3.21 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpz5whs2zl.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:09.18, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   9.11 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmp48y7o0nb.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:10.01, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   9.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmp5c95og7k.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:03.09, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   3.03 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


---- transcribing and translating audio fragment ----
Model is multilingual and has 762,321,920 parameters.
---- starting transcription ----


100%|██████████| 215243/215243 [10:58<00:00, 326.72frames/s]


---- starting translation ----


100%|██████████| 215243/215243 [09:35<00:00, 374.12frames/s]


---- merging everything ----
Segment [0:00:05.160000 ---> 0:00:30.880000] could not be matched with a single speaker 
 Transcript:  En we hebben elkaar keurige dag gezegd en ik zat echt naast hem en ik dacht jij moest eens weten wat ik van jou weet en jij weet niets van mij. Wat doe je als je het vermoeden hebt dat iemand een gevaar kan zijn?


Input #0, wav, from '/tmp/tmp3xctc90a.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:25.72, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  25.65 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:00:05.160000 ---> 0:00:30.880000] could not be matched with a single speaker 
 Transcript:  En we hebben elkaar keurige dag gezegd en ik zat echt naast hem en ik dacht jij moest eens weten wat ik van jou weet en jij weet niets van mij. Wat doe je als je het vermoeden hebt dat iemand een gevaar kan zijn?


Input #0, wav, from '/tmp/tmp3m73yjr5.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:25.72, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  25.65 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Mijn naam is Lies Petrasker. 
 Translation:  My name is Lies Petrasker.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Dag 3, het voelt al bijna als routine om de bus en trein naar Zoetermeer te pakken. 
 Translation:  Day 3.
Segment [0:01:16.440000 ---> 0:01:30.760000] could not be matched with a single speaker 
 Transcript:  Dag 3, het voelt al bijna als routine om de bus en trein naar Zoetermeer te pakken.


Input #0, wav, from '/tmp/tmpyp6bg9cc.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:14.32, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  14.25 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:01:34.120000 ---> 0:01:40.040000] could not be matched with a single speaker 
 Transcript:  Opeens vroeg ik me af, mag ik hem eigenlijk gedag zeggen als we elkaar buiten tegenkomen?


Input #0, wav, from '/tmp/tmp0nxzv10q.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:05.92, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   5.85 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  hier maak ik me wel een beetje zorgen om. 
 Translation:  The teacher caught a glimpse of an ISIS flag on the phone.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Maar hij heeft in het voorbijgaan, dat was eigenlijk het eerste element wat hij gezien had, dat hij had gezien dat er een zwarte ISIS-vlag op zijn screensaver stond van zijn telefoon. 
 Translation:  But he passed it.
Segment [0:06:27.560000 ---> 0:06:57.560000] could not be matched with a single speaker 
 Transcript:  En het ging echt alleen maar over explosieve stoffen. Toen de docent hem daarop aansprak van waarom vraag je alleen maar dat en waarom heb je het niet over de andere zaken die passen in het curriculum van dit hele trimester. Maakte Sander de indruk dat hij eigenlijk niet meer zo geïnteresseerd is in de studie zelf en dat hij gewoon concrete informatie wil hebben. Dat leek erop alsof hij die stu

Input #0, wav, from '/tmp/tmp7c95krk8.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:30.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  29.92 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:07:01.560000 ---> 0:07:09.560000] could not be matched with a single speaker 
 Transcript:  Ethyldiamine. Waterstofperoxyden van een hoge concentratie.


Input #0, wav, from '/tmp/tmphv4qy0gx.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:08.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   7.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:07:09.560000 ---> 0:07:47.560000] could not be matched with a single speaker 
 Transcript:  En wat ook wel opvallend was is dat de docent Sander een keer heeft betrapt met een fles nitromethaan bij de chemikalie kast. En Sander zette die fles snel terug toen hij merkte dat de docent zag dat hij dat in zijn handen had. Ik heb een van onze explosieve experts deze namen van deze stoffen voorgelegd. En zij gaf aan dat dit echt wel elementen zijn, bestanddelen zouden kunnen zijn voor een explosief. Dan is er nog een ander element.


Input #0, wav, from '/tmp/tmp6y9b2amp.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:38.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  37.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  En heel vaak zit daar een soort jihadistische component in. 
 Translation:  And very often there is a kind of jihadist component in it.
Starting times are not matching. Check the following lines ---- 
 Transcription:  En omdat deze jongen het Arabisch verstond en hoorde waarover gezongen werd, kon je ook echt horen dat het jihadisme ging. 
 Translation:  And because this boy understood Arabic and heard what was being sung.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Die student heeft hem daarop aangesproken. 
 Translation:  That student talked to him about it.
Segment [0:08:34.560000 ---> 0:08:45.560000] could not be matched with a single speaker 
 Transcript:  Die student heeft hem daarop aangesproken. En Sander die zei dat hij zijn bek moest houden omdat hij een nep moslim was.


Input #0, wav, from '/tmp/tmpmnsa8k92.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:11.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  10.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:08:48.560000 ---> 0:08:58.560000] could not be matched with a single speaker 
 Transcript:  Het begint nu wel echt een serieus verhaal te worden. Van mijn verhaal dat het ook gewoon een geïnteresseerde student kan zijn, blijft weinig over.


Input #0, wav, from '/tmp/tmpz5q5lzzz.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:10.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   9.95 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Ik denk wel dat er nu wel heel veel losse puzzelstukjes zijn die toch wel gevaarlijk kunnen zijn. 
 Translation:  From my story that it can be just an interested student.
Segment [0:08:59.560000 ---> 0:09:13.560000] could not be matched with a single speaker 
 Transcript:  Ik denk wel dat er nu wel heel veel losse puzzelstukjes zijn die toch wel gevaarlijk kunnen zijn. Dan krijg ik het beeld van een jonge boze jongen.


Input #0, wav, from '/tmp/tmpsekl72h_.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:14.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  13.92 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  In combinatie met dat hij de bekeerlingen van de jonge boze jongen, 
 Translation:  Who has been known for a long time that he keeps these kinds of ideas.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Ik vermoed als contact met deze prediger, Mehmet. 
 Translation:  Then ...
Starting times are not matching. Check the following lines ---- 
 Transcription:  Dus de intensie, ik heb een bepaald gedachtegoed en ik ben bereid om daar bepaalde dingen voor te gaan doen. 
 Translation:  So the intention, I want ...
Segment [0:12:19.560000 ---> 0:12:26.560000] could not be matched with a single speaker 
 Transcript:  Dus de intensie, ik heb een bepaald gedachtegoed en ik ben bereid om daar bepaalde dingen voor te gaan doen.


Input #0, wav, from '/tmp/tmpt4xgpi40.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:07.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   6.91 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Ik zou eigenlijk nu wel eens willen horen waar die gesprekken over gaan met Mehmet. 
 Translation:  So potential.
Segment [0:12:51.560000 ---> 0:13:05.560000] could not be matched with a single speaker 
 Transcript:  Ik zou eigenlijk nu wel eens willen horen waar die gesprekken over gaan met Mehmet. Ja, snap ik. Ik ook. En dat zegt mijn gevoel ook, maar ik moet zeggen, ik mis nog wel iets in de argumentatie.


Input #0, wav, from '/tmp/tmpopuqr0k3.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:14.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  13.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Ik denk wel dat het goed is dat we de gesprekken met Mehmet hebben. 
 Translation:  Me too.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Daar moet iemand gaan voordat je als AFD wel zegt. 
 Translation:  I think this is quite a lot, all together.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Het is geen wiskunde. 
 Translation:  And yes, it can be a provoking 20-year-old.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Ik vind dit best wel veel allemaal bij elkaar. 
 Translation:  But how bond should someone make it before it is given?
Starting times are not matching. Check the following lines ---- 
 Transcription:  Ik ga praten met een audiobewerker. 
 Translation:  I'm going to talk to an audio processor.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Je hoort de

Input #0, wav, from '/tmp/tmpv67mfpxh.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:07.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   6.92 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:24:15.560000 ---> 0:24:22.560000] could not be matched with a single speaker 
 Transcript:  Wat kan ik eigenlijk verwachten van zo'n tap? Als Dana zo meteen Arsander gaat luisteren, waar gaat ze op letten?


Input #0, wav, from '/tmp/tmprmi26o02.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:07.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   6.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:24:47.560000 ---> 0:24:56.560000] could not be matched with a single speaker 
 Transcript:  Wat wordt er besproken? En dan hopen we op een uitglijder.


Input #0, wav, from '/tmp/tmp9e9vdr4m.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:09.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   8.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Bijvoorbeeld. 
 Translation:  When are you going to call him?
Segment [0:25:02.560000 ---> 0:25:07.560000] could not be matched with a single speaker 
 Transcript:  Bijvoorbeeld. Wanneer ga je aan de bel trekken?


Input #0, wav, from '/tmp/tmp0c66l2yp.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:05.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   4.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Naast de tappen loopt het Oostind onderzoek natuurlijk ook nog steeds. 
 Translation:  The Ossint research
Starting times are not matching. Check the following lines ---- 
 Transcription:  Nou, doordat ik hier bij deze afdeling werk, 
 Translation:  Well, because I work here at this department, it's not that I'm very paranoid about it.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Hoe kijk jij naar een zaak als dit? Wat denk je hiervan? 
 Translation:  How do you look at a case like this? What do you think of this?
Segment [0:33:07.560000 ---> 0:33:15.560000] could not be matched with a single speaker 
 Transcript:  Hoe kijk jij naar een zaak als dit? Wat denk je hiervan? Nou ja, dit is denk ik een zaak die heel typisch is


Input #0, wav, from '/tmp/tmpmrz4y2ik.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:08.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   7.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:34:24.560000 ---> 0:34:38.560000] could not be matched with a single speaker 
 Transcript:  Ja, zeg het maar. En met die overweging zit mijn derde dag bij de dienst erop.


Input #0, wav, from '/tmp/tmpw0__fogp.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:14.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  13.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:34:53.560000 ---> 0:35:18.560000] could not be matched with a single speaker 
 Transcript:  Hoeveel zorgen moeten we ons maken? Je hoort het in aflevering 4 van de dienst. Dit was de derde aflevering van de dienst, een podcast van de AIVD. Abonneer je nu zodat je niets van het onderzoek hoeft te missen en laat ons vooral weten wat je van deze serie vindt via een recensie in je favoriete podcast-app.


Input #0, wav, from '/tmp/tmp2lx0__k2.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:25.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  24.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  TV Gelderland 2021 
 Translation:  .
---- concatenating speaker segments
   start                    end speaker  \
0 0 days 0 days 00:00:05.160000    Dana   

                                       transcription  \
0   Ik heb eens een kwartier lang naast een targe...   

                                         translation  
0   I was sitting next to his target for a quarte...  
                   start                    end         speaker  \
0 0 days 00:00:05.160000 0 days 00:00:30.880000  Dana, Liesbeth   

                                       transcription  \
0   En we hebben elkaar keurige dag gezegd en ik ...   

                                         translation  
0   Oh yes. And we said nice things to each other...  
                   start                    end   speaker  \
0 0 days 00:00:30.880000 0 days 00:00:42.440000  Liesbeth   

                                       transcription 

Input #0, wav, from '/tmp/tmpreonwh2u.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:04.59, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   4.50 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpuqnj9ink.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:01.06, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   0.98 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpfo49y8p3.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:04.67, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   4.61 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpuqu12t2u.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:17.08, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  17.01 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmp5choplub.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:01.75, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   1.66 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpk1f72rpt.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:00.78, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   0.70 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmp0ji_zqc5.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:00.54, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   0.48 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmp6u9y9h64.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:01.79, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   1.71 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpqcg12c94.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:00.76, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   0.71 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


---- transcribing and translating audio fragment ----
Model is multilingual and has 762,321,920 parameters.
---- starting transcription ----


100%|██████████| 210557/210557 [14:03<00:00, 249.70frames/s]


---- starting translation ----


100%|██████████| 210557/210557 [09:37<00:00, 364.29frames/s]


---- merging everything ----
Segment [0:00:05 ---> 0:00:08] could not be matched with a single speaker 
 Transcript:  Nee, dat heb je nooit verspeeld.


Input #0, wav, from '/tmp/tmpqld5i70g.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:03.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   2.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:00:05 ---> 0:00:08] could not be matched with a single speaker 
 Transcript:  Nee, dat heb je nooit verspeeld.


Input #0, wav, from '/tmp/tmpp2e92yt4.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:03.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   2.91 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:00:05 ---> 0:00:08] could not be matched with a single speaker 
 Transcript:  Nee, dat heb je nooit verspeeld.


Input #0, wav, from '/tmp/tmpb4tjwy74.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:03.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   2.92 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Nooit. 
 Translation:  How far can you go to prevent an attack?
Segment [0:00:08 ---> 0:00:21] could not be matched with a single speaker 
 Transcript:  Nooit. Hoe ver kun je gaan om een aanslag te voorkomen?


Input #0, wav, from '/tmp/tmp91_ar1zf.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:13.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  12.91 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Om bevolgens toestemming te gaan vragen om een bijzondere inrichting in te gaan zetten. 
 Translation:  ...and then ask permission...
Starting times are not matching. Check the following lines ---- 
 Transcription:  De last is eigenlijk een soort basis document wat jij schrijft als bewerker. 
 Translation:  The complaint is actually a kind of basic document...
Starting times are not matching. Check the following lines ---- 
 Transcription:  Je hebt het heel kort al even met Ton over gehad. 
 Translation:  You briefly discussed it with Ton.
Starting times are not matching. Check the following lines ---- 
 Transcription:  De proportionaliteit, dus in hoeverre weegt de inzet van dit middel op... 
 Translation:  Yes.
Starting times are not matching. Check the following lines ---- 
 Transcription:  tegen de schending van de privacy van iemand. 
 Translation:  ...against the deprivation of someone's privacy?
S

Input #0, wav, from '/tmp/tmpcs2j4h_c.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:09.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   8.92 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:05:05 ---> 0:05:26] could not be matched with a single speaker 
 Transcript:  Hij onderhoudt contact met een leider van ISIS die een trainingskamp leidt in Libië, Derna om precies te zijn. Deze leider van ISIS, die heet Anas al-Maghribi. Die heeft al langer de warme belangstelling van collega's uit een ander team binnen de dienst.


Input #0, wav, from '/tmp/tmpy2owlgow.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:21.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  20.95 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Dat is best een hoop wat dan in één keer bij elkaar komt. 
 Translation:  ...communication with others also comes forward.
Segment [0:06:02 ---> 0:06:08] could not be matched with a single speaker 
 Transcript:  Dat is best een hoop wat dan in één keer bij elkaar komt. Dat is best een hoop wat dan in één keer bij elkaar komt.


Input #0, wav, from '/tmp/tmpphjg7o69.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:06.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   5.96 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Die contacten heeft met een leider van ISIS. 
 Translation:  ...has contacts with a leader of ISIS...
Starting times are not matching. Check the following lines ---- 
 Transcription:  Kenners heeft van chemische stoffen, door zijn college. 
 Translation:  ...has knowledge of chemical substances through his college...
Segment [0:07:11 ---> 0:07:23] could not be matched with a single speaker 
 Transcript:  Dat sluit een beetje aan bij die leider in Libië. De internettap wijst uit dat Sander op zoek is naar vluchten naar Cairo.


Input #0, wav, from '/tmp/tmpjt5r09mj.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:12.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  11.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:07:39 ---> 0:07:47] could not be matched with a single speaker 
 Transcript:  ...om zich daar aan te sluiten bij de djiedistische strijd. Ja, behalve dat we hem hebben horen roepen dat hij de oorlog naar Nederland gaat halen.


Input #0, wav, from '/tmp/tmp_yx4tutl.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:08.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   7.95 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Dus hij gaat niet volgende week? 
 Translation:  ...that Sandor is already gone.
Segment [0:08:41 ---> 0:08:44] could not be matched with a single speaker 
 Transcript:  Naar Cairo. Oké.


Input #0, wav, from '/tmp/tmpx9bt0gfk.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:03.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   2.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Dit is het moment waarop echt duidelijk is dat Sander goed fout is. 
 Translation:  This is the moment when it is really clear...
Segment [0:09:02 ---> 0:09:08] could not be matched with a single speaker 
 Transcript:  Dit is het moment waarop echt duidelijk is dat Sander goed fout is.


Input #0, wav, from '/tmp/tmp1p_s2lnk.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:06.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   5.95 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:09:36 ---> 0:09:43] could not be matched with a single speaker 
 Transcript:  We onderhouden natuurlijk hele goede contacten met andere westerse diensten.


Input #0, wav, from '/tmp/tmpssm5j7l7.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:07.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   6.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Zoals? 
 Translation:  We know something about their potential.
Segment [0:12:02 ---> 0:12:06] could not be matched with a single speaker 
 Transcript:  De privacy schending weegt heel zwaar.


Input #0, wav, from '/tmp/tmp6mxc2xjr.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:04.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   3.95 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:12:02 ---> 0:12:06] could not be matched with a single speaker 
 Transcript:  De privacy schending weegt heel zwaar.


Input #0, wav, from '/tmp/tmpjrutmi91.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:04.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   3.95 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Juridisch gezien, maar ook emotioneel gezien. 
 Translation:  But there is of course a point in a case.
Segment [0:12:06 ---> 0:12:14] could not be matched with a single speaker 
 Transcript:  Juridisch gezien, maar ook emotioneel gezien. Ik merkte ook aan de verhalen die mensen vertellen, wat het met ze doet als ze iemand af luisteren.


Input #0, wav, from '/tmp/tmp7hbiptpf.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:08.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   7.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Je bent internationaal. 
 Translation:  Go.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Je zei natuurlijk in de eerste aflevering dat jullie een aantal zware jaren hebben gehad als het gaat over terrorisme. 
 Translation:  Go.
Segment [0:16:49 ---> 0:17:01] could not be matched with a single speaker 
 Transcript:  En je woont in een land en je woont op een continent in de Europese Unie waar open grenzen zijn. Wat we ook belangrijk vinden dat die grenzen open blijven.


Input #0, wav, from '/tmp/tmprziyxefu.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:12.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  11.95 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Ik ben blij dat ik de dg nu ben. 
 Translation:  I'm glad I'm the DG now.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Een technisch verhaal dus. 
 Translation:  So a technical story.
Segment [0:23:21 ---> 0:23:26] could not be matched with a single speaker 
 Transcript:  Hoe keek Mark daarnaar? De discussie heeft best wel wat met me gedaan.


Input #0, wav, from '/tmp/tmpxild43_6.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:05.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   4.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:24:10 ---> 0:24:21] could not be matched with a single speaker 
 Transcript:  en die zijn leidend eigenlijk in de keuze voor waar we gaan zitten. Dus als wij als AIVD, dat staat ook in de jaarverslagen... we doen onder andere onderzoek naar Rusland...


Input #0, wav, from '/tmp/tmpl4c4l2z9.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:11.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  10.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  tegengeluid te mobiliseren. 
 Translation:  And for the services, they are bound to the law and we can hardly defend the law.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Die datastroom waar Marc het over heeft... 
 Translation:  and that only the right data remains.
Segment [0:29:51 ---> 0:29:54] could not be matched with a single speaker 
 Transcript:  Kan dat ook zonder dat telefoonnummer?


Input #0, wav, from '/tmp/tmp8y76cntj.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:03.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   2.93 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:30:06 ---> 0:30:18] could not be matched with a single speaker 
 Transcript:  Precies. Nou, Wiesbeth.


Input #0, wav, from '/tmp/tmp5uw6twe0.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:12.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  11.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Wat is kabelsinterceptie nou eigenlijk? 
 Translation:  And if you got that right, then you are accepted, because we are here again.
Segment [0:30:21 ---> 0:30:27] could not be matched with a single speaker 
 Transcript:  Wat is kabelsinterceptie nou eigenlijk? Als je die goed hebt, dan ben je aangenomen als IVD'er.


Input #0, wav, from '/tmp/tmpr8zwajrb.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:06.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   5.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Ik vond dat wel een van de ingewikkeldste verhalen om te begrijpen. 
 Translation:  I thought that was one of the most complicated stories to understand.
Starting times are not matching. Check the following lines ---- 
 Transcription:  In het land. 
 Translation:  Yes, why?
Starting times are not matching. Check the following lines ---- 
 Transcription:  Maar wat me nog wel duidelijk is... 
 Translation:  Yes.
Segment [0:31:38 ---> 0:31:41] could not be matched with a single speaker 
 Transcript:  En zou jij het in dit geval wel of niet inzetten?


Input #0, wav, from '/tmp/tmp77h860a3.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:03.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   2.92 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Er is veel gebeurd deze dag. 
 Translation:  There is a lot going on today.
Segment [0:33:57 ---> 0:34:07] could not be matched with a single speaker 
 Transcript:  hoor je in de volgende aflevering. Dit was de vierde aflevering van De Dienst.


Input #0, wav, from '/tmp/tmpnv3k1ien.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:10.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   9.93 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Een podcast van de AIVD. 
 Translation:  .
---- concatenating speaker segments
   start             end   speaker  \
0 0 days 0 days 00:00:05  Liesbeth   

                                       transcription  \
0   Privacy, hallo, het rechte privacy heb je nu ...   

                                         translation  
0   Privacy? You've just played the role of privacy.  
            start             end speaker                      transcription  \
0 0 days 00:00:05 0 days 00:00:08    Bart   Nee, dat heb je nooit verspeeld.   

                         translation  
0   No, I've never played it. Never.  
            start             end         speaker  \
0 0 days 00:00:08 0 days 00:00:21  Bart, Liesbeth   

                                       transcription  \
0   Nooit. Hoe ver kun je gaan om een aanslag te ...   

                                 translation  
0   How far can you go to preven

Input #0, wav, from '/tmp/tmpkpa51sk6.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:04.74, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   4.69 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpeikc9phj.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:03.14, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   3.07 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmppvituucf.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:03.29, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   3.22 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpu_ot6x7h.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:01.64, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   1.60 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpmy9ygzwg.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:19.34, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  19.29 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpzmsbbiqz.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:00.59, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   0.51 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpoddl4lt1.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:01.03, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   0.97 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


---- transcribing and translating audio fragment ----
Model is multilingual and has 762,321,920 parameters.
---- starting transcription ----


 99%|█████████▉| 191700/193857 [10:29<00:07, 304.56frames/s]


---- starting translation ----


100%|██████████| 193857/193857 [08:55<00:00, 362.07frames/s]


---- merging everything ----
Starting times are not matching. Check the following lines ---- 
 Transcription:  Wat nu de ervaring ervoor leert, is dat de media heel snel bij ons op de stoep staan... 
 Translation:  The game is going to change now.
Segment [0:01:12 ---> 0:01:16] could not be matched with a single speaker 
 Transcript:  Een nieuwe dag in Zoetermeer. Er zijn meerdere teams bezig met het zoeken naar Sander...


Input #0, wav, from '/tmp/tmpwkbjz0c6.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:04.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   3.94 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:01:12 ---> 0:01:16] could not be matched with a single speaker 
 Transcript:  Een nieuwe dag in Zoetermeer. Er zijn meerdere teams bezig met het zoeken naar Sander...


Input #0, wav, from '/tmp/tmpseh35ygk.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:04.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   3.96 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:01:16 ---> 0:01:22] could not be matched with a single speaker 
 Transcript:  ...van Oost-Zint tot het internationale CT-team. En we hebben gekeken naar de mogelijkheden van kabelinterceptie.


Input #0, wav, from '/tmp/tmpz39c9akq.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:06.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   5.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:01:37 ---> 0:01:47] could not be matched with a single speaker 
 Transcript:  En dat is ook goed nieuws. En dat is ook goed nieuws. En dat is ook goed nieuws. Ik heb wat zaken vanochtend gekregen.


Input #0, wav, from '/tmp/tmpkevxrvuf.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:10.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   9.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Het team van Marja is gestuid op een foto... 
 Translation:  He is being kidnapped by a group of fighters in North Africa.
Segment [0:01:54 ---> 0:02:03] could not be matched with a single speaker 
 Transcript:  ...van een gemaskerde man. Die wordt onthaald door een groep strijders in Noord-Afrika.


Input #0, wav, from '/tmp/tmpl4l50ej3.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:09.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   8.91 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Aldin. 
 Translation:  That is the other name of Sander.
Segment [0:02:08 ---> 0:02:13] could not be matched with a single speaker 
 Transcript:  Aldin. Dat is de andere naam van Sander.


Input #0, wav, from '/tmp/tmp5dbivyi8.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:05.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   4.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  En hij blijkt nu in een trainingskamp in Libië te zitten. 
 Translation:  And he is now in a training camp in Libya.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Waarvan het vermoeder er al lag. 
 Translation:  And then it suddenly goes fast.
Segment [0:03:02 ---> 0:03:08] could not be matched with a single speaker 
 Transcript:  Waarvan het vermoeder er al lag. En dan gaat het opeens hard.


Input #0, wav, from '/tmp/tmpxkaxlqg1.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:06.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   5.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:05:12 ---> 0:05:26] could not be matched with a single speaker 
 Transcript:  Is het gebruikelijk dat een tweet op die manier wordt verstuurd? ISIS, zeker in de tijd dat zij nog een staat waren, besteden heel veel tijd en moeite aan hun propaganda.


Input #0, wav, from '/tmp/tmpkz3ndoca.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:14.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  13.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  We denken dat het nu wel verstandig is om de politieken, de politieken, de politieken... 
 Translation:  And a lot of attention in the media.
Segment [0:05:56 ---> 0:06:12] could not be matched with a single speaker 
 Transcript:  We denken dat het nu wel verstandig is om de politieken, de politieken, de politieken... We denken dat het nu wel verstandig is om deze informatie tijdig richting onze politieke afnemers te gaan brengen.


Input #0, wav, from '/tmp/tmpiwllbec7.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:16.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  15.92 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Ik ga praten met iemand die namens de AFVD het contact met de Haagse vierkante kilometer onderhoudt. 
 Translation:  I am going to talk to someone who on behalf of the AFD.
Segment [0:10:30 ---> 0:10:36] could not be matched with a single speaker 
 Transcript:  En als ze misschien daar in ieder geval over na moeten gaan denken wat ze op een later moment moeten gaan doen.


Input #0, wav, from '/tmp/tmphlvy_hxd.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:06.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   5.95 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Nee, dat klopt denk ik wel. 
 Translation:  No, I think that's right.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Wat nu de ervaring in ieder geval leert... 
 Translation:  People are going to think that anyway.
Segment [0:16:56 ---> 0:17:11] could not be matched with a single speaker 
 Transcript:  om te vragen wat hier aan de hand is en wat ze hiermee moeten. Ik heb ook al een smsje gekregen van onze woordvoerder... met de vraag wat weten wij en wat kunnen we naar buiten brengen. Die woordvoerder wil ik wel spreken...


Input #0, wav, from '/tmp/tmpv0lnpuzz.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:15.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  14.90 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Ik snap dat je zegt we kunnen heel veel wel vertellen... 
 Translation:  I understand that you say that you can tell a lot.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Volgens onderzoekers natuurlijk nog te weinig en het zou meer moeten zijn. 
 Translation:  Dossiers of us.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Kun je nog een beetje normaal de krant lezen of naar een nieuwsprogramma kijken? 
 Translation:  No.
Starting times are not matching. Check the following lines ---- 
 Transcription:  We gebruiken natuurlijk de zaak van Sander... 
 Translation:  We of course use Sander's case as a vehicle to be able to explain what the AIVD does.
Segment [0:24:04 ---> 0:24:13] could not be matched with a single speaker 
 Transcript:  We gebruiken natuurlijk de zaak van Sander... om een beetje als vehikel om te kunnen duiden wat de AIV

Input #0, wav, from '/tmp/tmpz5zz3i8x.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:09.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   8.91 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Kijk, wat er over Sander op dit moment buiten ligt... 
 Translation:  Nothing about Sander.
Starting times are not matching. Check the following lines ---- 
 Transcription:  is één tweet waarin hij iets dreigends vertelt. 
 Translation:  Look, what is out there about Sander at the moment is one tweet.
Segment [0:24:37 ---> 0:24:44] could not be matched with a single speaker 
 Transcript:  is één tweet waarin hij iets dreigends vertelt. Maar wat wij werkelijk weten van Sander...


Input #0, wav, from '/tmp/tmp1is9se6y.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:07.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   6.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:24:37 ---> 0:24:44] could not be matched with a single speaker 
 Transcript:  is één tweet waarin hij iets dreigends vertelt. Maar wat wij werkelijk weten van Sander...


Input #0, wav, from '/tmp/tmp_8ov676t.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:07.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   6.92 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  dat ga ik allemaal in het einde van het einde van het einde van het einde van het einde van het einde... 
 Translation:  What we know, how far it is with Sander's plans.
Starting times are not matching. Check the following lines ---- 
 Transcription:  hun werk kunnen doen met de informatie die wij hebben over Sander... 
 Translation:  My work, the talking about what we do, can never endanger such research.
Segment [0:25:08 ---> 0:25:17] could not be matched with a single speaker 
 Transcript:  hun werk kunnen doen met de informatie die wij hebben over Sander... om vervolgens daarmee de dreiging weg te nemen.


Input #0, wav, from '/tmp/tmp_d4zherd.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:09.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   8.91 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Natuurlijk zien wij dit soort tweets ook. 
 Translation:  And we can't say what we will or won't do with it.
Starting times are not matching. Check the following lines ---- 
 Transcription:  of hij terug naar Nederland komt. 
 Translation:  And if he comes into action in Libya, if he leaves that camp, that we follow him.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Dat we weten waar hij heen gaat. 
 Translation:  Of course, the outside world is now alerted.
Segment [0:27:31 ---> 0:27:38] could not be matched with a single speaker 
 Transcript:  Dat we weten waar hij heen gaat. Het is nu wel zo natuurlijk dat de buitenwereld geallerteerd is.


Input #0, wav, from '/tmp/tmp_s7mse_b.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:07.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   6.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


---- concatenating speaker segments
   start             end speaker  \
0 0 days 0 days 00:00:06    Inge   

                                       transcription  \
0   Ik zeg weleens, de AIVD zit tussen James Bond...   

                                         translation  
0   I often say that the AIVD is between James Bo...  
merging segment:
   start             end speaker  \
0 0 days 0 days 00:00:06    Inge   

                                       transcription  \
0   Ik zeg weleens, de AIVD zit tussen James Bond...   

                                         translation  
0   I often say that the AIVD is between James Bo...  
with:
            start             end speaker  \
0 0 days 00:00:06 0 days 00:00:10    Inge   

                                       transcription  \
0   En afhankelijk van de casus waarin je zit, vi...   

                                         translation  
0   And depending on the cases you're in, people ...  
 
   start             end speaker

Input #0, wav, from '/tmp/tmp9ivj3o4_.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:00.54, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   0.46 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmps_uvdoh3.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:01.89, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   1.80 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmp75tb665x.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:01.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   0.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpxvy7q46_.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:02.67, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   2.60 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpyk2075_7.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:00.64, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   0.55 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmp3itco12g.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:11.14, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  11.07 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpdir0yip0.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:00.91, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   0.85 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpnv50y0ow.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:01.40, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   1.35 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   




Input #0, wav, from '/tmp/tmpnqik2aa2.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:01.06, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   0.97 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


---- transcribing and translating audio fragment ----
Model is multilingual and has 762,321,920 parameters.
---- starting transcription ----


100%|██████████| 290930/290930 [17:40<00:00, 274.31frames/s]


---- starting translation ----


100%|██████████| 290930/290930 [14:31<00:00, 333.88frames/s]


---- merging everything ----
Starting times are not matching. Check the following lines ---- 
 Transcription:  Je luistert naar de laatste aflevering van De Dienst, een podcast van de AIVD. 
 Translation:  He has returned.
Starting times are not matching. Check the following lines ---- 
 Transcription:  En dan kijk je nog twaalf keer naar wie was ik ook alweer, weet ik nog, geboorte daten of al hetzelfde. Dat bent. 
 Translation:  It is boring with that passport.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Sander keert dus terug naar Nederland. 
 Translation:  My name is Lies Petrasker.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Wij zitten natuurlijk niet meer op de eerste rang op dit moment. 
 Translation:  How are we going to do that?
Starting times are not matching. Check the following lines ---- 
 Transcription:  Wij gaan door met ons onderzoek, maar wij moeten ervoor zorgen dat we de politie niet in de wi

Input #0, wav, from '/tmp/tmpgh5mnium.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:12.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  11.95 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Ja, die spanning begint enorm op te lopen. 
 Translation:  Yes.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Je hebt er heel veel energie in gestoken. 
 Translation:  Because you have been involved in it so intensively.
Starting times are not matching. Check the following lines ---- 
 Transcription:  En dan is eigenlijk bijna het hoogtepunt van je onderzoek zou dan zijn het feit dat je een anders bericht geschreven hebt. 
 Translation:  Yes, there is a bit of a feeling of loss.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Of sterker nog, zou hij zijn kinderen naar zo'n evenement laten gaan? 
 Translation:  If such an investigation takes place.
Segment [0:04:23 ---> 0:04:27] could not be matched with a single speaker 
 Transcript:  Of sterker nog, zou hij zijn kinderen naar zo'n evenement laten gaan?


Input #0, wav, from '/tmp/tmpbi2snu9h.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:04.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   3.96 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  ...er is een aanslag gebleven. 
 Translation:  Would Bart still go to such an event?
Starting times are not matching. Check the following lines ---- 
 Transcription:  ...wat ik voor mezelf altijd wel heb is dat ik niet... 
 Translation:  But what I always have for myself is that I don't want to be careless.
Starting times are not matching. Check the following lines ---- 
 Transcription:  We zijn... 
 Translation:  Yes.
Segment [0:05:44 ---> 0:05:54] could not be matched with a single speaker 
 Transcript:  Ja, ja. Ja, en ik merk ook wel dat ik... ...soms wel bepaalde overwegingen heb...


Input #0, wav, from '/tmp/tmp6fs6zu3l.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:10.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   9.94 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  ...ja, dat is al lastiger. 
 Translation:  What is now the case, this can still take two or three weeks.
Segment [0:06:10 ---> 0:06:16] could not be matched with a single speaker 
 Transcript:  ...ja, dat is al lastiger. Wat nu de zaak is... ...dit kan nog twee, drie weken duren...


Input #0, wav, from '/tmp/tmp_7snjgbt.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:06.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   5.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  ...dat hij die aanslag nu gaat plegen... 
 Translation:  And they will do it for as long as possible.
Starting times are not matching. Check the following lines ---- 
 Transcription:  ...van de kant van operatieën... 
 Translation:  Yes.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Dat wil eigenlijk met name zeggen de microfoons... 
 Translation:  Yes.
Segment [0:13:33 ---> 0:13:41] could not be matched with a single speaker 
 Transcript:  Wist jij altijd dat je hier geschikt voor was? Nee, maar goed, het is ook iets waar je uiteindelijk ingroeit...


Input #0, wav, from '/tmp/tmplirtj4l_.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:08.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   7.95 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Heb je weleens situaties gezeten waar je bang was? 
 Translation:  Have you ever been in situations where you were afraid?
Segment [0:17:05 ---> 0:17:09] could not be matched with a single speaker 
 Transcript:  Heb je weleens situaties gezeten waar je bang was? Nee.


Input #0, wav, from '/tmp/tmp7uo4mpun.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:04.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   3.95 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:17:43 ---> 0:17:57] could not be matched with a single speaker 
 Transcript:  ...die optreden op het moment dat het nodig mocht zijn... ...ja, wat dat betreft is het ook minder spannend. Ik had eerder deze serie aan mijn teamhalft Bart gevraagd... ...ik kwam hem namelijk tegen bij de bus halte...


Input #0, wav, from '/tmp/tmp4gcq8qhp.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:14.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  13.94 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:17:43 ---> 0:17:57] could not be matched with a single speaker 
 Transcript:  ...die optreden op het moment dat het nodig mocht zijn... ...ja, wat dat betreft is het ook minder spannend. Ik had eerder deze serie aan mijn teamhalft Bart gevraagd... ...ik kwam hem namelijk tegen bij de bus halte...


Input #0, wav, from '/tmp/tmp2yjm8dig.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:14.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  13.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Ja. 
 Translation:  but also your voice for others.
Starting times are not matching. Check the following lines ---- 
 Transcription:  We weten niet of wat er in die tassen zit. 
 Translation:  Sander and Mehmet in Amsterdam
Segment [0:22:14 ---> 0:22:16] could not be matched with a single speaker 
 Transcript:  We weten niet of wat er in die tassen zit.


Input #0, wav, from '/tmp/tmp81d238al.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:02.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   1.95 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Er is door de politie een bestand gevonden op de computer van Sander met zijn testament. 
 Translation:  The police are on top of it and can intervene at any time it is necessary.
Segment [0:22:27 ---> 0:22:46] could not be matched with a single speaker 
 Transcript:  Er is door de politie een bestand gevonden op de computer van Sander met zijn testament. Er is een videoboodschap gevonden van hem...


Input #0, wav, from '/tmp/tmpzeteee7d.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:19.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
  18.95 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Hoofdnaaldje ICT, maar het gaat op eigenlijk allerlei gebieden. 
 Translation:  I work mainly in ICT, but it actually goes in all kinds of areas.
Segment [0:26:00 ---> 0:26:05] could not be matched with a single speaker 
 Transcript:  ...onze best mogelijkheden geeft om daar iets actiefs mee te doen.


Input #0, wav, from '/tmp/tmpjspieul_.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:05.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   4.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Ja precies, oké. 
 Translation:  No worries.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Ik kan me niet voorstellen hoe het moet zijn om met je andere paspoort... 
 Translation:  Okay.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Dat wendt echt na een tijd. 
 Translation:  I was there before, I don't know.
Starting times are not matching. Check the following lines ---- 
 Transcription:  ...om dat te transporteren. 
 Translation:  And sometimes that means that we walk around with a box full of items,
Segment [0:32:34 ---> 0:32:40] could not be matched with a single speaker 
 Transcript:  ...om dat te transporteren. En soms betekent dat wel een snetje...


Input #0, wav, from '/tmp/tmpblh7vz_d.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:06.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   5.95 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Segment [0:35:02 ---> 0:35:06] could not be matched with a single speaker 
 Transcript:  Ja. Als ik dit een tijd niet doe...


Input #0, wav, from '/tmp/tmpo8q5f1w2.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:04.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   3.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  ...wat wij noemen onder controle te hebben. 
 Translation:  If we take Sander's case as an example again,
Segment [0:36:30 ---> 0:36:36] could not be matched with a single speaker 
 Transcript:  ...wat wij noemen onder controle te hebben. Als we de zaak Sander weer als voorbeeld erbij pakken...


Input #0, wav, from '/tmp/tmp0y_ypli1.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:06.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   5.93 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Ik heb al een opleiding om te kunnen volgen... 
 Translation:  follow him?
Starting times are not matching. Check the following lines ---- 
 Transcription:  ...van het feit dat iemand iets gaat doen... 
 Translation:  excluding the fact that someone is going to do something.
Starting times are not matching. Check the following lines ---- 
 Transcription:  Gefeliciteerd Liesbeth. 
 Translation:  Congratulations Lisbeth.
Starting times are not matching. Check the following lines ---- 
 Transcription:  ...het onderdeel van de politie... 
 Translation:  The part of the police
Segment [0:43:50 ---> 0:43:57] could not be matched with a single speaker 
 Transcript:  Ik vind het een bevredigend einde als je het zo uitlegt.


Input #0, wav, from '/tmp/tmprasm1e1c.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:07.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   6.94 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  Ja, de casus is klaar. 
 Translation:  Well, that seems nice.
Segment [0:47:35 ---> 0:47:45] could not be matched with a single speaker 
 Transcript:  Mijn zesdaagse AIVD-carrière zit erop.


Input #0, wav, from '/tmp/tmpafigkuw6.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:10.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   9.96 M-A: -0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


Starting times are not matching. Check the following lines ---- 
 Transcription:  naar huis. 
 Translation:  This was the last episode
Segment [0:47:50 ---> 0:47:55] could not be matched with a single speaker 
 Transcript:  naar huis. Dit was de laatste aflevering van De Dienst.


Input #0, wav, from '/tmp/tmpv_j_qj8u.wav':   0KB sq=    0B f=0/0   
  Duration: 00:00:05.00, bitrate: 1411 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 44100 Hz, 2 channels, s16, 1411 kb/s
   4.90 M-A:  0.000 fd=   0 aq=    0KB vq=    0KB sq=    0B f=0/0   


---- concatenating speaker segments
   start             end speaker  \
0 0 days 0 days 00:00:04    Bart   

                                       transcription  \
0   Hoi Elisabeth. Ja, je vraagt je natuurlijk af...   

                                         translation  
0   Hi Elisabeth. Yes, of course you are wonderin...  
merging segment:
   start             end speaker  \
0 0 days 0 days 00:00:04    Bart   

                                       transcription  \
0   Hoi Elisabeth. Ja, je vraagt je natuurlijk af...   

                                         translation  
0   Hi Elisabeth. Yes, of course you are wonderin...  
with:
            start             end speaker  \
0 0 days 00:00:04 0 days 00:00:06    Bart   

                            transcription                    translation  
0   Ja, dat kan niet veel goed betekenen.   Yes, that doesn't mean much.  
 
   start             end speaker  \
0 0 days 0 days 00:00:06    Bart   

                                