## 0. Configuration

In [1]:
# %load_ext autoreload
import os
import sys

# Device the model is meant to run on; the GPU checks in section 2 only run for 'cuda'.
device = 'cuda'
# Colab only: section 2 aborts the session unless nvidia-smi reports a V100.
wait_for_v100 = False
# Dataset ids passed to GermanSpeechDatasetWidgetFactory in section 5.
# Uncomment the entries you want to hand over.
ds_to_use = [
    # 'HP1-FvM',
    # 'HP1-RB',
    # 'HP2-FvM',
    # 'HP2-RB',
    # 'HP3-FvM',
    # 'HP3-RB',
    # 'HP4-FvM',
    # 'HP4-RB',
    # 'HP5-FvM',
    # 'HP5-RB',
    # 'HP6-FvM',
    # 'HP6-RB',
    # 'HP7-FvM',
    # 'HP7-RB'
]
# Home directory that all local paths are derived from. Falls back to
# expanduser('~') when HOME is unset/empty, so paths never start with 'None/'.
home = os.getenv("HOME") or os.path.expanduser("~")
# Whether the datasets/models live on Google Drive; section 1 forces this to True on Colab.
use_gdrive = False
# Sub-directory of the model base directory (.../NLP-Models/GermanWave2Vec/)
# that holds the trained model.
trained_model_directory = 'trained_model'
# Load the model from a checkpoint instead of the final weights.
# The value must be a sub-directory of {trained_model_path} (example: 'checkpoint-3600');
# None means no checkpoint is used.
checkpoint = None

## 1. Setup Directories

In [2]:
local_nlp_base_dir = f'{home}/shared'
local_data_base_dir = f'{local_nlp_base_dir}/NLP-Data'
local_data_dir = f'{local_data_base_dir}/audio'
extern_nlp_base_dir = None
extern_data_base_dir = None
extern_data_dir = None

runs_on_colab = (home == '/root')
print( f'runs on colab: {runs_on_colab}')

if not runs_on_colab:
    os.environ['http_proxy'] = 'http://192.168.8.50:3128'
    os.environ['https_proxy'] = 'http://192.168.8.50:3128'
else:
    # to get access to the datasets we use gdrive
    use_gdrive = True
    # install packages
    !pip install datasets==1.4.1
    !pip install transformers==4.4.0
    !pip install jiwer
    !pip install torchaudio
    !pip install librosa
    # create local directories
    !mkdir $local_nlp_base_dir
    !mkdir $local_data_base_dir

if use_gdrive:
    gdrive_base = '/content/gdrive'    
    extern_nlp_base_dir = f'{gdrive_base}/MyDrive'
    extern_data_base_dir = f'{extern_nlp_base_dir}/NLP-Data'
    extern_data_dir = f'{extern_data_base_dir}/audio'

    if not os.path.isdir(gdrive_base):
        from google.colab import drive
        drive.mount(gdrive_base)

if not os.path.isdir(local_data_dir):
    !mkdir $local_data_dir

if extern_nlp_base_dir:
    model_dir = f'{extern_nlp_base_dir}/NLP-Models/GermanWave2Vec'
else:
    model_dir = f'{local_nlp_base_dir}/NLP-Models/GermanWave2Vec'

# Use the Model from this Directory (Base: .../NLP-Models/GermanWave2Vec/)
# None -> Start from 'facebook/wav2vec2-large-xlsr-53-german'
trained_model_path = f'{model_dir}/{trained_model_directory}'

git_views_dir = f'{local_nlp_base_dir}/gitviews/extern'
git_view_path = f'{git_views_dir}/GermanWave2Vec'

if not os.path.isdir(git_views_dir):
    !mkdir $git_views_dir
    !cd $git_views_dir; git clone https://github.com/ElUnrast/GermanWave2Vec.git

runs on colab: False


In [3]:
if runs_on_colab:
    !cd $git_view_path; git fetch --all; git reset --hard origin/main

script_path = f'{git_views_dir}/GermanWave2Vec/python'
print(f'Script-Path: {script_path}')
sys.path.insert(0, script_path)

Script-Path: /home/ki-mo/shared/gitviews/extern/GermanWave2Vec/python


## 2. Check Runtime Properties

In [4]:
if 'cuda' == device:
    gpu_info = !nvidia-smi
    gpu_info = '\n'.join(gpu_info)

    if runs_on_colab:
        if wait_for_v100 and not gpu_info.find('V100') >= 0:
            print('The current GPU is not a V100')
            print('Since you want to wait for a V100 the current session is aborted')
            raise ValueError

        if gpu_info.find('failed') >= 0:
            print('For training, please use a VM with GPU!')
            raise ValueError
            
        from psutil import virtual_memory
        ram_gb = virtual_memory().total / 1e9
        print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

        if ram_gb < 20:
            print('To enable a high-RAM runtime, select the Runtime > "Change runtime type"')
            print('menu, and then select High-RAM in the Runtime shape dropdown. Then, ')
            print('re-execute this cell.')
        else:
            print('You are using a high-RAM runtime!')
            
    print(gpu_info)

Tue Jun  1 17:38:02 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.73.01    Driver Version: 460.73.01    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce RTX 208...  Off  | 00000000:09:00.0 Off |                  N/A |
| 35%   37C    P8    13W / 260W |     86MiB / 11016MiB |     17%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## 3. Install packages and do Imports

In [5]:
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union

import torch
import torchaudio
from torch import nn
from torch.cuda.amp import autocast

from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, Wav2Vec2CTCTokenizer, Wav2Vec2FeatureExtractor
from transformers import Trainer, TrainingArguments
from transformers.trainer_utils import get_last_checkpoint, is_main_process
from transformers.trainer_pt_utils import LengthGroupedSampler, DistributedLengthGroupedSampler

import json
import collections
import librosa
import numpy as np
import pandas as pd
import sklearn
import jiwer
from jiwer import wer
from datasets import load_metric
from tqdm.notebook import tqdm_notebook
from sklearn.model_selection import train_test_split

In [6]:
# %aimport SnippetDatasets
# %aimport GermanSpeechDatasetWidgetFactory
# %aimport GermanSpeechToTextTranslater
# %autoreload 2
from SnippetDatasets import SnippetDatasets
from GermanSpeechDatasetWidgetFactory import GermanSpeechDatasetWidgetFactory
from GermanSpeechToTextTranslater import GermanSpeechToTextTranslater

## 4. Initialize Helper Classes

In [7]:
# Dataset handler used by the widget factory and the translator below.
# It is given the Colab flag, the local audio directory, the git checkout and
# the external (Google Drive) audio directory; the latter is None when
# Google Drive is not in use.
my_datasets = SnippetDatasets(
    runs_on_colab, 
    local_audio_base_dir=local_data_dir, 
    git_repository=git_view_path,
    extern_audio_base_dir=extern_data_dir
)

In [8]:
# Decide where the model is loaded from:
#   1. an explicitly configured checkpoint sub-directory wins,
#   2. otherwise the trained model directory, if it contains final weights,
#   3. otherwise None (per the note in section 1 this presumably makes the
#      translator start from 'facebook/wav2vec2-large-xlsr-53-german' - confirm
#      in GermanSpeechToTextTranslater).
final_weights_file = f'{trained_model_path}/pytorch_model.bin'

if checkpoint:
    model_path = f'{trained_model_path}/{checkpoint}'
elif os.path.isfile(final_weights_file):
    model_path = trained_model_path
else:
    model_path = None

print(f'Initialize Model from Path: {model_path}')
translator = GermanSpeechToTextTranslater(model_name=model_path, ds_handler=my_datasets)

Initialize Model from Path: /home/ki-mo/shared/NLP-Models/GermanWave2Vec/trained_model
Using Model: /home/ki-mo/shared/NLP-Models/GermanWave2Vec/trained_model
Loading processor
Loading metric
json loaded: {'trained_epochs': 118}
Saved Epoch: 118
Loading model. Epoche 118


## 5. Choose Dataset

In [9]:
# Build the dataset selection UI from the dataset handler and the ids
# configured in ds_to_use, then render it below the cell.
# NOTE(review): which datasets later cells process depends on the state of
# these checkboxes, i.e. on manual interaction - a plain "Run All" may not
# reproduce an earlier run.
widget_factory = GermanSpeechDatasetWidgetFactory(my_datasets, ds_to_use)
print('Please choose Datasets to use')
ds_checkboxes_widget = widget_factory.create_dataset_choice_widget()
display(ds_checkboxes_widget)

Please choose Datasets to use


Box(children=(Checkbox(value=False, description='HP1-FvM', indent=False), Checkbox(value=False, description='H…

## 6. Test and update Datasets

In [14]:
# Run translator.test() once for every dataset id reported by the widget factory.
# NOTE(review): judging by the recorded output, test() re-translates a dataset
# and recomputes its word error rate when its stored epoch count is behind the
# model's - confirm in GermanSpeechToTextTranslater.
for ds_id in widget_factory.get_used_datasets():
    translator.test(ds_id)

Loading Dataset: common-voice-de-02v30
-----------------------------
Loading Dataset: common-voice-de-02v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-02v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-02v30 with 11906 Entries
 - 11906 Entries left after Length Cut (min=31, max=4000)
 - 11906 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 118
old word error rate: 22.431734059641038
Translation is up to date
Loading Dataset: common-voice-de-03v30
-----------------------------
Loading Dataset: common-voice-de-03v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-03v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-03v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 11954 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=11954.0), HTML(value='')))


Calculate WER
WER: 0.21833651758060496
No. of bad translated snippets: 7697
Saving diff files
Loading Dataset: common-voice-de-04v30
-----------------------------
Loading Dataset: common-voice-de-04v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-04v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-04v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 12162 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=12162.0), HTML(value='')))


Calculate WER
WER: 0.20339738680140376
No. of bad translated snippets: 7686
Saving diff files
Loading Dataset: common-voice-de-05v30
-----------------------------
Loading Dataset: common-voice-de-05v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-05v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-05v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 11957 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=11957.0), HTML(value='')))


Calculate WER
WER: 0.20198052851182197
No. of bad translated snippets: 7770
Saving diff files
Loading Dataset: common-voice-de-06v30
-----------------------------
Loading Dataset: common-voice-de-06v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-06v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-06v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 12262 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=12262.0), HTML(value='')))


Calculate WER
WER: 0.20112950299760823
No. of bad translated snippets: 8070
Saving diff files
Loading Dataset: common-voice-de-07v30
-----------------------------
Loading Dataset: common-voice-de-07v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-07v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-07v30 with 18839 Entries
 - 18838 Entries left after Length Cut (min=31, max=4000)
 - 12163 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=12163.0), HTML(value='')))


Calculate WER
WER: 0.19925371569922226
No. of bad translated snippets: 8167
Saving diff files
Loading Dataset: common-voice-de-08v30
-----------------------------
Loading Dataset: common-voice-de-08v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-08v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-08v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 11815 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=11815.0), HTML(value='')))


Calculate WER
WER: 0.1993439340525918
No. of bad translated snippets: 7980
Saving diff files
Loading Dataset: common-voice-de-09v30
-----------------------------
Loading Dataset: common-voice-de-09v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-09v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-09v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 12443 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=12443.0), HTML(value='')))


Calculate WER
WER: 0.19708820489959678
No. of bad translated snippets: 8538
Saving diff files
Loading Dataset: common-voice-de-10v30
-----------------------------
Loading Dataset: common-voice-de-10v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-10v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-10v30 with 18839 Entries
 - 18838 Entries left after Length Cut (min=31, max=4000)
 - 12643 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=12643.0), HTML(value='')))


Calculate WER
WER: 0.2023704387619927
No. of bad translated snippets: 8623
Saving diff files
Loading Dataset: common-voice-de-11v30
-----------------------------
Loading Dataset: common-voice-de-11v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-11v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-11v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 12538 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=12538.0), HTML(value='')))


Calculate WER
WER: 0.18628199257110872
No. of bad translated snippets: 8255
Saving diff files
Loading Dataset: common-voice-de-12v30
-----------------------------
Loading Dataset: common-voice-de-12v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-12v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-12v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 12012 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=12012.0), HTML(value='')))


Calculate WER
WER: 0.19391551761873496
No. of bad translated snippets: 8147
Saving diff files
Loading Dataset: common-voice-de-13v30
-----------------------------
Loading Dataset: common-voice-de-13v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-13v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-13v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 13049 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=13049.0), HTML(value='')))


Calculate WER
WER: 0.20507356175766167
No. of bad translated snippets: 8944
Saving diff files
Loading Dataset: common-voice-de-14v30
-----------------------------
Loading Dataset: common-voice-de-14v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-14v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-14v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 13063 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=13063.0), HTML(value='')))


Calculate WER
WER: 0.1953047547289527
No. of bad translated snippets: 8847
Saving diff files
Loading Dataset: common-voice-de-15v30
-----------------------------
Loading Dataset: common-voice-de-15v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-15v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-15v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 12468 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=12468.0), HTML(value='')))


Calculate WER
WER: 0.20303813964853692
No. of bad translated snippets: 8732
Saving diff files
Loading Dataset: common-voice-de-16v30
-----------------------------
Loading Dataset: common-voice-de-16v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-16v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-16v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 13105 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=13105.0), HTML(value='')))


Calculate WER
WER: 0.19678485695171383
No. of bad translated snippets: 8807
Saving diff files
Loading Dataset: common-voice-de-17v30
-----------------------------
Loading Dataset: common-voice-de-17v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-17v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-17v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 12372 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=12372.0), HTML(value='')))


Calculate WER
WER: 0.20388299832152945
No. of bad translated snippets: 8551
Saving diff files
Loading Dataset: common-voice-de-18v30
-----------------------------
Loading Dataset: common-voice-de-18v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-18v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-18v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 11593 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=11593.0), HTML(value='')))


Calculate WER
WER: 0.17037262772955591
No. of bad translated snippets: 7501
Saving diff files
Loading Dataset: common-voice-de-19v30
-----------------------------
Loading Dataset: common-voice-de-19v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-19v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-19v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 11482 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=11482.0), HTML(value='')))


Calculate WER
WER: 0.1866290626398443
No. of bad translated snippets: 7765
Saving diff files
Loading Dataset: common-voice-de-20v30
-----------------------------
Loading Dataset: common-voice-de-20v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-20v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-20v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 13091 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=13091.0), HTML(value='')))


Calculate WER
WER: 0.16465333371591442
No. of bad translated snippets: 8060
Saving diff files
Loading Dataset: common-voice-de-21v30
-----------------------------
Loading Dataset: common-voice-de-21v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-21v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-21v30 with 18839 Entries
 - 18838 Entries left after Length Cut (min=31, max=4000)
 - 12796 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=12796.0), HTML(value='')))


Calculate WER
WER: 0.17010832438762566
No. of bad translated snippets: 8116
Saving diff files
Loading Dataset: common-voice-de-22v30
-----------------------------
Loading Dataset: common-voice-de-22v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-22v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-22v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 12597 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=12597.0), HTML(value='')))


Calculate WER
WER: 0.20323576434626098
No. of bad translated snippets: 8908
Saving diff files
Loading Dataset: common-voice-de-23v30
-----------------------------
Loading Dataset: common-voice-de-23v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-23v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-23v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 13600 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=13600.0), HTML(value='')))


Calculate WER
WER: 0.22030627549509443
No. of bad translated snippets: 9796
Saving diff files
Loading Dataset: common-voice-de-24v30
-----------------------------
Loading Dataset: common-voice-de-24v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-24v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-24v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 13041 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=13041.0), HTML(value='')))


Calculate WER
WER: 0.2096123640171421
No. of bad translated snippets: 8169
Saving diff files
Loading Dataset: common-voice-de-25v30
-----------------------------
Loading Dataset: common-voice-de-25v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-25v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-25v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 13468 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=13468.0), HTML(value='')))


Calculate WER
WER: 0.18466002514749977
No. of bad translated snippets: 8777
Saving diff files
Loading Dataset: common-voice-de-26v30
-----------------------------
Loading Dataset: common-voice-de-26v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-26v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-26v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 13239 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=13239.0), HTML(value='')))


Calculate WER
WER: 0.1518343184216178
No. of bad translated snippets: 7975
Saving diff files
Loading Dataset: common-voice-de-27v30
-----------------------------
Loading Dataset: common-voice-de-27v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-27v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-27v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 12819 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=12819.0), HTML(value='')))


Calculate WER
WER: 0.17827723694063835
No. of bad translated snippets: 8321
Saving diff files
Loading Dataset: common-voice-de-28v30
-----------------------------
Loading Dataset: common-voice-de-28v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-28v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-28v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 8071 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=8071.0), HTML(value='')))


Calculate WER
WER: 0.15169588216989938
No. of bad translated snippets: 5437
Saving diff files
Loading Dataset: common-voice-de-29v30
-----------------------------
Loading Dataset: common-voice-de-29v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-29v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-29v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 7960 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=7960.0), HTML(value='')))


Calculate WER
WER: 0.17257246813007476
No. of bad translated snippets: 5757
Saving diff files
Loading Dataset: common-voice-de-30v30
-----------------------------
Loading Dataset: common-voice-de-30v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-30v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-30v30 with 18839 Entries
 - 18839 Entries left after Length Cut (min=31, max=4000)
 - 9525 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=9525.0), HTML(value='')))


Calculate WER
WER: 0.170118041552439
No. of bad translated snippets: 6688
Saving diff files
Loading Dataset: german-speechdata-package-v2-1v4
-----------------------------
Loading Dataset: german-speechdata-package-v2-1v4 - /home/ki-mo/shared/NLP-Data/audio/german-speechdata-package-v2/german-speechdata-package-v2-1v4/content-translated-with_original.csv
Pruning Dataset german-speechdata-package-v2-1v4 with 20106 Entries
 - 20091 Entries left after Length Cut (min=31, max=4000)
 - 7481 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=7481.0), HTML(value='')))


Calculate WER
WER: 0.17572201338589896
No. of bad translated snippets: 4991
Saving diff files
Loading Dataset: german-speechdata-package-v2-2v4
-----------------------------
Loading Dataset: german-speechdata-package-v2-2v4 - /home/ki-mo/shared/NLP-Data/audio/german-speechdata-package-v2/german-speechdata-package-v2-2v4/content-translated-with_original.csv
Pruning Dataset german-speechdata-package-v2-2v4 with 20107 Entries
 - 20098 Entries left after Length Cut (min=31, max=4000)
 - 6720 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=6720.0), HTML(value='')))


Calculate WER
WER: 0.14816247582205028
No. of bad translated snippets: 4729
Saving diff files
Loading Dataset: german-speechdata-package-v2-3v4
-----------------------------
Loading Dataset: german-speechdata-package-v2-3v4 - /home/ki-mo/shared/NLP-Data/audio/german-speechdata-package-v2/german-speechdata-package-v2-3v4/content-translated-with_original.csv
Pruning Dataset german-speechdata-package-v2-3v4 with 20107 Entries
 - 20037 Entries left after Length Cut (min=31, max=4000)
 - 11075 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=11075.0), HTML(value='')))


Calculate WER
WER: 0.22731613075146562
No. of bad translated snippets: 6248
Saving diff files
Loading Dataset: german-speechdata-package-v2-4v4
-----------------------------
Loading Dataset: german-speechdata-package-v2-4v4 - /home/ki-mo/shared/NLP-Data/audio/german-speechdata-package-v2/german-speechdata-package-v2-4v4/content-translated-with_original.csv
Pruning Dataset german-speechdata-package-v2-4v4 with 20107 Entries
 - 20095 Entries left after Length Cut (min=31, max=4000)
 - 12288 Entries left after Action Cut
aktual trained epoches: 118
old trained epoches: 0
old word error rate: 1
Saving word_error_rate: 100
Translate all


HBox(children=(FloatProgress(value=0.0, max=12288.0), HTML(value='')))


Calculate WER
WER: 0.1617690676802408
No. of bad translated snippets: 5598
Saving diff files


In [11]:
# Signal the end of the run.
# NOTE(review): presumably the sound serves as an audible "done" notification
# after the long test loop; the file is referenced via a remote plain-HTTP URL -
# confirm it is reachable (e.g. through the proxy configured in section 1).
print('finished')
widget_factory.play_audio_file(audio_url="http://www.w3schools.com/html/horse.ogg")

finished


In [12]:
# Load one dataset through the dataset handler and show it as the cell result
# for a manual sanity check.
ds = my_datasets.load_ds_content_translated_with_original('common-voice-de-01v30')
ds

-----------------------------
Loading Dataset: common-voice-de-01v30 - /home/ki-mo/shared/NLP-Data/audio/common-voice-de/common-voice-de-01v30/content-translated-with_original.csv
Pruning Dataset common-voice-de-01v30 with 18839 Entries
 - 18838 Entries left after Length Cut (min=31, max=4000)
 - 11813 Entries left after Action Cut


Unnamed: 0,Autor,Sprecher,Titel,Orginaldatei,Datei,Start,End,Length,Size,Translated0,OriginalText,Action
0,cv-corpus-6.1-2020-12-11/de,0052c07533a6976233ad5926d950b523002c4d8cdd9ae8...,common-voice-de-01v30,common_voice_de_17922420.mp3,common_voice_de_17922420.mp3,0,149,149,56448,zieht euch bitte draussen die schuhe aus,zieht euch bitte draußen die schuhe aus,train
5,cv-corpus-6.1-2020-12-11/de,03ac3eb87717f430b8a64228e61b5829cf6187f790c0fd...,common-voice-de-01v30,common_voice_de_18520278.mp3,common_voice_de_18520278.mp3,0,194,194,73728,was soll's ich bin bereit,was solls ich bin bereit,train
6,cv-corpus-6.1-2020-12-11/de,0497af98ed3a4300df41acdad9092f8a608ec193733ba0...,common-voice-de-01v30,common_voice_de_18204372.mp3,common_voice_de_18204372.mp3,0,273,273,104064,das internett besteht aus vielen kompiutern di...,das internet besteht aus vielen computern die ...,train
7,cv-corpus-6.1-2020-12-11/de,05adbec1ccc723a881575c5e93da4cff89412f3e806bde...,common-voice-de-01v30,common_voice_de_17826889.mp3,common_voice_de_17826889.mp3,0,172,172,65280,der uranus ist der siebente planet in unserm sy,der uranus ist der siebente planet in unserem ...,train
9,cv-corpus-6.1-2020-12-11/de,089487abd205588284ef22d657f47b228c76b49dd10d91...,common-voice-de-01v30,common_voice_de_20873706.mp3,common_voice_de_20873706.mp3,0,175,175,66432,sie war die cousine von karl maria von weber,sie war die cousine von carl maria von weber,train
...,...,...,...,...,...,...,...,...,...,...,...,...
18834,cv-corpus-6.1-2020-12-11/de,b5f59d0f848577d0f36de2387ee41941e71575f2277b08...,common-voice-de-01v30,common_voice_de_22042746.mp3,common_voice_de_22042746.mp3,0,108,108,40704,fünf,fünf,train
18835,cv-corpus-6.1-2020-12-11/de,b5f59d0f848577d0f36de2387ee41941e71575f2277b08...,common-voice-de-01v30,common_voice_de_22042750.mp3,common_voice_de_22042750.mp3,0,101,101,38016,vie,vier,train
18836,cv-corpus-6.1-2020-12-11/de,b5f59d0f848577d0f36de2387ee41941e71575f2277b08...,common-voice-de-01v30,common_voice_de_22042753.mp3,common_voice_de_22042753.mp3,0,131,131,49536,sieben,sieben,train
18837,cv-corpus-6.1-2020-12-11/de,b5f59d0f848577d0f36de2387ee41941e71575f2277b08...,common-voice-de-01v30,common_voice_de_22042755.mp3,common_voice_de_22042755.mp3,0,111,111,41856,sechs,sechs,train
