## 0. Configuration

In [1]:
# %load_ext autoreload
import os
import sys

# --- Runtime ---------------------------------------------------------------
# 'cuda' enables the GPU checks in section 2.
device = 'cuda'
# On Colab: abort the session unless the assigned GPU is a V100.
wait_for_v100 = False

# --- Datasets ----------------------------------------------------------------
# Dataset ids offered in the selection widget, one row per "HP<n>" pair.
ds_to_use = [
    'HP1-FvM', 'HP1-RB',
    'HP2-FvM', 'HP2-RB',
    'HP3-FvM', 'HP3-RB',
    'HP4-FvM', 'HP4-RB',
    'HP5-FvM', 'HP5-RB',
    'HP6-FvM', 'HP6-RB',
    'HP7-FvM', 'HP7-RB',
]

# --- Paths and model ---------------------------------------------------------
home = os.environ.get('HOME')
use_gdrive = False
trained_model_directory = 'trained_model'
# Optional checkpoint to load instead of the final model.
# Must name a sub-directory of {trained_model_path}, e.g. 'checkpoint-3600';
# None means no checkpoint is used.
checkpoint = None

## 1. Setup Directories

In [2]:
local_nlp_base_dir = f'{home}/shared'
local_data_base_dir = f'{local_nlp_base_dir}/NLP-Data'
local_data_dir = f'{local_data_base_dir}/audio'
extern_nlp_base_dir = None
extern_data_base_dir = None
extern_data_dir = None

runs_on_colab = (home == '/root')
print( f'runs on colab: {runs_on_colab}')

if not runs_on_colab:
    os.environ['http_proxy'] = 'http://192.168.8.50:3128'
    os.environ['https_proxy'] = 'http://192.168.8.50:3128'
else:
    # to get access to the datasets we use gdrive
    use_gdrive = True
    # install packages
    !pip install datasets==1.4.1
    !pip install transformers==4.4.0
    !pip install jiwer
    !pip install torchaudio
    !pip install librosa
    # create local directories
    !mkdir $local_nlp_base_dir
    !mkdir $local_data_base_dir

if use_gdrive:
    gdrive_base = '/content/gdrive'    
    extern_nlp_base_dir = f'{gdrive_base}/MyDrive'
    extern_data_base_dir = f'{extern_nlp_base_dir}/NLP-Data'
    extern_data_dir = f'{extern_data_base_dir}/audio'

    if not os.path.isdir(gdrive_base):
        from google.colab import drive
        drive.mount(gdrive_base)

if not os.path.isdir(local_data_dir):
    !mkdir $local_data_dir

if extern_nlp_base_dir:
    model_dir = f'{extern_nlp_base_dir}/NLP-Models/GermanWave2Vec'
else:
    model_dir = f'{local_nlp_base_dir}/NLP-Models/GermanWave2Vec'

# Use the Model from this Directory (Base: .../NLP-Models/GermanWave2Vec/)
# None -> Start from 'facebook/wav2vec2-large-xlsr-53-german'
trained_model_path = f'{model_dir}/{trained_model_directory}'

git_views_dir = f'{local_nlp_base_dir}/gitviews/extern'
git_view_path = f'{git_views_dir}/GermanWave2Vec'

if not os.path.isdir(git_views_dir):
    !mkdir $git_views_dir
    !cd $git_views_dir; git clone https://github.com/ElUnrast/GermanWave2Vec.git

runs on colab: False


In [3]:
if runs_on_colab:
    !cd $git_view_path; git fetch --all; git reset --hard origin/main

script_path = f'{git_views_dir}/GermanWave2Vec/python'
print(f'Script-Path: {script_path}')
sys.path.insert(0, script_path)

Script-Path: /home/ki-mo/shared/gitviews/extern/GermanWave2Vec/python


## 2. Check Runtime Properties

In [4]:
if 'cuda' == device:
    gpu_info = !nvidia-smi
    gpu_info = '\n'.join(gpu_info)

    if runs_on_colab:
        if wait_for_v100 and not gpu_info.find('V100') >= 0:
            print('The current GPU is not a V100')
            print('Since you want to wait for a V100 the current session is aborted')
            raise ValueError

        if gpu_info.find('failed') >= 0:
            print('For training, please use a VM with GPU!')
            raise ValueError
            
        from psutil import virtual_memory
        ram_gb = virtual_memory().total / 1e9
        print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

        if ram_gb < 20:
            print('To enable a high-RAM runtime, select the Runtime > "Change runtime type"')
            print('menu, and then select High-RAM in the Runtime shape dropdown. Then, ')
            print('re-execute this cell.')
        else:
            print('You are using a high-RAM runtime!')
            
    print(gpu_info)

Sun May 16 03:24:36 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.73.01    Driver Version: 460.73.01    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce RTX 208...  Off  | 00000000:09:00.0 Off |                  N/A |
| 35%   33C    P8    13W / 260W |     92MiB / 11016MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## 3. Install packages and do Imports

In [5]:
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union

import torch
import torchaudio
from torch import nn
from torch.cuda.amp import autocast

from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, Wav2Vec2CTCTokenizer, Wav2Vec2FeatureExtractor
from transformers import Trainer, TrainingArguments
from transformers.trainer_utils import get_last_checkpoint, is_main_process
from transformers.trainer_pt_utils import LengthGroupedSampler, DistributedLengthGroupedSampler

import json
import collections
import librosa
import numpy as np
import pandas as pd
import sklearn
import jiwer
from jiwer import wer
from datasets import load_metric
from tqdm.notebook import tqdm_notebook
from sklearn.model_selection import train_test_split

In [6]:
# %aimport SnippetDatasets
# %aimport GermanSpeechDatasetWidgetFactory
# %aimport GermanSpeechToTextTranslater
# %autoreload 2
from SnippetDatasets import SnippetDatasets
from GermanSpeechDatasetWidgetFactory import GermanSpeechDatasetWidgetFactory
from GermanSpeechToTextTranslater import GermanSpeechToTextTranslater

## 4. Initialize Helper Classes

In [7]:
# Dataset helper, built from the Colab flag, the git checkout and the
# local / external audio directories set up in section 1.
my_datasets = SnippetDatasets(
    runs_on_colab,
    git_repository=git_view_path,
    local_audio_base_dir=local_data_dir,
    extern_audio_base_dir=extern_data_dir,
)

In [8]:
# Weights file whose presence marks a usable model in trained_model_path.
final_weights = f'{trained_model_path}/pytorch_model.bin'

# A configured checkpoint wins; otherwise use the trained model directory if it
# holds weights; otherwise pass None on to the translator.
if checkpoint:
    model_path = f'{trained_model_path}/{checkpoint}'
elif os.path.isfile(final_weights):
    model_path = trained_model_path
else:
    model_path = None

print(f'Initialize Model from Path: {model_path}')
translator = GermanSpeechToTextTranslater(model_name=model_path, ds_handler=my_datasets)

Initialize Model from Path: /home/ki-mo/shared/NLP-Models/GermanWave2Vec/trained_model
Using Model: /home/ki-mo/shared/NLP-Models/GermanWave2Vec/trained_model
Loading processor
Loading metric
Loading model


## 5. Choose Dataset

In [9]:
# Build the widget factory from the dataset handler and the configured ids,
# then show its dataset choice widget (rendered as checkboxes in the output).
# NOTE(review): later cells read the selection back from widget_factory, so the
# results depend on interactive state - confirm this is intended.
widget_factory = GermanSpeechDatasetWidgetFactory(my_datasets, ds_to_use)
print('Please choose Datasets to use')
ds_checkboxes_widget = widget_factory.create_dataset_choice_widget()
display(ds_checkboxes_widget)

Please choose Datasets to use


Box(children=(Checkbox(value=True, description='HP1-FvM', indent=False), Checkbox(value=True, description='HP1…

## 6. Test and update Datasets

In [10]:
# Run translator.test once for every dataset id the widget factory reports as
# used. Per the recorded output each run loads and prunes the dataset,
# translates all entries, reports the WER and saves diff files.
# NOTE(review): presumably the ids are the ones ticked in the widget above - confirm.
for ds_id in widget_factory.get_used_datasets():
    translator.test(ds_id)

Loading Dataset: HP1-FvM - content-translated-with_original.csv
Pruning Dataset HP1-FvM with 8055 Entries
 - 8055 Entries left after Length Cut (min=31, max=4000)
 - 8055 Entries left after Action Cut
Translate all


HBox(children=(FloatProgress(value=0.0, max=8055.0), HTML(value='')))


Calculate WER
WER: 0.01798845031254306
No. of bad translated snippets: 985
Saving diff files
Loading Dataset: HP1-RB - content-translated-with_original.csv
Pruning Dataset HP1-RB with 5162 Entries
 - 5162 Entries left after Length Cut (min=31, max=4000)
 - 5162 Entries left after Action Cut
Translate all


HBox(children=(FloatProgress(value=0.0, max=5162.0), HTML(value='')))


Calculate WER
WER: 0.027500941383205725
No. of bad translated snippets: 1275
Saving diff files
Loading Dataset: HP2-FvM - content-translated-with_original.csv
Pruning Dataset HP2-FvM with 10892 Entries
 - 10892 Entries left after Length Cut (min=31, max=4000)
 - 10892 Entries left after Action Cut
Translate all


HBox(children=(FloatProgress(value=0.0, max=10892.0), HTML(value='')))


Calculate WER
WER: 0.020322076953552867
No. of bad translated snippets: 1013
Saving diff files
Loading Dataset: HP2-RB - content-translated-with_original.csv
Pruning Dataset HP2-RB with 5123 Entries
 - 5123 Entries left after Length Cut (min=31, max=4000)
 - 5123 Entries left after Action Cut
Translate all


HBox(children=(FloatProgress(value=0.0, max=5123.0), HTML(value='')))


Calculate WER
WER: 0.038900674564393486
No. of bad translated snippets: 1349
Saving diff files
Loading Dataset: HP3-FvM - content-translated-with_original.csv
Pruning Dataset HP3-FvM with 13161 Entries
 - 13161 Entries left after Length Cut (min=31, max=4000)
 - 13161 Entries left after Action Cut
Translate all


HBox(children=(FloatProgress(value=0.0, max=13161.0), HTML(value='')))


Calculate WER
WER: 0.020517263627481703
No. of bad translated snippets: 1413
Saving diff files
Loading Dataset: HP3-RB - content-translated-with_original.csv
Pruning Dataset HP3-RB with 8117 Entries
 - 8117 Entries left after Length Cut (min=31, max=4000)
 - 8117 Entries left after Action Cut
Translate all


HBox(children=(FloatProgress(value=0.0, max=8117.0), HTML(value='')))


Calculate WER
WER: 0.08832504362594601
No. of bad translated snippets: 2486
Saving diff files
Loading Dataset: HP4-FvM - content-translated-with_original.csv
Pruning Dataset HP4-FvM with 23579 Entries
 - 23579 Entries left after Length Cut (min=31, max=4000)
 - 23579 Entries left after Action Cut
Translate all


HBox(children=(FloatProgress(value=0.0, max=23579.0), HTML(value='')))


Calculate WER
WER: 0.025355855204298654
No. of bad translated snippets: 2827
Saving diff files
Loading Dataset: HP4-RB - content-translated-with_original.csv
Pruning Dataset HP4-RB with 15499 Entries
 - 15499 Entries left after Length Cut (min=31, max=4000)
 - 15499 Entries left after Action Cut
Translate all


HBox(children=(FloatProgress(value=0.0, max=15499.0), HTML(value='')))


Calculate WER
WER: 0.07808772751533108
No. of bad translated snippets: 5137
Saving diff files
Loading Dataset: HP5-FvM - content-translated-with_original.csv
Pruning Dataset HP5-FvM with 31823 Entries
 - 31823 Entries left after Length Cut (min=31, max=4000)
 - 31823 Entries left after Action Cut
Translate all


HBox(children=(FloatProgress(value=0.0, max=31823.0), HTML(value='')))


Calculate WER
WER: 0.021264843965755315
No. of bad translated snippets: 3499
Saving diff files
Loading Dataset: HP5-RB - content-translated-with_original.csv
Pruning Dataset HP5-RB with 24963 Entries
 - 24963 Entries left after Length Cut (min=31, max=4000)
 - 24963 Entries left after Action Cut
Translate all


HBox(children=(FloatProgress(value=0.0, max=24963.0), HTML(value='')))


Calculate WER
WER: 0.038650460722096734
No. of bad translated snippets: 5212
Saving diff files
Loading Dataset: HP6-FvM - content-translated-with_original.csv
Pruning Dataset HP6-FvM with 20319 Entries
 - 20319 Entries left after Length Cut (min=31, max=4000)
 - 20319 Entries left after Action Cut
Translate all


HBox(children=(FloatProgress(value=0.0, max=20319.0), HTML(value='')))


Calculate WER
WER: 0.005633191868508054
No. of bad translated snippets: 731
Saving diff files
Loading Dataset: HP6-RB - content-translated-with_original.csv
Pruning Dataset HP6-RB with 14176 Entries
 - 14176 Entries left after Length Cut (min=31, max=4000)
 - 14176 Entries left after Action Cut
Translate all


HBox(children=(FloatProgress(value=0.0, max=14176.0), HTML(value='')))


Calculate WER
WER: 0.011267135932158978
No. of bad translated snippets: 1164
Saving diff files
Loading Dataset: HP7-FvM - content-translated-with_original.csv
Pruning Dataset HP7-FvM with 23624 Entries
 - 23624 Entries left after Length Cut (min=31, max=4000)
 - 23624 Entries left after Action Cut
Translate all


HBox(children=(FloatProgress(value=0.0, max=23624.0), HTML(value='')))


Calculate WER
WER: 0.007154795025473494
No. of bad translated snippets: 1115
Saving diff files
Loading Dataset: HP7-RB - content-translated-with_original.csv
Pruning Dataset HP7-RB with 17331 Entries
 - 17331 Entries left after Length Cut (min=31, max=4000)
 - 17331 Entries left after Action Cut
Translate all


HBox(children=(FloatProgress(value=0.0, max=17331.0), HTML(value='')))


Calculate WER
WER: 0.03600147887974859
No. of bad translated snippets: 3773
Saving diff files


In [12]:
# Mark the end of the run and hand a remote sample sound to the widget factory.
# NOTE(review): presumably an audible "done" notification - confirm. The URL is
# fetched over plain http.
print('finished')
widget_factory.play_audio_file(url="http://www.w3schools.com/html/horse.ogg")

In [3]:
import math
# Scratch calculation: squared base-10 logarithm of the sample value.
log_value = math.log10(0.007816708213807012)
print(log_value ** 2)

4.439348283076292


In [4]:
# Scratch calculation: squared base-10 logarithm of 0.11.
log_value = math.log10(0.11)
print(log_value ** 2)

0.9189279840681579


In [5]:
import audioread

# Sample audio snippet used to try out audioread (machine-specific absolute path).
f_name = (
    '/home/ki-mo/shared/NLP-Data/audio/NLP - Harry Potter/HP1-FvM/'
    'CD03 - 03 - In der Winkelgasse (01492-01523).mp3'
)

def show_audioread_mp3(mp3_file):
    """Print the stream properties of an audio file and one line per decoded chunk.

    Args:
        mp3_file: Path of the audio file handed to ``audioread.audio_open``.

    Any exception raised by ``audioread.audio_open`` (for example when no
    decoding backend is available) is propagated unchanged.
    """
    with audioread.audio_open(mp3_file) as audio_file:
        # Read the properties from the opened file object itself; the original
        # referenced an undefined name `f` here.
        print('Input file: %i channels at %i Hz; %.1f seconds.'
              % (audio_file.channels, audio_file.samplerate, audio_file.duration))

        # enumerate() supplies the 1-based chunk counter that was previously
        # incremented without ever being initialised.
        for i, chunk in enumerate(audio_file, start=1):
            print(f'{i:d}Type: {type(chunk)}, Len: {len(chunk)}')
            
# Try the helper on the sample snippet. The recorded run ended in audioread's
# NoBackendError - presumably no decoding backend (e.g. ffmpeg) was installed
# on that machine; TODO confirm.
show_audioread_mp3(f_name)

NoBackendError: 