In [None]:
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [None]:
!pip install tensorflow-gpu==1.15.0

Collecting tensorflow-gpu==1.15.0
  Downloading tensorflow_gpu-1.15.0-cp37-cp37m-manylinux2010_x86_64.whl (411.5 MB)
[K     |████████████████████████████████| 411.5 MB 7.2 kB/s 
Collecting gast==0.2.2
  Downloading gast-0.2.2.tar.gz (10 kB)
Collecting tensorboard<1.16.0,>=1.15.0
  Downloading tensorboard-1.15.0-py3-none-any.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 45.0 MB/s 
[?25hCollecting tensorflow-estimator==1.15.1
  Downloading tensorflow_estimator-1.15.1-py2.py3-none-any.whl (503 kB)
[K     |████████████████████████████████| 503 kB 55.5 MB/s 
Collecting keras-applications>=1.0.8
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 8.1 MB/s 
Building wheels for collected packages: gast
  Building wheel for gast (setup.py) ... [?25l[?25hdone
  Created wheel for gast: filename=gast-0.2.2-py3-none-any.whl size=7554 sha256=f46d5b7ab778b6b1c6ecebd8646216e20766f89f348439734d0af1a34c2d5dbe
  Stored in 

In [None]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

Instructions for updating:
non-resource variables are not supported in the long term


In [None]:
import os
from os.path import exists, join, basename, splitext

git_repo_url = 'https://github.com/NVIDIA/OpenSeq2Seq.git'
project_name = splitext(basename(git_repo_url))[0]
if not exists(project_name):
  # clone and install dependencies
  !git clone -q --depth 1 {git_repo_url}
  !pip uninstall -y -q pymc3
  !pip install --upgrade joblib
  !pip install -q youtube-dl librosa python_speech_features sentencepiece
  
  # create eval config
  !cp {project_name}/example_configs/speech2text/jasper10x5_LibriSpeech_nvgrad.py {project_name}/conf.py
  !sed -i -e 's/\/data\/librispeech\/librivox-test-clean/test/' {project_name}/conf.py
  !echo 'backend = "librosa"' >> {project_name}/conf.py 
  # !echo "wav_filename, wav_filesize, transcript" > {project_name}/test.csv
  # !echo "test.wav, UNUSED, UNUSED" >> {project_name}/test.csv
  
import sys
sys.path.append(project_name)

[K     |████████████████████████████████| 1.9 MB 15.0 MB/s 
[K     |████████████████████████████████| 1.2 MB 52.2 MB/s 
[?25h  Building wheel for python-speech-features (setup.py) ... [?25l[?25hdone


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
fpaths = []
labels = []
spoken = []
for f in os.listdir('/content/drive/MyDrive/ChildesDataset'):
    for w in os.listdir('/content/drive/MyDrive/ChildesDataset/' + f):
        fpaths.append('/content/drive/MyDrive/ChildesDataset/' + f + '/' + w)
        labels.append(f)
        if f not in spoken:
            spoken.append(f)
print('Words spoken:', spoken)

Words spoken: ['and bees come out of the hive', 'a boy looking at the frog', 'and and he skipped out the the the bottle', 'and a rat came popped out of the hole', 'and and the girl looked at him', 'and get the dog and he was angry', 'a frog and a dog and a kid', 'a frog a dog and a girl', 'and because he cracked his egg', 'a frog and there is no frog in the bottle now']


In [None]:
import pandas as pd
train_df = pd.DataFrame.from_dict({"path": fpaths, "label": labels})

In [None]:
train_files, train_labels = list(train_df["path"]), list(train_df["label"])

In [None]:
import os
import json
import librosa
import soundfile as sf
from sklearn.model_selection import train_test_split

In [None]:
from scipy import signal

In [None]:
def prepare_data(files, labels, data_dir="data/", i=0):
    dictionary = dict()
    for file, label in zip(files, labels):
        wave_file = file.split("/")[-1]
        wave_file = wave_file.replace(".mp3", ".wav")
        write_file = os.path.join(data_dir, wave_file)
        samples , sr = librosa.load(file, sr=16000)
        samples = signal.resample(samples, 16000)
        if(len(samples)== 16000) : 
          duration = librosa.get_duration(y=samples, sr=16000)
          sf.write(write_file, samples, sr, 'PCM_16')
          dictionary[str(i)] = {
              "wav": "{data_root}/" + wave_file,
              "length": duration,
              "words": str(label),
              }
        i += 1
    return dictionary, i

In [None]:
#os.makedirs("data/")
train_dict, counter = prepare_data(train_files, train_labels)

In [None]:
import pandas as pd    
df = pd.DataFrame(train_labels)
df.to_csv("text.csv")

In [None]:
df1 = pd.DataFrame(train_files)
df1.to_csv("test.csv")

In [None]:
!mv /content/data/* {project_name}

In [None]:
def download_from_google_drive(file_id, file_name):
  # download a file from the Google Drive link
  !rm -f ./cookie
  !curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id={file_id}" > /dev/null
  confirm_text = !awk '/download/ {print $NF}' ./cookie
  confirm_text = confirm_text[0]
  !curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm={confirm_text}&id={file_id}" -o {file_name}
  
if not exists(join(project_name, 'w2l_log_folder')):
  download_from_google_drive('1gzGT8HoVNKY1i5HNQTKaSoCu7JHV4siR', 'jasper_10x5_dr_sp_nvgrad.zip')
  !unzip jasper_10x5_dr_sp_nvgrad.zip
  !mv checkpoint {project_name}/jasper_log_folder

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   408    0   408    0     0   1651      0 --:--:-- --:--:-- --:--:--  1651
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 2960M  100 2960M    0     0   142M      0  0:00:20  0:00:20 --:--:-- 90.2M
Archive:  jasper_10x5_dr_sp_nvgrad.zip
   creating: checkpoint/
  inflating: checkpoint/checkpoint   
  inflating: checkpoint/model.ckpt-439200.data-00000-of-00001  
  inflating: checkpoint/model.ckpt-439200.index  
  inflating: checkpoint/model.ckpt-439200.meta  


In [None]:
import scipy.signal 
from scipy import signal
import librosa

In [None]:
!cd {project_name} && python run.py --config_file conf.py --mode=infer --infer_output_file=LMFiles10.txt --use_horovod=False --num_gpus=1 --batch_size_per_gpu 1












The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



*** Restoring from the latest checkpoint
*** Loading model from jasper_log_folder/model.ckpt-439200
*** Inference config:
{'batch_size_per_gpu': 1,
 'data_layer': <class 'open_seq2seq.data.speech2text.speech2text.Speech2TextDataLayer'>,
 'data_layer_params': {'backend': 'librosa',
                       'dataset_files': ['test.csv'],
                       'dither': 1e-05,
                       'input_type': 'logfbank',
                       'norm_per_feature': True,
                       'num_audio_features': 64,
                       'pad_to': 16,
                       'precompute_mel_basis': True,
          

In [None]:
!cat '/content/actual.txt'

than he called it now to a fog
uhh he's looking at it
there's a dog and a girl a frog in the jar
then he's like happy with the dog on his head
then he's like he thinks he might be in there
then the bee's nest falls
then he's like cause a skunk came out
then he's climbing up a tree shouting into a hole
then he's carrying it away and the dog running away from the deer
then he's calling out to the frog

In [None]:
!cat '/content/Predictions.txt'

when he didn't want to anything by
oh she seems to like it
is often built on a shock in the picture
then he's like happier with a dog on his head
then you must think it might be an hour
than the bees nest fools
than he is i because the ascan came out
then he was climbing up a twig shouting into a howl
then he's coming in it away and i don't want it away from madame
than he called it now to a fog

In [None]:
!pip install jiwer

Collecting jiwer
  Downloading jiwer-2.3.0-py3-none-any.whl (15 kB)
Collecting python-Levenshtein==0.12.2
  Downloading python-Levenshtein-0.12.2.tar.gz (50 kB)
[?25l[K     |██████▌                         | 10 kB 27.6 MB/s eta 0:00:01[K     |█████████████                   | 20 kB 27.3 MB/s eta 0:00:01[K     |███████████████████▌            | 30 kB 22.3 MB/s eta 0:00:01[K     |██████████████████████████      | 40 kB 17.3 MB/s eta 0:00:01[K     |████████████████████████████████| 50 kB 4.3 MB/s 
Building wheels for collected packages: python-Levenshtein
  Building wheel for python-Levenshtein (setup.py) ... [?25l[?25hdone
  Created wheel for python-Levenshtein: filename=python_Levenshtein-0.12.2-cp37-cp37m-linux_x86_64.whl size=149868 sha256=09bd704779fddb6f1bee31f9676a7beea3a03f559020eb9410fe9b3ea9a7893b
  Stored in directory: /root/.cache/pip/wheels/05/5f/ca/7c4367734892581bb5ff896f15027a932c551080b2abd3e00d
Successfully built python-Levenshtein
Installing collected packa

In [None]:
with open('/content/actual.txt') as f:
    LMcontents = f.readlines()

In [None]:
file = open("/content/actual.txt","r+") 
file.readline(0)
X1 = file.readlines()
file.close()

In [None]:
LMPredictions = list(X1)

In [None]:
file2 = open("/content/Predictions.txt","r+") 
file2.readline(0)
X2 = file2.readlines()
file2.close()

In [None]:
NoLMPredictions = list(X2)

In [None]:
NoLMPredictions

["when he didn't want to anything by\n",
 'oh she seems to like it\n',
 'is often built on a shock in the picture\n',
 "then he's like happier with a dog on his head\n",
 'then you must think it might be an hour\n',
 'than the bees nest fools\n',
 'than he is i because the ascan came out\n',
 'then he was climbing up a twig shouting into a howl\n',
 "then he's coming in it away and i don't want it away from madame\n",
 'than he called it now to a fog']

In [None]:
from jiwer import wer

In [None]:
NoLMwer_list = []
for i in range(10):
    wer_temp2 = wer(LMPredictions,NoLMPredictions)
    NoLMwer_list.append(wer_temp2)

In [None]:
NoLMwer_list

[0.6551724137931034,
 0.6551724137931034,
 0.6551724137931034,
 0.6551724137931034,
 0.6551724137931034,
 0.6551724137931034,
 0.6551724137931034,
 0.6551724137931034,
 0.6551724137931034,
 0.6551724137931034]

In [None]:
Avg_wer = sum(NoLMwer_list) / len(NoLMwer_list)
Avg_wer

0.6551724137931033