In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
from IPython import display
from jiwer import wer
import librosa
import numpy as np
import soundfile
import json
import dill
import random
from python_speech_features import mfcc
import librosa
import scipy.io.wavfile as wav
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable

from keras import backend as K
from keras.models import Model
from keras.layers import (BatchNormalization, Conv1D, Dense, Input, 
    TimeDistributed, Activation, Bidirectional, SimpleRNN, GRU, LSTM)
from keras.utils.vis_utils import plot_model


import _pickle as pickle
from numpy.lib.stride_tricks import as_strided

from keras.layers import (Input, Lambda)
from tensorflow.keras.optimizers import SGD
from keras.callbacks import ModelCheckpoint   
import os
import mlflow
import mlflow.tensorflow

In [2]:
# loading scripts
sys.path.insert(1, '../scripts')
sys.path.append("..")
sys.path.append(".")

from data_cleaning import DataCleaner
from data_viz import Data_Viz
import acoustic_modeling as AM

DC = DataCleaner("../logs/preprocessing_notebook.log")
DV = Data_Viz()





In [3]:
# defining parameters

MIN_BATCH_SIZE = 1
MFCC_DIME = 13
WINDOW = 20          # in ms
STEP = 10            # in ms
MAX_FREQ = 8000      # in Hz
MODEL_NAME = "RNN_model"
EPOCHS = 10
NFFT = 1024
EXP_NAME = "Transcription_Tests"
RUN_NAME = "run_1"

In [4]:
try:
    experiment_id = mlflow.create_experiment(name=EXP_NAME)
    print("not here")
except:
    print("here")
    experiment_id = mlflow.get_experiment_by_name(name=EXP_NAME).experiment_id
# Run name is a string that does not have to be unique
print("now here")
mlflow_run = mlflow.start_run(experiment_id=experiment_id, run_name=RUN_NAME)

here
now here


In [5]:
# loading meta data

train_meta = DC.meta_loader("../data/train_meta.csv", "csv")
valid_meta = DC.meta_loader("../data/test_meta.csv", "csv")

print(f"Size of the training set: {len(train_meta)}")
print(f"Size of the validation set: {len(valid_meta)}")
train_meta.head()

Size of the training set: 800
Size of the validation set: 200


Unnamed: 0.1,Unnamed: 0,Target,Feature,Output,Duration,n_channel
0,0,የኤርትራ ወታደሮች ኢትዮጵያውያኑ ን ቀር ባ እንዳ ታይ ና እንዳታ ነጋገር...,../data/train/wav/tr_1025_tr11026.wav,../data/train_new/tr_1025_tr11026.wav,9.977324,2
1,1,በ ላቸው ም ነው እንዲ ህ ተ ንቆ ለ ጳ ጰ ሰ,../data/train/wav/tr_10602_tr04116.wav,../data/train_new/tr_10602_tr04116.wav,9.977324,2
2,2,ጣት ሽን አን ቋ ቂው ም አታንቋቂ ውም ማ ንም አላ የ ሽም,../data/train/wav/tr_10722_tr03139.wav,../data/train_new/tr_10722_tr03139.wav,9.977324,2
3,3,ባለሙያዎቹ በ ተጨማሪ ም እነዚህ ዲ ቃሎች የ ኢኮሎጂ መዛባት ሊያ ስከትሉ...,../data/train/wav/tr_10171_tr099013.wav,../data/train_new/tr_10171_tr099013.wav,9.977324,2
4,4,አ በ በ ለ ዶክተሩ ሁለመና ዬን ነው የሚያመ ኝ ብሎ ነገረው,../data/train/wav/tr_10800_tr02123.wav,../data/train_new/tr_10800_tr02123.wav,9.977324,2


In [6]:
# replace redundant letters

train_meta["Target"] = train_meta["Target"].apply(lambda x: AM.replacer(x))
valid_meta["Target"] = valid_meta["Target"].apply(lambda x: AM.replacer(x))


In [7]:
audio_gen = AM.AudioGenerator(train_meta, valid_meta, minibatch_size=MIN_BATCH_SIZE,
                       window=WINDOW, step=STEP, max_freq=MAX_FREQ,
                       mfcc_dim=MFCC_DIME, nfft=NFFT)

audio_gen.load_train_data()
audio_gen.load_validation_data()

In [8]:
model = AM.model_1(input_dim=13,
                units=5,
                activation='relu',
                output_dim=len(AM.char_map)+1)

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
the_input (InputLayer)       [(None, None, 13)]        0         
_________________________________________________________________
rnn (GRU)                    (None, None, 5)           300       
_________________________________________________________________
batch_normalization (BatchNo (None, None, 5)           20        
_________________________________________________________________
time_distributed (TimeDistri (None, None, 223)         1338      
_________________________________________________________________
softmax (Activation)         (None, None, 223)         0         
Total params: 1,658
Trainable params: 1,648
Non-trainable params: 10
_________________________________________________________________
None


In [9]:

# training the model.
AM.train(audio_gen, input_to_softmax=model, model_name=MODEL_NAME, epochs=EPOCHS, minibatch_size=MIN_BATCH_SIZE, verbose=2)

    

Epoch 1/10


In [None]:
# pickling the model
# save model loss

with open(filename, 'wb') as f:
    dill.dump(model, f)

In [None]:
loaded_model = dill.load(open(filename, 'rb'))

In [None]:
WER = AM.predict(audio_gen,14, 'train', loaded_model)
#raw_pred_char = np.vstack([sorted(AM.char_map.keys(), key=lambda k: AM.char_map[k]) + ['BLANK'], raw_pred])

In [None]:
# tracking the experiment
with mlflow_run:
    mlflow.log_param("Epochs", EPOCHS)
    mlflow.log_param("Batch_Size", MIN_BATCH_SIZE)
    mlflow.log_metric("WER", WER)