In [45]:
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import wavfile #for audio processing
import os
import pickle
import pandas as pd
import librosa   #for audio processing


In [47]:
# Dataset root folders, relative to the notebook's working directory.
# The trailing "/" is kept because later cells extend these by string concatenation.
train_path, test_path = "../data/train/", "../data/test/"

In [48]:
# Locations of the training audio and its transcript file.
# os.path.join avoids the doubled separator that `train_path + "/wav/"` produced
# (train_path already ends in "/"). The trailing "" keeps a final separator on the
# folder so code that concatenates a file name onto it keeps working.
train_audio_folder = os.path.join(train_path, "wav", "")
train_script_file = os.path.join(train_path, "trsTrain.txt")

In [49]:
def extract_transcription_and_labels(file_path):
    """Parse a transcript file into a ``{label: transcription}`` dictionary.

    Every non-blank line is processed as follows: the ``<s>`` and ``</s>``
    markers are removed, the last whitespace-separated token (with any
    parentheses stripped) becomes the label, and the remaining tokens, joined
    by single spaces, become the transcription.

    Args:
        file_path: Path to the transcript text file.

    Returns:
        dict mapping label -> transcription. If a label occurs more than once,
        the last occurrence wins.

    Raises:
        OSError: If the file cannot be opened.
    """
    label_trans_dict = dict()

    # Explicit UTF-8 so the non-ASCII transcript text decodes the same way on
    # every platform instead of depending on the locale's default encoding.
    with open(file_path, encoding="utf-8") as f:
        for line in f:
            tokens = line.replace("<s>", "").replace("</s>", "").split()

            # Blank lines (or lines holding only markers) carry no label;
            # skip them rather than failing on an empty token list.
            if not tokens:
                continue

            label = tokens[-1].replace("(", "").replace(")", "")
            label_trans_dict[label] = ' '.join(tokens[:-1])

    return label_trans_dict
    

In [50]:
def get_all_wav_paths(folder_path):
    """List the names of all entries found directly inside ``folder_path``.

    The names are returned exactly as ``os.listdir`` reports them: bare names
    without the folder prefix, unfiltered and in no guaranteed order.
    """
    entry_names = os.listdir(folder_path)
    return entry_names
  


In [51]:
def extract_audio(path, max_lenght=10000, sr=8000):
    """Load audio files from a folder into a dictionary keyed by label.

    Args:
        path: Folder containing the audio files. Files are loaded from this
            folder (previously the global ``train_audio_folder`` was used
            regardless of the argument).
        max_lenght: Maximum number of files to load.
        sr: Sample rate passed to ``librosa.load`` (previously ignored in
            favour of a hard-coded 8000).

    Returns:
        dict mapping label (file name up to the first ".") to a tuple
        ``(wav, duration, channel, sample_rate)`` where ``duration`` is
        ``len(wav) / sample_rate`` and ``channel`` is the number of array
        dimensions of ``wav``.
    """
    wav_dict = dict()

    # A distinct loop variable keeps the `path` argument intact.
    for file_name in get_all_wav_paths(path):
        if len(wav_dict) >= max_lenght:
            break
        wav, sample_rate = librosa.load(os.path.join(path, file_name), sr=sr)
        dur = float(len(wav) / sample_rate)
        channel = len(wav.shape)
        label = file_name.split(".")[0]
        wav_dict[label] = (wav, dur, channel, sample_rate)

    return wav_dict


In [52]:

def create_meta_data(transcripton_obj, audo_obj):
    """Build a metadata DataFrame with one row per loaded audio clip.

    Args:
        transcripton_obj: dict mapping label -> transcription text.
        audo_obj: dict mapping label -> tuple whose items at index 1, 2 and 3
            are the duration, channel and sample rate of the clip.

    Returns:
        pandas.DataFrame with columns ``translation``, ``label``, ``channel``,
        ``sample_rate`` and ``duration``, in the iteration order of
        ``audo_obj``. Empty input yields an empty frame with those columns.

    Raises:
        KeyError: If a label in ``audo_obj`` has no entry in ``transcripton_obj``.
    """
    columns = ["translation", "label", "channel", "sample_rate", "duration"]

    # Collect all rows first and build the frame once; the original rebuilt the
    # frame on every iteration and left it unbound when there were no clips.
    records = [
        (transcripton_obj[label], label, info[2], info[3], info[1])
        for label, info in audo_obj.items()
    ]

    return pd.DataFrame(records, columns=columns)
  



In [53]:
def read_obj(path):
    """Unpickle and return the object stored in the file at ``path``.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    from a trusted source.
    """
    with open(path, "rb") as source:
        loaded = pickle.load(source)
    return loaded
    
def write_obj(path, obj):
    """Pickle ``obj`` into the file at ``path``, overwriting any existing file."""
    with open(path, "wb") as sink:
        pickle.dump(obj, sink)

In [54]:
def read_csv(csv_path, missing_values=None):
    """Read a CSV file into a DataFrame, reporting the outcome on stdout.

    Args:
        csv_path: Path of the CSV file to read.
        missing_values: Extra strings to treat as missing, forwarded to
            ``pandas.read_csv`` as ``na_values``. ``None`` (the default) means
            no extra markers; a ``None`` sentinel replaces the former mutable
            ``[]`` default.

    Returns:
        The loaded DataFrame, or ``None`` if the file does not exist.
    """
    if missing_values is None:
        missing_values = []

    try:
        df = pd.read_csv(csv_path, na_values=missing_values)
    except FileNotFoundError:
        print("file not found")
        return None

    print("file read as csv")
    return df
def save_csv(df, csv_path):
    """Write ``df`` to ``csv_path`` without the index, reporting the outcome.

    Saving is best-effort: a failure is reported on stdout (now including the
    reason) instead of being raised.

    Args:
        df: DataFrame to write.
        csv_path: Destination file path.

    Returns:
        The same ``df`` that was passed in, whether or not the save succeeded.
    """
    try:
        df.to_csv(csv_path, index=False)
        print('File Successfully Saved.!!!')

    except Exception as exc:
        # Keep the best-effort contract, but surface why the save failed.
        print(f"Save failed... ({type(exc).__name__}: {exc})")

    return df

In [55]:
# Parse the transcript file, then load at most 50 clips from the audio folder.
translation_obj = extract_transcription_and_labels(file_path=train_script_file)
audio_dict = extract_audio(path=train_audio_folder, max_lenght=50)

Exception ignored in: <function Wave_write.__del__ at 0x1a1c2344d0>
Traceback (most recent call last):
  File "/Users/daniel/opt/anaconda3/lib/python3.7/wave.py", line 327, in __del__
    self.close()
  File "/Users/daniel/opt/anaconda3/lib/python3.7/wave.py", line 445, in close
    self._ensure_header_written(0)
  File "/Users/daniel/opt/anaconda3/lib/python3.7/wave.py", line 465, in _ensure_header_written
    raise Error('sample width not specified')
wave.Error: sample width not specified


In [56]:

# Cache both dictionaries as pickle files in the working directory.
write_obj(path="audio_dict.pkl", obj=audio_dict)
write_obj(path="translation_dict.pkl", obj=translation_obj)


In [57]:
# Reload the cached dictionaries and derive the per-clip metadata table from them.
translation_obj = read_obj(path="./translation_dict.pkl")
audio_obj = read_obj(path="./audio_dict.pkl")
meta_data = create_meta_data(transcripton_obj=translation_obj, audo_obj=audio_obj)

In [58]:
# Show the metadata table as this cell's output.
meta_data

Unnamed: 0,translation,label,channel,sample_rate,duration
0,የተለያዩ የ ትግራይ አውራጃ ተወላጆች ገንዘባቸው ን አዋጥ ተው የ ልማት ...,tr_10000_tr097082,1,8000,9.088
1,የ ጠመንጃ ተኩስ ተከፈተ ና አራት የኤርትራ ወታደሮች ተገደሉ,tr_10001_tr097083,1,8000,5.632
2,ላነሷቸው ጥያቄዎች የ ሰጡት ን መልስ አቅርበ ነዋል,tr_10002_tr097084,1,8000,6.144
3,እ ብዱ አስፋልቱ ላይ የ ኰለኰ ለ ው ድንጋይ መኪና አላ ሳልፍ አለ,tr_10003_tr097085,1,8000,5.76
4,ጠጁ ን ኰ መኰ መ ኰ መኰ መ ና ሚስቱ ን ሲ ያሰቃ ያት አደረ,tr_10004_tr097086,1,8000,5.376
5,ድንቹ በ ደንብ ስለተኰተኰተ በ ጥሩ ሁኔታ ኰረተ,tr_10005_tr097087,1,8000,6.656
6,በ ድህነቱ ላይ ይህ ክፉ በሽታ ስለ ያዘው ሰውነቱ በጣም ኰ ሰሰ,tr_10006_tr097088,1,8000,6.528
7,በሩን እንዲ ህ በ ሀይል አታንኳኲ ብዬ አልነበረ ም እንዴ,tr_10007_tr097089,1,8000,5.504
8,በለጠ ች የ በየነ የ በኩር ልጅ ነች,tr_10008_tr097090,1,8000,4.48
9,የ ቆላ ቁስል ና ቁርጥ ማት በጣም አሰቃቂ በሽታዎች ናቸው,tr_10009_tr097091,1,8000,4.864


In [59]:
# Persist the metadata table; save_csv returns the frame, so it is also shown as the cell output.
save_csv(df=meta_data, csv_path="meta_data.csv")

File Successfully Saved.!!!


Unnamed: 0,translation,label,channel,sample_rate,duration
0,የተለያዩ የ ትግራይ አውራጃ ተወላጆች ገንዘባቸው ን አዋጥ ተው የ ልማት ...,tr_10000_tr097082,1,8000,9.088
1,የ ጠመንጃ ተኩስ ተከፈተ ና አራት የኤርትራ ወታደሮች ተገደሉ,tr_10001_tr097083,1,8000,5.632
2,ላነሷቸው ጥያቄዎች የ ሰጡት ን መልስ አቅርበ ነዋል,tr_10002_tr097084,1,8000,6.144
3,እ ብዱ አስፋልቱ ላይ የ ኰለኰ ለ ው ድንጋይ መኪና አላ ሳልፍ አለ,tr_10003_tr097085,1,8000,5.76
4,ጠጁ ን ኰ መኰ መ ኰ መኰ መ ና ሚስቱ ን ሲ ያሰቃ ያት አደረ,tr_10004_tr097086,1,8000,5.376
5,ድንቹ በ ደንብ ስለተኰተኰተ በ ጥሩ ሁኔታ ኰረተ,tr_10005_tr097087,1,8000,6.656
6,በ ድህነቱ ላይ ይህ ክፉ በሽታ ስለ ያዘው ሰውነቱ በጣም ኰ ሰሰ,tr_10006_tr097088,1,8000,6.528
7,በሩን እንዲ ህ በ ሀይል አታንኳኲ ብዬ አልነበረ ም እንዴ,tr_10007_tr097089,1,8000,5.504
8,በለጠ ች የ በየነ የ በኩር ልጅ ነች,tr_10008_tr097090,1,8000,4.48
9,የ ቆላ ቁስል ና ቁርጥ ማት በጣም አሰቃቂ በሽታዎች ናቸው,tr_10009_tr097091,1,8000,4.864


In [60]:
def add_noise(data, noise_factor):
    """Add scaled standard-normal noise to a signal.

    Args:
        data: 1-D sequence or array of samples.
        noise_factor: Multiplier applied to the standard-normal noise before
            it is added to ``data``.

    Returns:
        numpy.ndarray with the same length and dtype as ``data``.
    """
    signal = np.asarray(data)
    noise = np.random.randn(len(signal))
    augmented_data = signal + noise_factor * noise
    # Cast back using the array's dtype rather than type(data[0]), which
    # failed on empty input.
    return augmented_data.astype(signal.dtype)

def shift_signal(data, sampling_rate, shift_max, shift_direction):
    """Shift a signal by a random number of samples, padding with silence.

    A shift size is drawn uniformly from ``[0, int(sampling_rate * shift_max))``.
    The signal is rolled with ``np.roll`` and the samples that wrapped around
    are set to 0.

    Args:
        data: 1-D array of samples.
        sampling_rate: Samples per second of ``data``.
        shift_max: Upper bound of the shift, in seconds.
        shift_direction: ``'right'`` applies a negative ``np.roll`` shift,
            ``'both'`` picks the sign at random, and any other value applies a
            positive ``np.roll`` shift.

    Returns:
        A new numpy.ndarray with the shifted signal; ``data`` is not modified.
    """
    # randint needs an integer bound, and an empty range means nothing to shift.
    max_shift = int(sampling_rate * shift_max)
    if max_shift <= 0:
        return np.array(data, copy=True)

    shift = np.random.randint(max_shift)
    if shift_direction == 'right':
        shift = -shift
    elif shift_direction == 'both':  # was `self.shift_direction`: NameError in a plain function
        direction = np.random.randint(0, 2)
        if direction == 1:
            shift = -shift

    augmented_data = np.roll(data, shift)
    # Set to silence for heading/ tailing
    if shift > 0:
        augmented_data[:shift] = 0
    elif shift < 0:
        # A zero shift must leave the signal untouched; the former plain `else`
        # ran `augmented_data[0:] = 0` and silenced everything.
        augmented_data[shift:] = 0
    return augmented_data

def change_picth(data, sampling_rate, pitch_factor):
    """Pitch-shift a signal with ``librosa.effects.pitch_shift``.

    Args:
        data: Audio samples.
        sampling_rate: Sample rate of ``data``, passed as ``sr``.
        pitch_factor: Size of the shift, passed as ``n_steps``.

    Returns:
        The pitch-shifted signal returned by librosa.
    """
    # Keyword arguments work on both older librosa releases and newer ones,
    # where `sr` and `n_steps` are keyword-only.
    return librosa.effects.pitch_shift(data, sr=sampling_rate, n_steps=pitch_factor)
                                       
def change_speed(data, speed_factor):
    """Time-stretch a signal with ``librosa.effects.time_stretch``.

    Args:
        data: Audio samples.
        speed_factor: Stretch factor, passed as ``rate``.

    Returns:
        The time-stretched signal returned by librosa.
    """
    # Keyword argument works on both older librosa releases and newer ones,
    # where `rate` is keyword-only.
    return librosa.effects.time_stretch(data, rate=speed_factor)
                                       

In [61]:
# Pick one loaded clip by label. Entries are (wav, dur, channel, sample_rate)
# tuples as built by extract_audio, so index 3 is the sample rate.
sample_audio = audio_obj['tr_10032_tr097114']
rate = sample_audio[3]

In [62]:
# Render an inline audio player for the selected clip's waveform.
ipd.Audio(data=sample_audio[0], rate=rate)