In [None]:
import os
import tarfile
import urllib.request
import numpy as np
import librosa
import tensorflow as tf


In [None]:
# Download the dataset (the train-clean-360 archive hosted on OpenSLR).
url = "http://www.openslr.org/resources/12/train-clean-360.tar.gz"
# Name the local archive after the remote file so the two cannot drift apart
# (the archive used to be saved as "dev-clean.tar.gz", which did not match the URL).
filename = url.rsplit("/", 1)[-1]

# The archive is large: skip the download when a complete copy already exists,
# so that Restart & Run All stays feasible.
if not os.path.exists(filename):
    # Download under a temporary name and rename on success, so an interrupted
    # transfer is never mistaken for a finished archive on the next run.
    partial_path = filename + ".part"
    urllib.request.urlretrieve(url, partial_path)
    os.replace(partial_path, filename)

('dev-clean.tar.gz', <http.client.HTTPMessage at 0x7f0f9a5f3f70>)

In [None]:
# Extract the dataset into the current working directory.
with tarfile.open(filename, "r:gz") as tar:
    # The archive comes from the network, so use the 'data' extraction filter
    # (rejects absolute paths, path traversal and special files) whenever the
    # running Python provides it; older interpreters fall back to the plain call.
    if hasattr(tarfile, "data_filter"):
        tar.extractall(filter="data")
    else:
        tar.extractall()
    # No explicit tar.close() here: the with-statement closes the archive on exit.

In [None]:
import os
import numpy as np
import librosa

# Function to extract features from audio file
def extract_feature(file_path):
    """Compute a time-averaged MFCC vector and the duration of one audio file.

    Parameters
    ----------
    file_path : str
        Path of the audio file to load with ``librosa.load``.

    Returns
    -------
    tuple
        ``(features, duration)`` on success, where ``features`` is the 40 MFCC
        coefficients averaged over the time axis and wrapped in a leading
        axis of length 1, and ``duration`` is a one-element array holding
        ``len(audio) / sr`` (seconds, assuming ``audio`` is a 1-D signal --
        TODO confirm for multi-channel input).
        ``(None, None)`` if loading or feature extraction fails.
    """
    try:
        audio, sr = librosa.load(file_path, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
        # Collapse the time axis: one mean value per MFCC coefficient.
        mfccs_processed = np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Best-effort by design: one unreadable file must not abort a whole
        # dataset pass. Report the actual error so that systematic problems
        # (e.g. a missing resampling backend) are not mistaken for bad files.
        print("Error encountered while parsing file: ", file_path, "-", repr(e))
        return None, None
    return np.array([mfccs_processed]), np.array([len(audio) / sr])

# Function to process the entire dataset
def process_dataset(dataset_path):
    """Walk a dataset directory and build feature/target arrays from its .flac files.

    Every file whose name ends in ``.flac`` is passed to ``extract_feature``.
    The label of a file is the name of the directory that directly contains it.
    NOTE(review): for a speaker/chapter/file layout this yields the chapter
    folder, not the speaker folder -- confirm which one is intended.

    Directories and files are visited in sorted order so that label ids and
    sample order are reproducible across runs and file systems.

    Parameters
    ----------
    dataset_path : str
        Root directory to search recursively.

    Returns
    -------
    X : numpy.ndarray
        Stacked feature arrays of all files that were parsed successfully.
    y : numpy.ndarray
        Integer label id per row of ``X`` (index into ``labels``).
    labels : list of str
        Label names in order of first appearance. A directory containing
        .flac files is registered even if every file in it fails to parse.
    """
    features, targets = [], []
    labels = []
    label_to_id = {}  # label name -> index in `labels`, for O(1) lookup

    for root, dirs, files in os.walk(dataset_path):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirs.sort()
        flac_files = sorted(name for name in files if name.endswith('.flac'))
        if not flac_files:
            continue

        # Portable replacement for root.split('/')[-1]; normpath also copes
        # with a trailing separator on the dataset root.
        label = os.path.basename(os.path.normpath(root))
        if label not in label_to_id:
            label_to_id[label] = len(labels)
            labels.append(label)
        label_id = label_to_id[label]

        for name in flac_files:
            feature, _duration = extract_feature(os.path.join(root, name))
            # extract_feature signals failure with None; skip those files.
            if feature is not None:
                features.append(feature)
                targets.append(label_id)

    X = np.array(features)
    y = np.array(targets)
    return X, y, labels

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, LSTM, Bidirectional
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping

# Fix the NumPy and TensorFlow random seeds so repeated runs are comparable
SEED = 1234
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Build the feature array, integer targets and label names from the extracted corpus
DATASET_DIR = '/content/LibriSpeech'
X, y, labels = process_dataset(DATASET_DIR)


In [None]:
# The input dimensions are read from the feature array (axes 1 and 2 of X)
timesteps, n_features = X.shape[1], X.shape[2]
inputs = Input(shape=(timesteps, n_features))

# Two stacked bidirectional LSTM layers, each followed by dropout; the first
# returns the full sequence so that the second LSTM can consume it
x = Bidirectional(LSTM(256, return_sequences=True))(inputs)
x = Dropout(rate=0.3)(x)
x = Bidirectional(LSTM(256))(x)
x = Dropout(rate=0.3)(x)

# Dense head ending in one softmax unit per label
x = Dense(128, activation='relu')(x)
outputs = Dense(units=len(labels), activation='softmax')(x)
model = Model(inputs=inputs, outputs=outputs)

# Compile the model
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Early-stopping callback that watches the validation loss
early_stop = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

In [None]:
# Keras takes the validation split from the *last* samples, before any shuffling.
# X and y are ordered by label folder, so shuffle once up front; otherwise the
# validation set would hold labels that are largely absent from the training part.
# A locally seeded generator keeps this cell idempotent when it is re-run.
shuffle_idx = np.random.default_rng(1234).permutation(len(X))

# Train the model; early_stop is now actually passed to fit (it was created
# earlier but never used), so training halts once val_loss stops improving.
history = model.fit(
    X[shuffle_idx],
    y[shuffle_idx],
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    callbacks=[early_stop],
)


In [None]:
# Display the dimensions of the feature array returned by process_dataset
X.shape

In [None]:
# Number of distinct labels found; this is also the width of the softmax output layer
len(labels)