<a href="https://colab.research.google.com/github/NickKornienko/Language-Identification-model/blob/main/LanguageUnderstanding_Inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import pickle
import warnings
warnings.filterwarnings('ignore')

In [None]:
import tensorflow as tf
from tensorflow.keras.utils import to_categorical

In [None]:
# Audio clip to classify; later cells reassign file_to_test to other samples.
file_to_test = '/content/drive/MyDrive/french_sample.mp3'
# Saved model file, read by predict_for_file through load_model.
model_path = '/content/drive/MyDrive/255mode_5layerCNN.h5'
# Pickled label encoder used to turn a predicted class index back into a label.
label_encoder_path = '/content/drive/MyDrive/language_le.pkl'

## Local Inference

In [None]:
def extract_features(file_path):
    """Return a time-averaged MFCC vector for one audio file.

    The file is decoded with librosa's default loading settings, 40 MFCCs
    are computed per frame, and the frames are averaged so that the result
    holds one value per coefficient.

    Args:
        file_path: Path to the audio file to read.

    Returns:
        The array produced by averaging the MFCC matrix over its frame axis.
    """
    signal, sr = librosa.load(file_path)
    coefficients = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=40)
    # Transpose so frames run along axis 0, then collapse that axis.
    return np.mean(coefficients.T, axis=0)

def load_model(path):
    """Load and return the Keras model saved at ``path``."""
    return tf.keras.models.load_model(path)

def load_label_encoder(path):
    """Unpickle and return the object stored at ``path``.

    Args:
        path: Filesystem path of the pickle file to read.

    Returns:
        Whatever object was pickled into the file (used by callers in this
        notebook as a label encoder).

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the content is not a valid pickle.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only point this at encoder files you produced or otherwise trust.
    # The context manager closes the handle even when unpickling fails.
    with open(path, 'rb') as handle:
        return pickle.load(handle)

def predict_for_file(file_path):
    """Classify one audio file with the locally stored model.

    Reads the module-level ``model_path`` and ``label_encoder_path`` and
    reloads both artefacts on every call.

    Args:
        file_path: Path to the audio file to classify.

    Returns:
        The predicted label, converted to ``str``.
    """
    mfcc_vector = extract_features(file_path)
    classifier = load_model(model_path)
    encoder = load_label_encoder(label_encoder_path)
    # The model is fed a single sample shaped (1, 40, 1).
    scores = classifier.predict(mfcc_vector.reshape(1, 40, 1))
    # argmax() without an axis indexes the flattened scores; wrap it in a
    # length-1 array because inverse_transform is given an array of indices.
    best_class = np.array(scores.argmax()).reshape(1,)
    return str(encoder.inverse_transform(best_class)[0])

In [None]:
# Classify the clip currently named by file_to_test with the local model and print the label.
print(predict_for_file(file_to_test))



french


In [None]:
# Point file_to_test at the English sample and classify it with the local model.
file_to_test='/content/drive/MyDrive/english_sample.mp3'
print(predict_for_file(file_to_test))



english


In [None]:
# Point file_to_test at the Spanish sample and classify it with the local model.
file_to_test='/content/drive/MyDrive/spanish_sample.mp3'
print(predict_for_file(file_to_test))


spanish


## Inference on Endpoint - Hosted on Azure ML Endpoint

In [None]:
import IPython

In [None]:
# Reset file_to_test to the French sample; the local-inference cells above
# last assigned it to the Spanish one.
file_to_test = '/content/drive/MyDrive/french_sample.mp3'
# Display the clip as the cell output so it can be listened to.
IPython.display.Audio(file_to_test)

In [None]:
import urllib.request
import json
import os
import ssl

def allowSelfSignedHttps(allowed):
    """Optionally turn off HTTPS certificate verification on the client side.

    When ``allowed`` is truthy and the ``PYTHONHTTPSVERIFY`` environment
    variable is unset or empty, the ``ssl`` module's default HTTPS context
    factory is replaced by its unverified variant. The replacement is made
    on the module itself, so it stays in effect after this call returns.

    Args:
        allowed: Truthy to request the bypass; falsy leaves ``ssl`` untouched.
    """
    if not allowed:
        return
    # A non-empty PYTHONHTTPSVERIFY means the user already chose a policy.
    if os.environ.get('PYTHONHTTPSVERIFY', ''):
        return
    unverified_factory = getattr(ssl, '_create_unverified_context', None)
    if unverified_factory:
        ssl._create_default_https_context = unverified_factory

# Requests the certificate-verification bypass for the HTTPS calls made below;
# pass False here when the scoring service presents a certificate that verifies normally.
allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service.

# Request data goes here
# The example below assumes JSON formatting which may be updated
# depending on the format your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
def ask_endpoint(file_path):
    """Send one audio file's features to the hosted scoring endpoint.

    The features from ``extract_features`` are posted as JSON in the form
    ``{"data": [...]}`` and the JSON-decoded response body is returned.

    The endpoint key is read from the ``AZUREML_ENDPOINT_KEY`` environment
    variable so that no credential is stored in the notebook.

    Args:
        file_path: Path to the audio file to score.

    Returns:
        The decoded JSON response, or ``None`` if the endpoint answered with
        an HTTP error status (the details are printed).

    Raises:
        Exception: If no endpoint key is configured.
    """
    # Imported here so the except clause does not rely on urllib.request
    # having pulled in urllib.error as a side effect.
    from urllib.error import HTTPError

    features = extract_features(file_path)
    # np.float was removed in NumPy 1.24; float64 is the dtype it resolved to.
    features = features.astype(np.float64)
    # tolist() yields plain Python floats, which json.dumps serialises directly.
    data = {'data': features.tolist()}
    body = str.encode(json.dumps(data))

    url = 'https://workbench-sfjcy.eastus2.inference.ml.azure.com/score'
    # Primary/secondary key or AMLToken for the endpoint, supplied via the
    # environment rather than embedded in source.
    api_key = os.environ.get('AZUREML_ENDPOINT_KEY', '')
    if not api_key:
        raise Exception("A key should be provided to invoke the endpoint (set AZUREML_ENDPOINT_KEY)")

    # The azureml-model-deployment header will force the request to go to a specific deployment.
    # Remove this header to have the request observe the endpoint traffic rules
    headers = {
        'Content-Type': 'application/json',
        'Authorization': ('Bearer ' + api_key),
        'azureml-model-deployment': 'languagedetection-1',
    }

    req = urllib.request.Request(url, body, headers)

    try:
        # The context manager closes the HTTP response once the body is read.
        with urllib.request.urlopen(req) as response:
            result = response.read()
        print(result)
        return json.loads(result)
    except HTTPError as error:
        print("The request failed with status code: " + str(error.code))

        # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
        print(error.info())
        print(error.read().decode("utf8", 'ignore'))
        return None

def load_label_encoder(path):
    """Unpickle and return the object stored at ``path``.

    Note: this redefines the identically named helper from the Local
    Inference section of this notebook.

    Args:
        path: Filesystem path of the pickle file to read.

    Returns:
        Whatever object was pickled into the file (used by callers in this
        notebook as a label encoder).

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the content is not a valid pickle.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only point this at encoder files you produced or otherwise trust.
    # The context manager closes the handle even when unpickling fails.
    with open(path, 'rb') as handle:
        return pickle.load(handle)

def predict_using_endpoint(file_path):
    """Classify one audio file through the hosted endpoint.

    Args:
        file_path: Path to the audio file to classify.

    Returns:
        The predicted label converted to ``str``, or ``None`` when
        ``ask_endpoint`` returned ``None``.
    """
    scores = ask_endpoint(file_path)
    if scores is not None:
        encoder = load_label_encoder(label_encoder_path)
        # argmax() without an axis indexes the flattened scores; reshape to a
        # length-1 array because inverse_transform is given an array of indices.
        winner = np.array(scores).argmax().reshape(1,)
        return str(encoder.inverse_transform(winner)[0])
    return None



In [None]:
# Score the clip named by file_to_test (the French sample assigned above) through the endpoint.
predict_using_endpoint(file_to_test)


b'[[0.0008803834789432585, 0.9991191029548645, 4.460357274638227e-07]]'


'french'

Testing on English Data

In [None]:
file_to_test_english = '/content/drive/MyDrive/english_sample.mp3'
# Play the English clip itself; file_to_test still names the French sample at this point.
IPython.display.Audio(file_to_test_english)

In [None]:
# Score the English sample through the endpoint.
predict_using_endpoint(file_to_test_english)

b'[[0.7499474287033081, 0.25002261996269226, 2.9931037715869024e-05]]'


'english'

Testing on Spanish Data

In [None]:
# Path of the Spanish sample used for the endpoint test below.
file_to_test_spanish = '/content/drive/MyDrive/spanish_sample.mp3'
# Display the clip as the cell output so it can be listened to.
IPython.display.Audio(file_to_test_spanish)

In [None]:
# Score the Spanish sample through the endpoint.
predict_using_endpoint(file_to_test_spanish)

b'[[1.3515132346597625e-09, 2.4315491842230585e-08, 1.0]]'


'spanish'