In [1]:
# Import the necessary libraries
import numpy as np  # Library for numerical operations
import librosa
# Define a function to extract features from an audio file
def feature_extractor(file, n_mfcc=40):
    """Summarise an audio file as a fixed-length vector of time-averaged MFCCs.

    Parameters
    ----------
    file : str or path-like
        Audio file to analyse; passed unchanged to ``librosa.load``.
    n_mfcc : int, default 40
        Number of Mel-frequency cepstral coefficients to compute, and
        therefore the length of the returned vector. The default keeps
        the behaviour this function had when the value was hard-coded.

    Returns
    -------
    numpy.ndarray
        1-D array with one entry per coefficient: the mean of that
        coefficient over all analysis frames of the clip.
    """
    # No `sr` is passed, so librosa resamples to its own default rate;
    # 'kaiser_fast' only selects the (faster) resampling filter used for that.
    audio, sample_rate = librosa.load(file, res_type='kaiser_fast')

    # MFCC matrix: one row per coefficient, one column per frame
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)

    # Collapse the time axis by averaging (this is a mean, not a rescaling),
    # so clips of any duration yield a vector of the same length
    mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)

    return mfccs_scaled_features

In [2]:
# Import the necessary libraries
import librosa  # Library for audio and music processing
import pandas as pd  # Library for data manipulation and analysis
import os  # Library for interacting with the operating system

# Root folder of the UrbanSound8K audio clips (relative to the notebook's
# working directory); clips are referenced elsewhere as <root>/fold<N>/<file>.wav
audio_dataset_path = 'UrbanSound8K/audio'

# Load the clip metadata from the CSV file; the displayed columns include
# slice_file_name, fold, classID and class.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# row index that was saved into the CSV — confirm whether it is needed.
metadata = pd.read_csv("UrbanSound8K/metadata/UrbanSound8k.csv")

# Preview the metadata dataframe: head() with no argument shows the first 5 rows
metadata.head()

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


In [4]:
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Load the saved Keras model
saved_model_path = 'saved_models/audio_classification.keras'
model = load_model(saved_model_path)

# Audio clip to classify, built from the dataset root defined alongside the
# metadata so the dataset location is configured in exactly one place
filename = os.path.join(audio_dataset_path, 'fold9', '13579-2-0-17.wav')

# Extract features with the same feature_extractor used elsewhere in the notebook
prediction_feature = feature_extractor(filename)

# Turn the 1-D feature vector into a single-row batch, shape (1, n_features)
# NOTE(review): this must match the input shape the model was trained on — confirm
prediction_feature = prediction_feature.reshape(1, -1)

# Run the model; the result holds one row of per-class scores for the clip
prediction_probabilities = model.predict(prediction_feature)

# Index of the highest-scoring class (array of length 1)
predicted_class_label = prediction_probabilities.argmax(axis=-1)

# Unique class names from the metadata dataframe loaded earlier
class_names = metadata['class'].unique()

# Fit a LabelEncoder on the class names. LabelEncoder orders classes by sorted
# name, so index i maps to the i-th name alphabetically.
# NOTE(review): this only decodes correctly if the training labels were encoded
# with the same ordering — confirm against the training code.
label_encoder = LabelEncoder()
label_encoder.fit(class_names)

# Guard against a model/metadata mismatch: if the model emits a different number
# of scores than there are classes, the decoded name would be meaningless
assert prediction_probabilities.shape[-1] == len(label_encoder.classes_), (
    "model output size does not match the number of classes in the metadata"
)

# Map the predicted index back to its class name
predicted_class_name = label_encoder.inverse_transform(predicted_class_label)

# Print the predicted class name
print(predicted_class_name)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
['children_playing']
