In [66]:
import os
import librosa
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.image import resize

In [67]:
# Load the saved Keras model from the checkpoint file inside
# ./music_genre_classifier; every prediction cell below uses `model`.
model =  tf.keras.models.load_model('./music_genre_classifier/checkpoint.model.keras')


In [68]:
# Genre labels, looked up by predicted class index when the final genre is
# chosen, so the order matters.
# NOTE(review): presumably this is the same label order used when the model
# was trained -- confirm against the training notebook.
classes = [
    'blues',
    'classical',
    'country',
    'disco',
    'hiphop',
    'jazz',
    'metal',
    'pop',
    'reggae',
    'rock',
]

In [69]:
def load_and_preprocess_data(file_path, target_shape=(160, 160),
                             chunk_duration=4, overlap_duration=2):
    """Split an audio file into overlapping chunks of resized Mel spectrograms.

    The file is read at its native sampling rate (``sr=None``), cut into
    windows of ``chunk_duration`` seconds whose starts are
    ``chunk_duration - overlap_duration`` seconds apart, and every window is
    converted to a Mel spectrogram that is resized to ``target_shape``.

    The trailing window may contain fewer samples than a full chunk; it is
    not padded, only resized like every other window.

    Parameters
    ----------
    file_path : str or path-like
        Audio file readable by ``librosa.load``.
    target_shape : tuple of int, default (160, 160)
        ``(height, width)`` each spectrogram is resized to.
    chunk_duration : float, default 4
        Length of each window, in seconds.
    overlap_duration : float, default 2
        Overlap between consecutive windows, in seconds. Must be
        non-negative and smaller than ``chunk_duration``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_chunks, height, width, 1)``. At least one chunk
        is always returned, even for clips shorter than ``chunk_duration``.

    Raises
    ------
    ValueError
        If the file contains no samples, or if the chunk/overlap durations
        do not yield a positive step between consecutive windows.
    """
    audio_data, sample_rate = librosa.load(file_path, sr=None)
    if len(audio_data) == 0:
        raise ValueError(f"no audio samples found in {file_path!r}")

    # Convert durations to whole sample counts so they are valid slice indices
    # even when a duration or the sampling rate is a float.
    chunk_samples = int(round(chunk_duration * sample_rate))
    overlap_samples = int(round(overlap_duration * sample_rate))
    if chunk_samples <= 0 or not 0 <= overlap_samples < chunk_samples:
        raise ValueError(
            "chunk_duration must be positive and overlap_duration must be "
            "non-negative and smaller than chunk_duration"
        )
    hop_samples = chunk_samples - overlap_samples

    # Without the lower bound, a clip no longer than the overlap would yield
    # zero (or a negative number of) chunks and an empty result.
    num_chunks = max(
        1,
        int(np.ceil((len(audio_data) - chunk_samples) / hop_samples)) + 1,
    )

    spectrograms = []
    for i in range(num_chunks):
        start = i * hop_samples
        # Slicing past the end of the signal simply gives a shorter last chunk.
        chunk = audio_data[start:start + chunk_samples]

        mel_spectrogram = librosa.feature.melspectrogram(y=chunk, sr=sample_rate)
        # Add a trailing channel axis so the spectrogram can be resized as an image.
        mel_spectrogram = resize(np.expand_dims(mel_spectrogram, axis=-1), target_shape)
        spectrograms.append(mel_spectrogram)

    return np.array(spectrograms)

In [84]:
# Clip to classify.
file_path = "./genres_original/reggae/reggae.00050.wav"
# Raw waveform and native sampling rate of the clip.
# NOTE(review): `y` and `sr` are not read by any cell visible here, and
# load_and_preprocess_data() loads the file again itself -- confirm whether
# this extra load is still needed.
y, sr = librosa.load(file_path, sr=None)
# Spectrogram chunks for the clip, each resized to 160x160 for the model.
data = load_and_preprocess_data(file_path, target_shape=(160,160))

In [85]:
# Score every spectrogram chunk of the clip in a single batch; the result has
# one row per chunk.
y_pred = model.predict(data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step


In [86]:
# Sanity check: expect (number of chunks, number of genre classes).
y_pred.shape

(15, 10)

In [87]:
# For each chunk (row of y_pred), keep the index of its highest-scoring class.
pred_categories = y_pred.argmax(axis=1)
pred_categories

array([8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2])

In [88]:
# Majority vote over the chunks: count how often each class index was
# predicted and report the most frequent one as the clip's genre.
unique, counts = np.unique(pred_categories, return_counts=True)
genre = {label: votes for label, votes in zip(unique, counts)}
# `unique` is sorted, so on a tie the lowest class index wins.
genre_pred = classes[unique[np.argmax(counts)]]
genre_pred

'reggae'