In [1]:
import sounddevice as sd
import numpy as np
import librosa
import soundfile as sf
import tkinter as tk
from tkinter import messagebox
from tensorflow.keras.models import load_model
import pickle

# Load pre-trained model and label encoder.
# Both files are read from the current working directory and must already
# exist when this cell runs; 'label_encoder.pkl' is the file name written by
# the LabelEncoder cell of this notebook.
# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# only load a label_encoder.pkl that comes from a trusted source.
model = load_model('MSTCNN_model.h5')
with open('label_encoder.pkl', 'rb') as f:
    encoder = pickle.load(f)

# Define parameters
SAMPLE_RATE = 22050  # Sampling rate (Hz) used for recording, saving, playback and MFCC extraction
DURATION = 30  # Default recording length in seconds
NUM_MFCC = 13  # Default n_mfcc passed to librosa.feature.mfcc
N_FFT = 2048  # Default n_fft passed to librosa.feature.mfcc
HOP_LENGTH = 512  # Default hop_length passed to librosa.feature.mfcc
EXPECTED_MFCC_LEN = 1320  # Expected number of frames in MFCC for the model
# NOTE(review): DURATION * SAMPLE_RATE / HOP_LENGTH is roughly 1292 hops, which
# is below 1320, so a full-length recording presumably still gets zero-padded
# by preprocess_audio - confirm this matches how the model was trained.

# Variable to store last recorded audio (None until the first recording is made)
last_audio = None

# Record real-time audio
def record_audio(duration=DURATION, sr=SAMPLE_RATE):
    """Record a single-channel clip through sounddevice and cache it.

    The call blocks until the capture has completed. The result is also
    stored in the module-level ``last_audio`` so the save and play actions
    can reuse it.

    Args:
        duration: Length of the capture in seconds.
        sr: Sampling rate in Hz handed to ``sd.rec``.

    Returns:
        The recorded buffer after ``np.squeeze`` has removed size-1 axes.
    """
    global last_audio

    print("Recording started...")
    captured = sd.rec(int(duration * sr), samplerate=sr, channels=1)
    # Block until the capture is complete before touching the buffer.
    sd.wait()
    print("Recording finished.")

    # Keep a reference around for later playback / saving.
    last_audio = np.squeeze(captured)
    return last_audio

# Process audio to MFCC with padding/truncation
def preprocess_audio(audio, sr=SAMPLE_RATE, num_mfcc=NUM_MFCC, n_fft=N_FFT, hop_length=HOP_LENGTH, expected_len=EXPECTED_MFCC_LEN):
    """Turn a waveform into a fixed-length MFCC batch for the model.

    Args:
        audio: Waveform passed to ``librosa.feature.mfcc`` as ``y``.
        sr: Sampling rate of ``audio`` in Hz.
        num_mfcc: Number of MFCC coefficients to compute.
        n_fft: FFT window size for the MFCC computation.
        hop_length: Hop between successive analysis frames.
        expected_len: Number of frames the output is padded or cut to.

    Returns:
        The transposed MFCC matrix with a new leading axis of size 1.
    """
    features = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)

    # Positive when the clip produced fewer frames than required.
    shortfall = expected_len - features.shape[1]
    if shortfall > 0:
        # Append zero-valued frames on the right of the frame axis only.
        features = np.pad(features, ((0, 0), (0, shortfall)), mode='constant')
    else:
        # Enough (or too many) frames: keep the first expected_len of them.
        features = features[:, :expected_len]

    # Transpose, then prepend the batch axis.
    return features.T[np.newaxis, ...]

# Classify genre of audio
def classify_genre(audio_mfcc):
    """Predict the genre for a preprocessed MFCC batch.

    Uses the module-level ``model`` for the prediction and the module-level
    ``encoder`` to map the winning class index back to its label.

    Args:
        audio_mfcc: Input passed straight to ``model.predict``.

    Returns:
        A ``(label, confidence)`` tuple for the first item of the batch,
        where ``confidence`` is the winning score multiplied by 100.
    """
    scores = model.predict(audio_mfcc)
    winners = np.argmax(scores, axis=1)  # best class index per batch row
    labels = encoder.inverse_transform(winners)

    top_index = winners[0]
    confidence = scores[0][top_index] * 100  # express as a percentage
    return labels[0], confidence

# GUI functions
def classify_audio():
    """Button callback: record a clip, classify it and show the result.

    Chains ``record_audio`` -> ``preprocess_audio`` -> ``classify_genre`` and
    reports the predicted genre and its confidence in an info dialog.
    """
    genre, confidence = classify_genre(preprocess_audio(record_audio()))
    summary = f"Predicted Genre: {genre} \nConfidence: {confidence:.2f}%"
    messagebox.showinfo("Genre Classification", summary)

def save_audio():
    """Button callback: write the last recording to a WAV file.

    The file is created in the current working directory and named
    ``recording_<number>.wav`` with a randomly drawn four-digit number.
    Names that already exist on disk are skipped so that an earlier
    recording is never silently overwritten.

    Shows an info dialog with the chosen file name on success, or an error
    dialog when nothing has been recorded yet or no unused name was found.
    """
    import os  # local import: only needed for the name-collision check

    global last_audio
    if last_audio is None:
        messagebox.showerror("Error", "No audio recorded to save.")
        return

    # Draw random names until one is free; bounded so a directory that is
    # (nearly) full of recordings cannot hang the GUI callback.
    max_attempts = 100
    for _ in range(max_attempts):
        file_name = f"recording_{np.random.randint(1000, 9999)}.wav"
        if not os.path.exists(file_name):
            break
    else:
        messagebox.showerror("Error", "Could not find an unused file name for the recording.")
        return

    sf.write(file_name, last_audio, SAMPLE_RATE)  # Save using soundfile
    messagebox.showinfo("Save Recording", f"Audio saved as {file_name}")

def play_audio():
    """Button callback: replay the most recent recording.

    Shows an error dialog when nothing has been recorded yet; otherwise
    plays ``last_audio`` at ``SAMPLE_RATE`` and blocks until playback ends.
    """
    # Guard clause: nothing to play before the first recording.
    if last_audio is None:
        messagebox.showerror("Error", "No audio recorded to play.")
        return

    sd.play(last_audio, samplerate=SAMPLE_RATE)
    sd.wait()  # hold the callback until playback has finished

# GUI interface: a single window with one button per action.
root = tk.Tk()
root.title("Real-Time Genre Classification")

# Create GUI buttons (stacked vertically in the order they are packed)
classify_button = tk.Button(root, text="Record and Classify", command=classify_audio)
classify_button.pack(pady=10)

save_button = tk.Button(root, text="Save Recording", command=save_audio)
save_button.pack(pady=10)

play_button = tk.Button(root, text="Play Recording", command=play_audio)
play_button.pack(pady=10)

# Use destroy() rather than quit(): quit() only stops mainloop(), which in a
# long-lived notebook kernel leaves the window on screen but unresponsive.
# destroy() closes the window, and mainloop() returns as a consequence.
exit_button = tk.Button(root, text="Exit", command=root.destroy)
exit_button.pack(pady=10)

# Run the GUI loop (blocks this cell until the window is closed)
root.mainloop()




Recording started...
Recording finished.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 147ms/step


In [6]:
from sklearn.preprocessing import LabelEncoder
import pickle

# Genre vocabulary the label encoder is fitted on
genres = [
    'blues', 'classical', 'country', 'disco', 'hiphop',
    'jazz', 'metal', 'pop', 'reggae', 'rock',
]
encoder = LabelEncoder().fit(genres)

# Persist the fitted encoder. This writes the 'label_encoder.pkl' file that
# the first cell of this notebook opens for reading, so it has to exist
# before that cell can run.
with open('label_encoder.pkl', 'wb') as f:
    pickle.dump(encoder, f)


In [4]:
import tensorflow as tf

# Adjust preprocess_audio function to pad MFCCs
def preprocess_audio(audio, sr=SAMPLE_RATE, num_mfcc=NUM_MFCC, n_fft=N_FFT, hop_length=HOP_LENGTH, expected_len=EXPECTED_MFCC_LEN):
    """Compute MFCCs for ``audio`` and force them to a fixed number of frames.

    This redefines the ``preprocess_audio`` from the first cell; whichever
    definition was executed last is the one the GUI callback uses. The
    default frame count comes from ``EXPECTED_MFCC_LEN`` instead of a
    repeated literal, so the two definitions cannot drift apart.

    Args:
        audio: Waveform passed to ``librosa.feature.mfcc`` as ``y``.
        sr: Sampling rate of ``audio`` in Hz.
        num_mfcc: Number of MFCC coefficients to compute.
        n_fft: FFT window size for the MFCC computation.
        hop_length: Hop between successive analysis frames.
        expected_len: Number of frames the output is padded or truncated to.

    Returns:
        The transposed MFCC matrix with a leading batch axis of size 1.
    """
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)

    # Pad or truncate along the frame axis to ensure consistent length
    frame_count = mfcc.shape[1]
    if frame_count < expected_len:
        pad_frames = expected_len - frame_count
        mfcc = np.pad(mfcc, ((0, 0), (0, pad_frames)), mode='constant')  # zero-fill the tail
    else:
        mfcc = mfcc[:, :expected_len]

    return np.expand_dims(mfcc.T, axis=0)  # Add batch dimension for model input
