# In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import librosa.display
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Define categories and their respective sounds
# Maps a coarse category name to the list of sound class names that belong to it.
# get_category() looks a label up by exact list membership, so any label that is
# not spelled exactly as written here is reported as 'Unknown'.
# NOTE(review): presumably these entries should match the values of the `class`
# column in the metadata CSV (spacing, underscores, case) — confirm against the data.
CATEGORIES = {
    'Animal': ['dog', 'cat', 'cow', 'tiger', 'lion'],
    'Vehicle': ['car horn', 'engine sound', 'siren'],
    'Nature': ['rain', 'thunder', 'wind'],
    'Music': ['piano', 'guitar', 'drum'],
    'Industrial': ['drilling']
}

# Feature extraction: one averaged MFCC vector per audio file
def extract_features(file_path, n_mfcc=40):
    """Return the time-averaged MFCC vector for one audio file.

    The file is loaded with librosa at a 22050 Hz sample rate, MFCCs are
    computed with ``n_fft=1024`` and ``hop_length=512``, and each coefficient
    is averaged across frames.

    Args:
        file_path: Path of the audio file to read.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        The averaged MFCC array, or ``None`` when ``file_path`` is not an
        existing file or any step raises. A message is printed in both of
        those cases, so callers can simply skip ``None`` results.
    """
    try:
        if os.path.isfile(file_path):
            signal, rate = librosa.load(file_path, sr=22050)
            coefficients = librosa.feature.mfcc(
                y=signal,
                sr=rate,
                n_mfcc=n_mfcc,
                n_fft=1024,
                hop_length=512,
            )
            # Transpose so frames run along axis 0, then collapse them.
            return coefficients.T.mean(axis=0)
        print(f'File not found: {file_path}')
        return None
    except Exception as err:
        # Best effort: report the problem and let the caller skip this file.
        print(f'Error encountered while parsing file: {file_path}', err)
        return None

# Load data
# Root directory that holds the per-fold audio sub-directories.
audio_dataset_path = 'sound_datasets/audio'
# Metadata table; the loop below reads its `fold`, `slice_file_name` and `class` columns.
metadata = pd.read_csv('sound_datasets/metadata/Sound.csv')
metadata.columns = metadata.columns.str.strip()  # Strip stray whitespace from the header names

# Collect [feature_vector, label] pairs; rows whose audio could not be
# processed (extract_features returned None) are skipped.
features = []
for _, row in metadata.iterrows():
    # Each clip is expected at <audio_dataset_path>/fold<fold>/<slice_file_name>.
    file_path = os.path.join(audio_dataset_path, f'fold{row["fold"]}/{row["slice_file_name"]}')
    class_label = row['class']
    data = extract_features(file_path)
    if data is not None:
        features.append([data, class_label])

# Prepare data
features_df = pd.DataFrame(features, columns=['feature', 'class'])
# Stack the per-clip feature vectors into one array (one row per clip).
X = np.array(features_df['feature'].tolist())
y = np.array(features_df['class'].tolist())
# Map class labels to integer ids, then one-hot encode them. The fitted encoder
# is kept so predict_sound() can map a predicted index back to its label.
labelencoder = LabelEncoder()
y = to_categorical(labelencoder.fit_transform(y))
# Hold out 20% of the clips with a fixed seed; stratify=y asks for class
# proportions to be preserved in both parts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Model architecture
# Feed-forward classifier over one fixed-length feature vector per clip:
# two Dense -> BatchNormalization -> Dropout stages, then a softmax output.
model = Sequential([
    # The input width is read from the training matrix instead of being
    # hard-coded, so it follows the feature length produced during extraction
    # (the output width below is already derived from the data the same way).
    Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    BatchNormalization(),
    Dropout(0.3),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    # One output unit per one-hot column of y.
    Dense(y.shape[1], activation='softmax')
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Callbacks
# Stop after 10 epochs without val_loss improvement and restore the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# Halve the learning rate after 5 epochs without val_loss improvement (floor 1e-5).
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001)

# Model training
# NOTE(review): the held-out test split is also passed as validation data, so
# early stopping and the learning-rate schedule are driven by it; the reported
# validation metrics are therefore not an independent test estimate.
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=32, callbacks=[early_stopping, reduce_lr])

# Category lookup for a predicted sound class
def get_category(sound_class):
    """Return the CATEGORIES key whose sound list contains ``sound_class``.

    The match is an exact membership test against each list, checked in the
    dictionary's iteration order; ``'Unknown'`` is returned when no list
    contains the value.
    """
    matching_groups = (
        group for group, members in CATEGORIES.items() if sound_class in members
    )
    return next(matching_groups, 'Unknown')

def predict_sound(file_path):
    """Classify one audio file and print the predicted category and sound.

    Features come from ``extract_features``; the module-level ``model`` scores
    them and the module-level ``labelencoder`` maps the highest-scoring index
    back to its class label. Nothing is returned: the outcome (or a failure
    message when no features could be extracted) is printed.
    """
    mfcc_vector = extract_features(file_path)
    if mfcc_vector is None:
        print('Failed to extract features.')
        return

    # The model expects a batch, so wrap the single vector in a leading axis.
    batch = np.expand_dims(mfcc_vector, axis=0)
    scores = model.predict(batch)
    best_index = np.argmax(scores)
    predicted_class = labelencoder.inverse_transform([best_index])[0]
    predicted_category = get_category(predicted_class)
    print(f'The predicted category is: {predicted_category}, and the sound is: {predicted_class}')

# Example usage
# Runs one prediction on a clip from the dataset directory; predict_sound()
# prints either the predicted category and sound or a failure message.
predict_sound('sound_datasets/audio/fold10/7913-3-3-0.wav')

# Enhanced Visualizations
# Side-by-side training curves: accuracy in the left panel, loss in the right.
plt.figure(figsize=(12, 5))
sns.set_style("whitegrid")

# One row per panel:
# (training key, validation key, training label, validation label, y-axis label, title)
curve_panels = (
    ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy', 'Accuracy', 'Model Accuracy'),
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Loss', 'Model Loss'),
)

for panel_number, panel_spec in enumerate(curve_panels, start=1):
    train_key, val_key, train_label, val_label, y_label, panel_title = panel_spec
    plt.subplot(1, 2, panel_number)

    # The x axis is the zero-based epoch index of each recorded value.
    train_curve = history.history[train_key]
    sns.lineplot(x=range(len(train_curve)), y=train_curve, label=train_label)
    val_curve = history.history[val_key]
    sns.lineplot(x=range(len(val_curve)), y=val_curve, label=val_label)

    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()
    plt.title(panel_title)

plt.tight_layout()
plt.show()
