# In [None]:
import os
import numpy as np
import pandas as pd
import librosa
import librosa.display
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

# Set random seed for reproducibility
# Python's built-in RNG is seeded as well as NumPy and TensorFlow: Keras' own
# `set_random_seed` helper seeds all three, and seeding only the latter two can
# leave weight initialisation and dropout unseeded on recent Keras versions.
import random  # imported here because it is only needed for seeding

random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# Locations of the UrbanSound8K metadata CSV and audio folders
# (adjust these to your local paths)
METADATA_PATH = "UrbanSound8K/metadata/UrbanSound8K.csv"
DATASET_PATH = "UrbanSound8K/audio"

# Audio loading rate and Mel-spectrogram parameters
SAMPLE_RATE = 22050
N_FFT = 2048
HOP_LENGTH = 512
N_MELS = 128

# Height and width every spectrogram is resized to before it reaches the CNN
SPEC_HEIGHT, SPEC_WIDTH = 128, 128

# Function to load and preprocess audio to Mel-spectrogram
def audio_to_melspectrogram(file_path):
    """Load an audio file and turn it into a fixed-size log-Mel spectrogram.

    The clip is loaded with librosa at SAMPLE_RATE, converted to a Mel power
    spectrogram using N_FFT, HOP_LENGTH and N_MELS, mapped to decibels with
    ``librosa.power_to_db`` and finally resized with ``tf.image.resize`` to
    SPEC_HEIGHT x SPEC_WIDTH.

    Args:
        file_path: Path of the audio file to process.

    Returns:
        A NumPy array of shape (SPEC_HEIGHT, SPEC_WIDTH, 1), or ``None`` when
        the file could not be processed (the error is printed).
    """
    try:
        signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)
        # Pass the module-level settings explicitly; previously they were
        # declared but ignored, so editing them had no effect. Their current
        # values match librosa's defaults, so the output is unchanged.
        mel_spec = librosa.feature.melspectrogram(
            y=signal,
            sr=sr,
            n_fft=N_FFT,
            hop_length=HOP_LENGTH,
            n_mels=N_MELS,
        )
        mel_spec_db = librosa.power_to_db(mel_spec)
        # tf.image.resize needs a trailing channel axis: (mels, frames, 1).
        mel_spec_db = mel_spec_db[..., np.newaxis]
        return tf.image.resize(mel_spec_db, [SPEC_HEIGHT, SPEC_WIDTH]).numpy()
    except Exception as e:
        # Deliberately broad: one unreadable clip must not abort preprocessing
        # of the whole dataset; the caller counts the failure.
        print(f"Error processing {file_path}: {e}")
        return None

# Load metadata
metadata = pd.read_csv(METADATA_PATH)

# Folds held out for validation and testing. The UrbanSound8K documentation
# asks users to evaluate on its predefined folds rather than reshuffling the
# slices, because slices taken from the same recording are kept in one fold;
# a random per-slice split lets them leak between train and test and inflates
# the reported accuracy.
VAL_FOLD = 9
TEST_FOLD = 10

# Prepare data
X = []
y = []
folds = []  # fold number of every kept clip, aligned with X and y
failed_files = 0
missing_files = 0

print("Preprocessing audio files...")

for fold, file_name, label in zip(
    metadata['fold'], metadata['slice_file_name'], metadata['class']
):
    file_path = os.path.join(DATASET_PATH, f"fold{fold}", file_name)
    if not os.path.exists(file_path):
        # Count these instead of skipping silently, so a wrong DATASET_PATH
        # or an incomplete download is visible in the output.
        missing_files += 1
        continue
    mel_spec = audio_to_melspectrogram(file_path)
    if mel_spec is None:
        failed_files += 1
        continue
    X.append(mel_spec)
    y.append(label)
    folds.append(fold)

print(f"Completed preprocessing. Failed files: {failed_files}")
print(f"Files listed in the metadata but missing on disk: {missing_files}")
print(f"Successfully processed {len(X)} files")

if not X:
    # Fail here with a clear message rather than later inside the split.
    raise RuntimeError(
        f"No audio clips could be loaded from {DATASET_PATH!r}; "
        "check DATASET_PATH and METADATA_PATH."
    )

X = np.array(X)
y = np.array(y)
folds = np.array(folds)

print(f"X shape: {X.shape}, y shape: {y.shape}")

# Encode labels (class-name strings -> integer ids)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

print(f"Number of classes: {len(label_encoder.classes_)}")
print(f"Classes: {label_encoder.classes_}")

# Train-validation-test split
test_mask = folds == TEST_FOLD
val_mask = folds == VAL_FOLD
train_mask = ~(test_mask | val_mask)

if train_mask.any() and val_mask.any() and test_mask.any():
    # Fold-based split: whole folds are held out, so no recording contributes
    # slices to more than one partition.
    X_train, y_train = X[train_mask], y_encoded[train_mask]
    X_val, y_val = X[val_mask], y_encoded[val_mask]
    X_test, y_test = X[test_mask], y_encoded[test_mask]
else:
    # Some partition would be empty (e.g. an incomplete copy of the dataset):
    # fall back to the stratified random 80/10/10 split.
    print(
        "Warning: predefined folds unavailable for a fold-based split; "
        "falling back to a random stratified split (results may be optimistic)."
    )
    X_temp, X_test, y_temp, y_test = train_test_split(
        X, y_encoded, test_size=0.1, random_state=42, stratify=y_encoded
    )
    # 0.1111 of the remaining 90% is ~10% of the full data set.
    X_train, X_val, y_train, y_val = train_test_split(
        X_temp, y_temp, test_size=0.1111, random_state=42, stratify=y_temp
    )

print(f"Train shape: {X_train.shape}, Validation shape: {X_val.shape}, Test shape: {X_test.shape}")

# Define CNN model
def _conv_pool(filters, **conv_kwargs):
    """Return one 3x3 'same'-padded ReLU convolution followed by 2x2 max-pooling."""
    return [
        Conv2D(filters, (3, 3), activation='relu', padding='same', **conv_kwargs),
        MaxPooling2D((2, 2)),
    ]


model = Sequential([
    # Three convolutional stages with 32, 64 and 128 filters; only the first
    # one declares the single-channel spectrogram input shape.
    *_conv_pool(32, input_shape=(SPEC_HEIGHT, SPEC_WIDTH, 1)),
    *_conv_pool(64),
    *_conv_pool(128),
    # Classifier head: one softmax unit per encoded class.
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax'),
])

# Compile the model
# The sparse loss is used because the targets are the integer class ids
# produced by the label encoder, not one-hot vectors.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(learning_rate=1e-3),
    metrics=['accuracy'],
)

# Print the layer-by-layer summary of the compiled network
model.summary()

# Train the model
# Fit on the training split for 30 epochs in batches of 32, evaluating on the
# validation split after each epoch; the returned History object is kept.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=30,
    validation_data=(X_val, y_val),
)
