In [22]:
import librosa
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.utils import to_categorical
import os

# Define data paths and genre labels
data_path = "C:/Users/AsyrafAmeran/Desktop/AMG/AI-Music-Generation/Data/genres_original"
genres = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]
num_genres = len(genres)
sample_rate = 22050  # Assuming audio files have this sample rate (adjust if needed)
duration = 3  # Desired duration in seconds

# Function to extract MFCCs and labels, limiting to 3 seconds
def extract_mfcc(file_path, label, sr):
  try:
    y, sr = librosa.load(file_path, duration=duration * sr)
    if y is None:
      raise ValueError(f"Error loading audio file: {file_path}")
  except Exception as e:  # Catch any loading errors
    print(f"Error loading audio file {file_path}: {e}")
    return None, None  # Return None for both MFCCs and sample rate

  mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)  # Specify number of MFCCs (optional)
  mfccs = mfccs.T  # Transpose

  # Zero-pad to fixed length (assuming all sequences are shorter than 3 seconds)
  max_len = int(sr * duration)  # Maximum number of time steps
  pad_width = (0, max_len - mfccs.shape[0])
  mfccs = np.pad(mfccs, pad_width, mode='constant', constant_values=0)

  return mfccs, sr

# Load data
features = []
labels = []
for genre in genres:
  genre_path = f"{data_path}/{genre}"
  # Check if genre directory exists
  if not os.path.exists(genre_path):
    raise ValueError(f"Genre directory not found: {genre_path}")
  for filename in os.listdir(genre_path):
    file_path = os.path.join(genre_path, filename)
    mfccs, sr = extract_mfcc(file_path, genre, sr)  # Include 'sr' argument
    features.append(mfccs)
    # Create a one-hot encoded label for each genre (using np.eye)
    label_idx = genres.index(genre)  # Find the index of the genre in the 'genres' list
    one_hot_label = np.eye(num_genres)[label_idx]  # Create a one-hot vector
    labels.append(one_hot_label)

# Check for empty features list before splitting
if not features:
  raise ValueError("No features found! Ensure audio files are processed correctly.")

# Print features after conversion (for debugging)
print(f"Features after conversion: {features}")

# Convert features to numpy arrays (important for reshaping)
features = np.array(features)
labels = np.array(labels)  # Convert labels to a numpy array as well

# Split data into training, validation, and testing sets
from sklearn.model_selection import train_test_split
# Print features after conversion (check for emptiness or unexpected content)
print(f"Features after conversion: {features}")

# Split data
# Stack features into a single array (assuming consistent shapes)
X_train = np.stack(features[:int(len(features)*0.8)], axis=0)  # 80% for training
X_val = np.stack(features[int(len(features)*0.8):], axis=0)  # 20% for validation

# Check for empty X_train before accessing its shape
if not X_train.shape:
  raise ValueError("X_train might be empty. Ensure audio files are loaded and processed correctly.")

try:
  n_timesteps = X_train.shape[1]  # Number of time steps in an MFCC sequence
  n_features = X_train.shape[2]  # Number of MFCC coefficients
except IndexError:
  raise ValueError("Unexpected error during shape access. Check data loading process.")

X_train = X_train.reshape((X_train.shape[0], n_timesteps, n_features))
X_val = X_val.reshape((X_val.shape[0], n_timesteps, n_features))
X_test = X_test.reshape((X_test.shape[0], n_timesteps, n_features))


# Define the LSTM model (same as before)
model = Sequential()
model.add(LSTM(units=128, return_sequences=True, input_shape=(n_timesteps, n_features)))
model.add(Dropout(0.2))
model.add(LSTM(units=64))
model.add(Dropout(0.2))
model.add(Dense(num_genres, activation="softmax"))

# Compile the model (same as before)
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model (same as before)
model.fit(X_train, y_train, epochs=50, validation_data=(X_val, y_val))

# Evaluate the model (same as before)
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

# Use the model for prediction (optional, same as before)

MemoryError: Unable to allocate 16.0 GiB for an array with shape (66150, 64877) and data type float32