In [1]:
import os 
import librosa
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, LSTM
import scipy
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Reshape, Conv2DTranspose, BatchNormalization, Activation
from tensorflow.keras.layers import LeakyReLU
import librosa.display
import matplotlib.pyplot as plt

In [2]:
# Root folder that is searched recursively for audio files.
# Set the AUDIO_DIR environment variable to point at another collection;
# when it is unset, the original local path is used unchanged.
directory = os.environ.get("AUDIO_DIR", r"D:\songs for test\songs\test section")

In [3]:
# function to load and preprocess the music files 
def load_audio_files(directory, sr=22050, duration=30):
    """Recursively load every MP3 file under ``directory`` as a waveform.

    Files are visited in sorted order (directories and file names), so the
    returned list has the same order on every run. The extension check is
    case-insensitive, so ``.MP3`` / ``.Mp3`` files are picked up as well.

    Args:
        directory: Root folder to walk.
        sr: Sampling rate forwarded to ``librosa.load``.
        duration: Maximum duration forwarded to ``librosa.load``.

    Returns:
        A list with one waveform per file that loaded successfully. Files
        that fail to load are reported on stdout and skipped.
    """
    audio_data = []  # Waveforms, in traversal order
    for root, dirs, files in os.walk(directory):
        # os.walk yields entries in arbitrary order; sorting `dirs` in place
        # also fixes the order in which sub-folders are descended into.
        dirs.sort()
        for file in sorted(files):
            if not file.lower().endswith('.mp3'):
                continue
            file_path = os.path.join(root, file)
            try:
                y, _ = librosa.load(file_path, sr=sr, duration=duration)
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole scan
                print(f'Could not parse {file_path}: {e}')
            else:
                audio_data.append(y)
    return audio_data

# Load every track found under the configured folder and report how many were read

audio_files = load_audio_files(directory=directory)
print(f'Loaded {len(audio_files)} audio files')


Loaded 474 audio files


In [15]:
def _fit_to_length(matrix, max_length, pad_value=0.0):
    """Truncate or right-pad axis 1 of ``matrix`` to exactly ``max_length`` columns."""
    n_frames = matrix.shape[1]
    if n_frames > max_length:
        return matrix[:, :max_length]
    return np.pad(
        matrix,
        ((0, 0), (0, max_length - n_frames)),
        mode='constant',
        constant_values=pad_value,
    )


def extract_features_from_audio(y, sr=22050, n_mels=128, n_mfcc=13, max_length=130):
    """Build a fixed-width feature matrix (log-mel rows stacked on MFCC rows).

    Args:
        y: Audio waveform passed to the librosa feature extractors.
        sr: Sampling rate passed to the librosa feature extractors.
        n_mels: Number of mel bands (rows of the log-mel part).
        n_mfcc: Number of MFCCs (rows of the MFCC part).
        max_length: Number of columns every returned matrix is cut or padded to.

    Returns:
        Array of shape ``(n_mels + n_mfcc, max_length)``.
    """
    # Mel spectrogram, converted to dB relative to the clip's own maximum
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
    # With ref=np.max the loudest bin is 0 dB, so zero-padding a short clip would
    # append "maximum loudness" frames. Pad with the quietest level seen instead.
    quietest_db = log_mel_spectrogram.min() if log_mel_spectrogram.size else 0.0
    log_mel_spectrogram = _fit_to_length(log_mel_spectrogram, max_length, pad_value=quietest_db)

    # MFCCs keep the original zero padding
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    mfccs = _fit_to_length(mfccs, max_length)

    # Combine features: log-mel rows first, MFCC rows underneath
    return np.vstack((log_mel_spectrogram, mfccs))

# Build one feature matrix per loaded waveform
features = list(map(extract_features_from_audio, audio_files))

# Show the first matrix's shape as a quick sanity check
print("Example feature shape:", features[0].shape)



Example feature shape: (141, 130)


In [16]:
# Stack the per-track feature matrices and append a trailing channel axis
# of size 1 for the CNN input
X = np.array(features)[:, :, :, np.newaxis]

# Placeholder labels: swap in the real targets before training
y = np.array([...])  # Your target labels here

# Report the resulting shapes
print("X shape:", X.shape)
print("y shape:", y.shape)


X shape: (474, 141, 130, 1)
y shape: (1,)


In [7]:
# Using the function above, we get a list of feature matrices to use when training the model
features

[array([[-67.37621 , -51.26617 , -47.838924, ..., -41.81551 , -37.481747,
         -29.4892  ],
        [-68.708244, -54.843536, -52.840454, ..., -16.39291 , -16.717184,
         -17.70154 ],
        [-71.242226, -62.10222 , -56.756805, ..., -16.1513  , -15.893187,
         -18.733723],
        ...,
        [-80.      , -71.124664, -58.5502  , ..., -41.376682, -44.38713 ,
         -50.01422 ],
        [-80.      , -76.45264 , -60.60414 , ..., -43.81664 , -46.806545,
         -54.88742 ],
        [-80.      , -80.      , -71.10949 , ..., -53.62479 , -55.516094,
         -64.37849 ]], dtype=float32),
 array([[-47.772575, -29.254848, -27.129412, ..., -25.619696, -23.942652,
         -26.628635],
        [-40.447803, -26.701677, -24.844465, ..., -19.004889, -18.90953 ,
         -21.07397 ],
        [-39.022877, -28.286177, -28.856558, ..., -18.701141, -17.66914 ,
         -16.957962],
        ...,
        [-80.      , -78.469604, -73.94559 , ..., -66.37998 , -61.780586,
         -60.027157

In [14]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, LSTM

# Define the CNN model
def build_cnn_model(input_shape, num_classes=10):
    """Build and compile a small 2-D CNN classifier.

    Two Conv2D/MaxPooling2D/Dropout stages feed a 128-unit dense layer and a
    softmax output.

    Args:
        input_shape: Shape of one sample, e.g. ``(rows, cols, channels)``.
        num_classes: Width of the softmax output layer. Defaults to 10, the
            value that was previously hard-coded.

    Returns:
        A ``Sequential`` model compiled with Adam, categorical cross-entropy
        and an accuracy metric.
    """
    model = Sequential([
        # An explicit Input layer replaces the `input_shape=` kwarg on the first
        # Conv2D, which newer Keras versions warn about.
        Input(shape=input_shape),
        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Dropout(0.25),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Build the classifier for the feature tensor prepared earlier.
# The input shape is taken from X itself (every axis except the sample axis), so it
# always matches what fit() receives; the previous literal (128, 646, 1) did not
# match the (474, 141, 130, 1) tensor printed above.
input_shape = X.shape[1:]
cnn_model = build_cnn_model(input_shape)


In [17]:
# Prepare the data
X = np.array(features)
X = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1)  # Add channel dimension
# TODO: `...` below is a literal Ellipsis placeholder, not data. fit() rejects it
# (see the ValueError recorded in this cell's output). Replace it with a NumPy
# array holding one label per row of X before running this cell.
# NOTE(review): the model is compiled with categorical_crossentropy, so the labels
# presumably need to be one-hot rows matching the softmax width; confirm.
y = ...  # Your target labels here

# Train the model
# validation_split=0.2 holds out a fraction of the samples for validation
cnn_model.fit(X, y, epochs=10, batch_size=32, validation_split=0.2)


ValueError: Argument `validation_split` is only supported for tensors or NumPy arrays.Found incompatible type in the input: [<class 'ellipsis'>]

In [10]:


# Generator model
def build_generator(latent_dim):
    """Build the GAN generator: a latent vector in, a single-channel 2-D map out.

    The dense projection is reshaped to a 16x16x256 tensor and passed through
    three stride-2 transposed convolutions with 'same' padding, each doubling
    the spatial size (16 -> 32 -> 64 -> 128). The final tanh keeps outputs in
    [-1, 1].

    Args:
        latent_dim: Length of the input noise vector.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    return Sequential([
        # An explicit Input layer replaces `input_dim=` on the Dense layer, which
        # newer Keras versions warn about (the warning is visible in this cell's output).
        Input(shape=(latent_dim,)),
        Dense(256 * 16 * 16, activation="relu"),
        Reshape((16, 16, 256)),
        Conv2DTranspose(128, (5, 5), strides=(2, 2), padding='same'),
        BatchNormalization(),
        Activation("relu"),
        Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same'),
        BatchNormalization(),
        Activation("relu"),
        Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same'),
        Activation("tanh"),
    ])

# Length of the random noise vector fed to the generator
latent_dim = 100
generator = build_generator(latent_dim=latent_dim)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [11]:
# Discriminator model
def build_discriminator(input_shape):
    """Build the GAN discriminator: a sample in, a single sigmoid score out.

    Two stride-2 5x5 convolutions (64 then 128 filters), each followed by
    LeakyReLU and Dropout, are flattened into one sigmoid unit.

    Args:
        input_shape: Shape of one sample, e.g. ``(rows, cols, channels)``.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    return Sequential([
        # An explicit Input layer replaces the `input_shape=` kwarg on the first
        # Conv2D, which newer Keras versions warn about.
        Input(shape=input_shape),
        Conv2D(64, (5, 5), strides=(2, 2), padding='same'),
        # The slope is passed positionally: the keyword was renamed from `alpha`
        # to `negative_slope` in Keras 3, and the positional form works with both.
        LeakyReLU(0.2),
        Dropout(0.3),
        Conv2D(128, (5, 5), strides=(2, 2), padding='same'),
        LeakyReLU(0.2),
        Dropout(0.3),
        Flatten(),
        Dense(1, activation='sigmoid'),
    ])

# NOTE(review): the feature tensor X printed earlier has shape (474, 141, 130, 1), which
# does not match this 128x128 input. Confirm how real samples are brought to this size
# before they are fed to the discriminator.
input_shape = (128, 128, 1)  # Adjust based on your feature shape
discriminator = build_discriminator(input_shape)
# Compiled on its own so it can be trained directly with train_on_batch; the training
# loop unpacks each call's result as a (loss, accuracy) pair.
discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])




In [13]:
import numpy as np

# Function to generate real samples
def generate_real_samples(features, n_samples):
    """Draw ``n_samples`` random rows (with replacement) from ``features``.

    Returns:
        ``(samples, labels)`` where ``labels`` is an ``(n_samples, 1)`` array
        of ones marking the rows as real.
    """
    picked_rows = np.random.randint(0, features.shape[0], n_samples)
    real_labels = np.ones((n_samples, 1))
    return features[picked_rows], real_labels

# Function to generate latent points
def generate_latent_points(latent_dim, n_samples):
    """Return an ``(n_samples, latent_dim)`` array of standard-normal noise."""
    flat_noise = np.random.randn(latent_dim * n_samples)
    return flat_noise.reshape(n_samples, latent_dim)

# Function to generate fake samples
def generate_fake_samples(generator, latent_dim, n_samples):
    """Generate ``n_samples`` fake samples from random latent points.

    Args:
        generator: Model exposing ``predict``; receives the latent points.
        latent_dim: Length of each latent vector.
        n_samples: Number of samples to generate.

    Returns:
        ``(generated, labels)`` where ``labels`` is an ``(n_samples, 1)`` array
        of zeros marking the samples as fake.
    """
    z = generate_latent_points(latent_dim, n_samples)
    # verbose=0 silences the per-call progress bar, which would otherwise be
    # printed on every iteration of the training loop.
    generated_images = generator.predict(z, verbose=0)
    y = np.zeros((n_samples, 1))
    return generated_images, y

# Training the GAN
# NOTE(review): `gan` (used below) is not defined in any cell visible here. Presumably it
# is the combined generator -> discriminator model; it must be built and compiled
# before this loop can run.
# NOTE(review): X was printed earlier with shape (474, 141, 130, 1), while the
# discriminator is built for (128, 128, 1) inputs. Confirm the shapes are reconciled.
epochs = 10000  # Each "epoch" here is one batch update, not a full pass over X
batch_size = 32
half_batch = batch_size // 2  # Real and fake halves are trained as separate batches

for epoch in range(epochs):
    # Train discriminator
    # One half-batch of real rows (labelled 1) and one of generated rows (labelled 0)
    X_real, y_real = generate_real_samples(X, half_batch)
    X_fake, y_fake = generate_fake_samples(generator, latent_dim, half_batch)
    # The two-value unpacking relies on the discriminator being compiled with a single metric
    d_loss_real, d_acc_real = discriminator.train_on_batch(X_real, y_real)
    d_loss_fake, d_acc_fake = discriminator.train_on_batch(X_fake, y_fake)
    
    # Train generator
    # Fresh noise is labelled 1 (the "real" label) for the combined-model update
    z = generate_latent_points(latent_dim, batch_size)
    y_gan = np.ones((batch_size, 1))
    g_loss = gan.train_on_batch(z, y_gan)
    
    # Print progress
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, D Loss Real: {d_loss_real}, D Loss Fake: {d_loss_fake}, G Loss: {g_loss}")


NameError: name 'X' is not defined

NameError: name 'log_mel_spectrogram' is not defined

<Figure size 1000x400 with 0 Axes>