In [10]:
import math
import json
import librosa
import os
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf

def preprocess(dataset_path, num_mfcc=40, n_fft=2048, hop_length=512, num_segment=10):
    data = {"labels": [], "mfcc": []}
    sample_rate = 22050
    samples_per_segment = int(sample_rate * 30 / num_segment)
    
    genre_labels = {}
    label_idx = 0
    
    for dirpath, _, filenames in os.walk(dataset_path):
        for f in sorted(filenames):
            if not f.endswith('.wav'):
                continue
            file_path = os.path.join(dirpath, f)
            print("Track Name ", file_path)
            
            try:
                y, sr = librosa.load(file_path, sr=sample_rate)
            except Exception as e:
                print("Error loading file:", e)
                continue
            
            genre_label = dirpath.split('/')[-1]
            if genre_label not in genre_labels:
                genre_labels[genre_label] = label_idx
                label_idx += 1
            
            for n in range(num_segment):
                mfcc = librosa.feature.mfcc(y=y[samples_per_segment * n: samples_per_segment * (n + 1)],
                                            sr=sample_rate, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
                mfcc = mfcc.T
                if len(mfcc) == math.ceil(samples_per_segment / hop_length):
                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(genre_labels[genre_label])
    
    return data


# Call preprocess function to get data
dataset_path = "C:/Users/AsyrafAmeran/Desktop/AMG/AI-Music-Generation/Data/genres_original"
mfcc_data = preprocess(dataset_path)

# Convert data to numpy arrays
x = np.array(mfcc_data["mfcc"])
y = np.array(mfcc_data["labels"])

# Split the data into train, validation, and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2)

# Define input shape
input_shape = (x_train.shape[1], x_train.shape[2])

# Build the model
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(64, input_shape=input_shape, return_sequences=True))
model.add(tf.keras.layers.LSTM(64))
model.add(tf.keras.layers.Dense(64, activation="relu"))
model.add(tf.keras.layers.Dense(10, activation="softmax"))

# Compile the model
optimiser = tf.keras.optimizers.Adam(lr=0.001)
model.compile(optimizer=optimiser,
              loss="sparse_categorical_crossentropy",
              metrics=['accuracy'])
model.summary()

# Train the model
model.fit(x_train, y_train, validation_data=(x_val, y_val), batch_size=32, epochs=60, verbose=2)

# Save the model
model.save("GTZAN_LSTM.h5")

# Evaluate the model on test data
y_pred = model.predict(x_test)
y_pred = np.argmax(y_pred, axis=1)

# Calculate accuracy
accuracy = np.sum(y_pred == y_test) / len(y_pred)
print("Test Accuracy:", accuracy)


Track Name  C:/Users/AsyrafAmeran/Desktop/AMG/AI-Music-Generation/Data/genres_original\blues\blues.00000.wav
Track Name  C:/Users/AsyrafAmeran/Desktop/AMG/AI-Music-Generation/Data/genres_original\blues\blues.00001.wav
Track Name  C:/Users/AsyrafAmeran/Desktop/AMG/AI-Music-Generation/Data/genres_original\blues\blues.00002.wav
Track Name  C:/Users/AsyrafAmeran/Desktop/AMG/AI-Music-Generation/Data/genres_original\blues\blues.00003.wav
Track Name  C:/Users/AsyrafAmeran/Desktop/AMG/AI-Music-Generation/Data/genres_original\blues\blues.00004.wav
Track Name  C:/Users/AsyrafAmeran/Desktop/AMG/AI-Music-Generation/Data/genres_original\blues\blues.00005.wav
Track Name  C:/Users/AsyrafAmeran/Desktop/AMG/AI-Music-Generation/Data/genres_original\blues\blues.00006.wav
Track Name  C:/Users/AsyrafAmeran/Desktop/AMG/AI-Music-Generation/Data/genres_original\blues\blues.00007.wav
Track Name  C:/Users/AsyrafAmeran/Desktop/AMG/AI-Music-Generation/Data/genres_original\blues\blues.00008.wav
Track Name  C:/User



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 130, 64)           26880     
                                                                 
 lstm_1 (LSTM)               (None, 64)                33024     
                                                                 
 dense (Dense)               (None, 64)                4160      
                                                                 
 dense_1 (Dense)             (None, 10)                650       
                                                                 
Total params: 64,714
Trainable params: 64,714
Non-trainable params: 0
_________________________________________________________________
Epoch 1/60
188/188 - 17s - loss: 1.6937 - accuracy: 0.3978 - val_loss: 1.4126 - val_accuracy: 0.4846 - 17s/epoch - 89ms/step
Epoch 2/60
188/188 - 13s - loss: 1.3187 - accuracy: 0.5219 

In [46]:
import numpy as np
import librosa
import tensorflow as tf

# Load the trained model
model = tf.keras.models.load_model("GTZAN_LSTM.h5")

def generate_music(genre_label, sample_rate=22050, num_mfcc=40, n_fft=2048, hop_length=512, duration=30, num_segment=10):
    try:
        # Generate random noise as seed input
        seed_input = np.random.randn(sample_rate * duration)
        mfccs = []
        for n in range(num_segment):
            segment = seed_input[n * sample_rate * duration // num_segment: (n + 1) * sample_rate * duration // num_segment]
            mfcc = librosa.feature.mfcc(y=segment, sr=sample_rate, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
            mfcc = mfcc.T
            mfccs.append(mfcc)
        mfccs = np.array(mfccs)
        
        # Predict the genre label using the model
        genre_label = np.array([genre_label])
        prediction = model.predict(mfccs)
        generated_label = np.argmax(np.mean(prediction, axis=0))  # Taking mean prediction over all segments
        
        # If the model's prediction does not match the input genre label, return error
        if not np.array_equal(generated_label, genre_label):
            return None
        
        # Generate music using the model
        generated_music = []
        for mfcc in mfccs:
            mfcc = np.expand_dims(mfcc, axis=0)  # Adding batch dimension
            prediction = model.predict(mfcc)
            generated_music.extend(prediction[0])  # Assuming output is a probability distribution
            
        # Scale the generated music to the same range as the training data
        # This step may vary based on the specific output of your model
        generated_music = np.array(generated_music)
        generated_music = (generated_music - np.min(generated_music)) / (np.max(generated_music) - np.min(generated_music))
        
        return generated_music
    
    except Exception as e:
        print("Error generating music:", e)
        return None

# User input for genre selection
selected_genre = input("Enter the genre for music generation: ")

# Generate music based on the user-selected genre
generated_music = generate_music(selected_genre)

# Save the generated music as an MP3 file
if generated_music is not None:
    librosa.output.write_wav(f"generated_music_{selected_genre}.wav", generated_music, sr=22050)
    print("Generated music saved successfully.")
else:
    print("Failed to generate music.")


Failed to generate music.


  if generated_label != genre_label:
