In [1]:
import os
import librosa
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

C:\Users\shrut\anaconda3\lib\site-packages\numpy\.libs\libopenblas.GK7GX5KEQ4F6UYO3P26ULGBQYHGQO7J4.gfortran-win_amd64.dll
C:\Users\shrut\anaconda3\lib\site-packages\numpy\.libs\libopenblas.WCDJNK7YVMPZQ2ME2ZZHJJRJ3JIKNDB7.gfortran-win_amd64.dll
  from scipy.sparse import issparse  # pylint: disable=g-import-not-at-top


In [2]:
# Set paths
DATA_PATH = 'Audio_Files/'  # Update this with your path
major_folder = os.path.join(DATA_PATH, 'major')
minor_folder = os.path.join(DATA_PATH, 'minor')

# Hyperparameters
n_mfcc = 20  
sequence_length = 30  
batch_size = 32
epochs = 50

In [3]:
# Function to load and preprocess audio files
def load_and_extract_features(folder_path):
    features = []
    for file_name in os.listdir(folder_path):
        if file_name.endswith('.wav'):
            file_path = os.path.join(folder_path, file_name)
            y, sr = librosa.load(file_path)
            mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
            features.append(mfccs.T)
    return features

In [4]:
# Load and prepare dataset
major_features = load_and_extract_features(major_folder)
minor_features = load_and_extract_features(minor_folder)

# Prepare sequences
def create_sequences(features, seq_length):
    X, y = [], []
    for mfcc in features:
        for i in range(len(mfcc) - seq_length):
            X.append(mfcc[i:i + seq_length])
            y.append(mfcc[i + seq_length])
    return np.array(X), np.array(y)

In [5]:
X_major, y_major = create_sequences(major_features, sequence_length)
X_minor, y_minor = create_sequences(minor_features, sequence_length)

# Combine and label data (1 for major, 0 for minor)
X = np.concatenate((X_major, X_minor), axis=0)
y = np.concatenate((y_major, y_minor), axis=0)

In [6]:
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Define the model
model = Sequential([
    LSTM(64, input_shape=(sequence_length, n_mfcc), return_sequences=True),
    Dropout(0.2),
    LSTM(64),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(n_mfcc)  # Output layer with the same dimension as MFCC input
])

  super().__init__(**kwargs)


In [None]:
model.compile(optimizer='adam', loss='mse')
model.summary()

# Set up early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model with early stopping
history = model.fit(
    X_train, y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
    callbacks=[early_stopping]
)

Epoch 1/50
[1m1148/1148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 41ms/step - loss: 4055.2861 - val_loss: 606.1160
Epoch 2/50
[1m1148/1148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 40ms/step - loss: 490.6552 - val_loss: 143.1201
Epoch 3/50
[1m1148/1148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 35ms/step - loss: 192.3761 - val_loss: 109.3244
Epoch 4/50
[1m1148/1148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 35ms/step - loss: 165.5600 - val_loss: 100.6715
Epoch 5/50
[1m1148/1148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 36ms/step - loss: 155.7856 - val_loss: 97.2126
Epoch 6/50
[1m1148/1148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 34ms/step - loss: 150.0359 - val_loss: 96.6808
Epoch 7/50
[1m1148/1148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 34ms/step - loss: 146.2354 - val_loss: 92.5811
Epoch 8/50
[1m1148/1148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 35ms/step - loss: 142.7170

In [None]:
# Evaluate
test_loss = model.evaluate(X_test, y_test)
print(f"Test Loss (MSE): {test_loss}")

# Save model 
model.save("melody_generator_model.h5")

In [None]:
# Training loss (MSE) on the training data
train_loss = model.evaluate(X_train, y_train)
print(f"Training Loss (MSE): {train_loss}")

# Calculate test loss (MSE) on the test data
test_loss = model.evaluate(X_test, y_test)
print(f"Test Loss (MSE): {test_loss}")

In [None]:
from sklearn.metrics import r2_score

# Predict on train and test sets
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Calculate R-squared score for training and test sets
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

print(f"Training R^2 Score: {train_r2}")
print(f"Test R^2 Score: {test_r2}")

In [None]:
import librosa.display
import matplotlib.pyplot as plt

# Function to generate a sequence
def generate_sequence(model, start_sequence, sequence_length):
    generated_sequence = start_sequence.copy()
    
    for _ in range(sequence_length):
        prediction = model.predict(generated_sequence[-30:].reshape(1, -1, 20))  # Reshape to (1, 30, 20)
        generated_sequence = np.vstack([generated_sequence, prediction])
        
    return generated_sequence

In [None]:
def mfcc_to_audio(mfcc_sequence, sr=22050):
    S = librosa.feature.inverse.mfcc_to_mel(mfcc_sequence.T, n_mels=128)
    audio = librosa.feature.inverse.mel_to_audio(S, sr=sr)
    return audio

In [None]:
import IPython.display as ipd

# Load a seed audio file and extract initial MFCCs
y, sr = librosa.load(os.path.join(major_folder, "Major_8.wav"))  # replace with actual file path
initial_mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc).T[:sequence_length]  # First sequence

# Generating  melody
generated_mfccs = generate_sequence(model, initial_mfccs, sequence_length=100)  # Generate 100 frames

# Convert generated MFCCs to audio
generated_audio = mfcc_to_audio(generated_mfccs, sr=sr)

# Play generated audio
ipd.Audio(generated_audio, rate=sr)


In [None]:
import IPython.display as ipd

# Load a seed audio file and extract initial MFCCs
y, sr = librosa.load(os.path.join(minor_folder, "Minor_8.wav"))  # replace with actual file path
initial_mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc).T[:sequence_length]  # First sequence

# Generate a melody
generated_mfccs = generate_sequence(model, initial_mfccs, sequence_length=100)  # Generate 100 frames

# Convert generated MFCCs to audio
generated_audio = mfcc_to_audio(generated_mfccs, sr=sr)

# Play generated audio
ipd.Audio(generated_audio, rate=sr)


In [None]:
import librosa.display
import numpy as np
import matplotlib.pyplot as plt

# Function to generate a sequence with added Gaussian noise
def generate_sequence_with_noise(model, start_sequence, sequence_length, noise_stddev=0.01):
    generated_sequence = start_sequence.copy()
    
    for _ in range(sequence_length):
        # Predict the next MFCC frame
        prediction = model.predict(generated_sequence[-30:].reshape(1, -1, n_mfcc)).flatten()
        
        # Add Gaussian noise to the prediction for variability
        prediction += np.random.normal(0, noise_stddev, prediction.shape)
        
        # Append the predicted frame to the generated sequence
        generated_sequence = np.vstack([generated_sequence, prediction])
        
    return generated_sequence

In [None]:
# Function to convert MFCC back to audio
def mfcc_to_audio(mfcc_sequence, sr=22050):
    S = librosa.feature.inverse.mfcc_to_mel(mfcc_sequence.T, n_mels=128)
    audio = librosa.feature.inverse.mel_to_audio(S, sr=sr)
    return audio

# Load a seed audio file and extract MFCCs
y, sr = librosa.load(os.path.join(major_folder, "Major_9.wav"))  # replace with actual file path
initial_mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc).T[:sequence_length]


In [None]:
# Generate the melody with Gaussian noise added
generated_mfccs = generate_sequence_with_noise(model, initial_mfccs, sequence_length=100, noise_stddev=0.01)

# Convert generated MFCCs to audio
generated_audio = mfcc_to_audio(generated_mfccs, sr=sr)

In [None]:
# Plot the spectrogram of the generated audio
plt.figure(figsize=(10, 6))
librosa.display.waveshow(generated_audio, sr=sr)
plt.title("Generated Audio Waveform")
plt.show()

plt.figure(figsize = (10,6))
librosa.display.waveshow(y = y, sr = sr)
plt.title("Original Audio Waveform")
plt.show()

In [None]:
# Play the generated audio
import IPython.display as ipd
ipd.Audio(generated_audio, rate=sr)