In [9]:
#***********************************************************************************************
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
from music21 import *
from tensorflow.keras.utils import to_categorical
import random

**Example of music21**

In [10]:
from music21 import stream, harmony, duration
# Create a stream to hold the chords
# Minimal music21 demo: build a four-chord progression, print it, and export it as MIDI.
chord_stream = stream.Stream()

# Define the chord progression and durations
chord_names = ['C', 'F', 'G', 'C']
chord_durations = [1, 1, 1, 1]  # One quarter length each (the printed offsets advance by 1.0 per chord)

# Create and add chords to the stream
# NOTE(review): the loop variable `chord` may shadow a `chord` name brought in by
# `from music21 import *` - confirm nothing later relies on that name.
for chord_name, dur in zip(chord_names, chord_durations):
    chord = harmony.ChordSymbol(chord_name)
    chord.duration = duration.Duration(dur)
    chord_stream.append(chord)  # append() places each chord after the previous one

# Show the stream
chord_stream.show('text')  # This will display the stream in a text-based format

# To create a MIDI file:
# The file is written relative to the current working directory; the returned path is the cell output.
chord_stream.write('midi', fp='chord_progression.mid')

{0.0} <music21.harmony.ChordSymbol C>
{1.0} <music21.harmony.ChordSymbol F>
{2.0} <music21.harmony.ChordSymbol G>
{3.0} <music21.harmony.ChordSymbol C>


'chord_progression.mid'

**Agent Definition**

In [11]:
class MusicComposerAgent:
    """Epsilon-greedy agent that scores candidate next chords with a stacked LSTM.

    A state is ``state_size`` one-hot rows of width ``action_size`` (oldest chord
    first, most recent chord last). Experiences are kept in ``memory`` and
    replayed in random minibatches to fit the network towards
    ``reward + gamma * max(prediction(next_state))``.

    NOTE(review): the network ends in a softmax layer trained with categorical
    cross-entropy, yet it is fitted to Q-style targets that can be negative or
    greater than one. A linear output with an MSE loss is the usual pairing -
    confirm which behaviour is intended before changing it.
    NOTE(review): ``learning_rate`` is stored but never handed to the optimizer;
    the model is compiled with the string ``'adam'``.
    """

    # Chords known to the reward function (diatonic triads of C major).
    MUSICAL_KEY = ['C', 'Dm', 'Em', 'F', 'G', 'Am', 'Bdim']  # 'Cmaj7', 'Dm7', 'G7'

    # (chords that must end the recent history, chord that must be played next)
    COMMON_PROGRESSIONS = [
        (['F', 'G'], 'C'),      # IV-V-I progression
        (['Dm', 'G'], 'C'),     # ii-V-I progression
        (['Am', 'Dm', 'G'], 'C'),  # vi-ii-V-I progression
        (['C', 'Am', 'Dm', 'G'], 'C'),  # I-vi-ii-V progression
        (['Am', 'F', 'C', 'G'], 'Am'),  # vi-IV-I-V progression
        (['C', 'G', 'Am', 'F'], 'C'),   # I-V-vi-IV progression
        # Add other common progressions here
    ]

    # TRANSITION_REWARD[previous_chord_index][next_chord_index]
    TRANSITION_REWARD = [
        [0.15, 0.1, 0.15, 0.2, 0.15, 0.1, 0],  #C
        [0.15, 0.05, 0.1, 0.15, 0.18, 0.15, 0],  #Dm
        [0.15, 0.1, 0.1, 0.18, 0.15, 0.15, 0.1],  #Em
        [0.18, 0.1, 0.15, 0.15, 0.15, 0.15, 0.1],  #F
        [0.18, 0.1, 0.1, 0.15, 0.15, 0.1, 0.1],  #G
        [0.15, 0.15, 0.1, 0.15, 0.1, 0.1, 0],  #Am
        [0.15, 0, 0.1, 0.1, 0.1, 0.05, 0]   #Bdim
    ]

    def __init__(self, state_size, action_size):
        """Store the hyper-parameters and build the LSTM model.

        Args:
            state_size: Number of chords that make up one state.
            action_size: Number of possible actions (width of a one-hot row).
        """
        self.state_size = state_size  # Length of the chord sequence representing the state
        self.action_size = action_size  # Number of possible actions (chords)
        self.memory = []  # Memory for storing experiences
        self.gamma = 0.95  # Discount rate for future rewards
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001  # Currently not passed to the optimizer
        self.lstm_model = self._build_lstm_model()

    def _build_lstm_model(self):
        """Builds an LSTM network to be used by the agent.

        Returns:
            A compiled Sequential model taking input of shape
            ``(state_size, action_size)`` and emitting ``action_size`` values.
        """
        model = Sequential()
        model.add(LSTM(50, input_shape=(self.state_size, self.action_size), return_sequences=True))
        model.add(LSTM(50))
        model.add(Dense(self.action_size, activation='softmax'))
        model.compile(loss='categorical_crossentropy', optimizer='adam')
        return model

    def _as_model_input(self, state):
        """Reshape a state to the ``(1, state_size, action_size)`` batch the model expects."""
        return np.reshape(state, (1, self.state_size, self.action_size))

    def choose_action(self, state):
        """Chooses the next action (chord) based on the current state.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the highest model output is returned.

        Args:
            state: Array reshapeable to ``(1, state_size, action_size)``.

        Returns:
            The chosen action index as a plain ``int``.
        """
        if np.random.rand() <= self.epsilon:
            return int(np.random.randint(self.action_size))  # Explore: choose a random action
        # Exploit: choose best action based on model (verbose=0 keeps per-call progress output quiet)
        action_probs = self.lstm_model.predict(self._as_model_input(state), verbose=0)
        return int(np.argmax(action_probs[0]))

    def remember(self, state, action, reward, next_state, done):
        """Stores an experience in memory."""
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        """Trains the agent using a batch of past experiences.

        Does nothing until at least ``batch_size`` experiences are stored.
        Each sampled experience triggers one single-sample ``fit`` call.

        Args:
            batch_size: Number of experiences to sample without replacement.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            # Reshape the states to match the input shape expected by the LSTM model
            state_input = self._as_model_input(state)

            target = reward
            if not done:
                # Bootstrap from the best predicted value of the next state
                next_values = self.lstm_model.predict(self._as_model_input(next_state), verbose=0)[0]
                target = reward + self.gamma * np.amax(next_values)
            # Only the entry of the action actually taken is moved towards the target
            target_f = self.lstm_model.predict(state_input, verbose=0)
            target_f[0][action] = target
            self.lstm_model.fit(state_input, target_f, epochs=1, verbose=0)

    def calculate_reward(self, state, action):
        """Score playing ``action`` after the chords encoded in ``state``.

        The reward is the sum of:
          * +1.0 for every entry of ``COMMON_PROGRESSIONS`` that the recent
            history plus the new chord completes,
          * -1.0 if the new chord already occurs at least three times in the
            recent history,
          * +0.1 per distinct chord in the recent history,
          * the ``TRANSITION_REWARD`` entry for (most recent chord -> new chord).

        Args:
            state: One-hot rows, oldest chord first, shaped ``(n, 7)``; a
                batched ``(1, n, 7)`` array is accepted as well.
            action: Action index; mapped onto ``MUSICAL_KEY`` modulo its length.

        Returns:
            The (possibly negative) reward.
        """
        musical_key = self.MUSICAL_KEY
        chord_history_length = 4  # Number of previous chords to consider for context
        reward = 0

        # Chord selected by the action
        current_chord_index = action % len(musical_key)
        current_chord = musical_key[current_chord_index]

        rows = np.asarray(state)
        if rows.ndim == 3:
            rows = rows[0]  # Drop the leading batch axis
        # Slicing (instead of computed indices) never wraps around when fewer
        # than chord_history_length rows are available.
        previous_chords = [
            musical_key[int(np.argmax(row))]  # position of the hot entry = chord index
            for row in rows[-chord_history_length:]
        ]

        # Reward for common chord progressions
        for progression, next_chord in self.COMMON_PROGRESSIONS:
            if previous_chords[-len(progression):] == progression and current_chord == next_chord:
                reward += 1.0  # Increase the reward for common progressions

        # Penalty for excessive repetition: the chord already fills at least
        # three of the recent slots (not necessarily consecutive ones)
        repetition_penalty = 1.0
        if previous_chords.count(current_chord) >= 3:
            reward -= repetition_penalty

        # Reward for chord variety
        unique_chords = len(set(previous_chords))
        reward += unique_chords * 0.1  # Reward based on the number of unique chords in the recent history

        # Immediate transition reward, measured from the most recent chord
        # (the last row of the state), not the oldest one
        if previous_chords:
            last_chord_index = musical_key.index(previous_chords[-1])
            reward += self.TRANSITION_REWARD[last_chord_index][current_chord_index]

        # Ensure the reward is non-negative
        #reward = max(reward, 0)

        return reward

    def train(self, batch_size):
        """Run one replay step and then decay the exploration rate.

        Nothing happens (and ``epsilon`` is left untouched) while fewer than
        ``batch_size`` experiences are stored.

        Args:
            batch_size: Number of experiences to sample for the replay step.
        """
        if len(self.memory) < batch_size:
            return
        # replay() samples with random.sample, which works on a list of tuples
        # holding arrays, and reshapes every state for the model.
        self.replay(batch_size)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

**Implementation**

In [12]:
from music21 import harmony, duration

def action_to_chord_duration(action_index, chords=None, durations=None):
    """Decode an action index into a music21 chord symbol and its duration.

    Actions are laid out duration-major: indices ``0 .. len(chords) - 1`` use
    ``durations[0]``, the next ``len(chords)`` indices use ``durations[1]``,
    and so on.

    Args:
        action_index: Integer in ``[0, len(chords) * len(durations))``.
        chords: Optional chord-name table; defaults to the seven triads
            ``['C', 'Dm', 'Em', 'F', 'G', 'Am', 'Bdim']``.
        durations: Optional duration table passed to ``duration.Duration``;
            defaults to ``[1]``.

    Returns:
        ``(chord, dur)`` - the ``harmony.ChordSymbol`` with its duration set,
        and the raw duration value taken from ``durations``.

    Raises:
        IndexError: If ``action_index`` is outside the valid range. Negative
            indices are rejected instead of silently wrapping around.
    """
    # Define chords and durations
    if chords is None:
        chords = ['C', 'Dm', 'Em', 'F', 'G', 'Am', 'Bdim'] # 'Cmaj7', 'Dm7', 'G7'
    if durations is None:
        # Values are quarter lengths; e.g. [0.25, 0.5, 1, 2] would be
        # sixteenth, eighth, quarter and half notes.
        durations = [1]

    num_chords = len(chords)
    num_durations = len(durations)

    if not 0 <= action_index < num_chords * num_durations:
        raise IndexError(
            f"action_index {action_index} is outside the valid range "
            f"[0, {num_chords * num_durations})"
        )

    # Quotient selects the duration, remainder selects the chord
    duration_index, chord_index = divmod(action_index, num_chords)

    chord_name = chords[chord_index]
    dur = durations[duration_index]

    # Create a chord symbol and set its duration
    chord = harmony.ChordSymbol(chord_name)
    chord.duration = duration.Duration(dur)

    return chord, dur

# Example usage
# Index 5 falls in the first (only) duration group and selects the sixth chord, 'Am'.
action_index = 5  # Suppose the agent chose action index 5
chord, dur = action_to_chord_duration(action_index)
print(f"Chord: {chord}, Duration: {dur}")


Chord: <music21.harmony.ChordSymbol Am>, Duration: 1


**Dataset**

In [13]:
# Chord vocabulary and the index assigned to each chord
chords = ['C', 'Dm', 'Em', 'F', 'G', 'Am', 'Bdim'] #'Cmaj7', 'Dm7', 'G7'
chord_to_index = dict(zip(chords, range(len(chords))))

# Size of the synthetic corpus
num_sequences = 100
sequence_length = 32

# Hand-written follow-up rules, keyed by the index of the preceding chord.
# Any chord without an entry is followed by a uniformly random chord.
preferred_followers = {
    chord_to_index['C']: [chord_to_index['F'], chord_to_index['G'], chord_to_index['Am']],
    chord_to_index['F']: [chord_to_index['G'], chord_to_index['C']],
    chord_to_index['G']: [chord_to_index['C'], chord_to_index['Dm'], chord_to_index['Em']],
}

dataset = []
for _ in range(num_sequences):
    sequence = []
    while len(sequence) < sequence_length:
        # An empty sequence is treated as if it had just played C, so every
        # sequence opens with F, G or Am.
        previous = sequence[-1] if sequence else chord_to_index['C']
        candidates = preferred_followers.get(previous)
        if candidates is not None:
            next_chord = np.random.choice(candidates)
        else:
            next_chord = np.random.randint(len(chords))
        sequence.append(next_chord)
    dataset.append(sequence)

# Stack the sequences into one (num_sequences, sequence_length) integer array
dataset = np.array(dataset)

# Show the first sequence, both as indices and as chord names
print("Example sequence (indices):", dataset[0])
print("Example sequence (chords):", [chords[idx] for idx in dataset[0]])

'''The dataset serves as a memory of past experiences,
 allowing the agent to learn from its interactions with the environment. During training,
 the agent explores the musical space, chooses chords, and receives rewards based on the defined reward function. The experiences
 (state, action, reward) are stored in the dataset, and the agent uses this dataset to learn the patterns that lead to higher rewards.

 In the provided dataset, the first row is:
0,1,2,3,4

This represents a sequence of chord indices. The mapping of these indices to actual chord names would depend on the context of our application. For example,
 if we have a predefined set of chords like ['C', 'Dm', 'Em', 'F', 'G', 'Am', 'Bdim', 'Cmaj7', 'Dm7', 'G7']
, then the sequence [0,1,2,3,4] would correspond to: C,Dm,Em,F,G
 '''

Example sequence (indices): [5 2 2 5 6 4 0 4 0 4 1 3 0 4 1 3 0 5 5 4 0 3 0 3 4 1 4 1 1 2 6 2]
Example sequence (chords): ['Am', 'Em', 'Em', 'Am', 'Bdim', 'G', 'C', 'G', 'C', 'G', 'Dm', 'F', 'C', 'G', 'Dm', 'F', 'C', 'Am', 'Am', 'G', 'C', 'F', 'C', 'F', 'G', 'Dm', 'G', 'Dm', 'Dm', 'Em', 'Bdim', 'Em']


"The dataset serves as a memory of past experiences,\n allowing the agent to learn from its interactions with the environment. During training,\n the agent explores the musical space, chooses chords, and receives rewards based on the defined reward function. The experiences\n (state, action, reward) are stored in the dataset, and the agent uses this dataset to learn the patterns that lead to higher rewards.\n\n In the provided dataset, the first row is:\n0,1,2,3,4\n\nThis represents a sequence of chord indices. The mapping of these indices to actual chord names would depend on the context of our application. For example,\n if we have a predefined set of chords like ['C', 'Dm', 'Em', 'F', 'G', 'Am', 'Bdim', 'Cmaj7', 'Dm7', 'G7']\n, then the sequence [0,1,2,3,4] would correspond to: C,Dm,Em,F,G\n "

In [15]:
# Parameters for the agent
state_size = 4 # Number of previous chords to use as the state
action_size = 7  # Number of distinct actions; equals the length of the 7-entry chord list defined above

# Convert sequences to a format suitable for training
def one_hot_encode(sequence, num_classes):
    """One-hot encode a sequence of class indices.

    Thin wrapper that forwards to ``to_categorical`` with the class count
    given by keyword.

    Args:
        sequence: Integer class indices to encode.
        num_classes: Total number of classes (width of each encoded row).

    Returns:
        Whatever ``to_categorical`` returns for these arguments.
    """
    encoded = to_categorical(sequence, num_classes=num_classes)
    return encoded

# One-hot encode every chord sequence: each becomes (sequence_length, action_size)
encoded_dataset = [one_hot_encode(sequence, action_size) for sequence in dataset]

# Initialize the agent
agent = MusicComposerAgent(state_size, action_size)

# Fill the replay memory once. The experiences and their rewards depend only
# on the dataset, so rebuilding them every epoch would just store duplicates
# and recompute identical rewards.
for sequence in encoded_dataset:
    for i in range(len(sequence) - state_size):
        state = sequence[i:i + state_size]                  # chords i .. i+state_size-1
        next_state = sequence[i + 1:i + state_size + 1]     # window shifted by one chord
        action = np.argmax(sequence[i + state_size])        # chord that actually followed the state
        reward = agent.calculate_reward(state, action)
        done = (i + state_size + 1 == len(sequence))        # last window of the sequence
        agent.remember(state, action, reward, next_state, done)

# Training the agent
num_epochs = 5
batch_size = 32
replays_per_epoch = len(encoded_dataset)  # one replay step per sequence, per epoch
for epoch in range(num_epochs):
    for _ in range(replays_per_epoch):
        # replay() is a no-op while fewer than batch_size experiences are stored
        agent.replay(batch_size)
    # Actions come from the dataset here, so epsilon only matters for later
    # calls to agent.choose_action
    if agent.epsilon > agent.epsilon_min:
        agent.epsilon *= agent.epsilon_decay
    print(f"Epoch {epoch+1}/{num_epochs} completed")


Epoch 1/5 completed
Epoch 2/5 completed
Epoch 3/5 completed
Epoch 4/5 completed
Epoch 5/5 completed


In [17]:
# Function to generate a sequence of chords using the trained agent
def generate_chords(agent, initial_state, desired_length, state_size, action_size, epsilon=0.0):
    """Extend a chord sequence by repeatedly querying the agent.

    Before each step the most recent ``state_size`` chords are one-hot encoded
    into a ``(1, state_size, action_size)`` array. When fewer chords are
    available, the known ones occupy the last rows and the leading rows are
    zero, so a partial history still informs the agent.

    Args:
        agent: Object exposing ``choose_action(state)`` and an ``epsilon``
            attribute.
        initial_state: Iterable of chord indices to start from (may be empty).
            It is not modified.
        desired_length: Number of chords to append.
        state_size: Number of chords per state.
        action_size: Number of possible actions (one-hot width).
        epsilon: Exploration rate used while generating. The default ``0.0``
            always takes the agent's best-scoring action; pass ``None`` to
            keep whatever rate the agent currently has. The agent's own value
            is restored afterwards.

    Returns:
        A new list of plain ``int`` chord indices: the initial chords followed
        by ``desired_length`` generated ones.
    """
    generated_sequence = [int(chord_index) for chord_index in initial_state]

    saved_epsilon = agent.epsilon
    if epsilon is not None:
        agent.epsilon = epsilon
    try:
        for _ in range(desired_length):
            history = generated_sequence[-state_size:]
            current_state = np.zeros((state_size, action_size))  # Padding with zeros
            if history:
                # Row k of the identity matrix is the one-hot vector for chord k
                current_state[-len(history):] = np.eye(action_size)[history]

            current_state_input = np.reshape(current_state, (1, state_size, action_size))
            next_action = agent.choose_action(current_state_input)
            generated_sequence.append(int(next_action))
    finally:
        # Leave the agent's exploration rate as it was found, even on error
        agent.epsilon = saved_epsilon

    return generated_sequence

# Compose a new song from an empty seed
desired_length = 32  # number of chords to generate
initial_state = []   # no starting chords
generated_chords = generate_chords(agent, initial_state, desired_length, state_size, action_size)

# Decode every action into a chord symbol and collect the symbols in a music21 stream
generated_composition = stream.Stream()
decoded_actions = map(action_to_chord_duration, generated_chords)
for chord_symbol, chord_duration in decoded_actions:
    # The chord symbol already carries its duration, so only the symbol is appended
    generated_composition.append(chord_symbol)

# Play the composition inline, then save it as a MIDI file
generated_composition.show('midi')
generated_composition.write('midi', fp='model_output_2.mid')





'model_output_2.mid'

In [18]:
# Read song mid file and show the notes using music21
# Re-parses the MIDI file written by the generation cell to check what was actually saved.
# `converter` is expected to come from the notebook's `from music21 import *`.
song = converter.parse('model_output_2.mid')
song.show('text')

{0.0} <music21.metadata.Metadata object at 0x1d1a09d91d0>
{0.0} <music21.stream.Part 0x1d46b069450>
    {0.0} <music21.stream.Measure 1 offset=0.0>
        {0.0} <music21.instrument.Instrument ''>
        {0.0} <music21.clef.BassClef>
        {0.0} <music21.tempo.MetronomeMark animato Quarter=120>
        {0.0} <music21.meter.TimeSignature 4/4>
        {0.0} <music21.chord.Chord G3 B3 D4>
        {1.0} <music21.chord.Chord F3 A3 C4>
        {2.0} <music21.chord.Chord F3 A3 C4>
        {3.0} <music21.chord.Chord B2 D3 F3>
    {4.0} <music21.stream.Measure 2 offset=4.0>
        {0.0} <music21.chord.Chord C3 E3 G3>
        {1.0} <music21.chord.Chord G3 B3 D4>
        {2.0} <music21.chord.Chord G3 B3 D4>
        {3.0} <music21.chord.Chord E3 G3 B3>
    {8.0} <music21.stream.Measure 3 offset=8.0>
        {0.0} <music21.chord.Chord E3 G3 B3>
        {1.0} <music21.chord.Chord G3 B3 D4>
        {2.0} <music21.chord.Chord E3 G3 B3>
        {3.0} <music21.chord.Chord C3 E3 G3>
    {12.0} <music