# In [None]:
import os
import numpy as np
from fractions import Fraction

class IntegratedRemi2Np:
    """Convert REMI event text files into padded, step-quantized numpy arrays.

    Each song becomes two object arrays of strings:

    * ``x`` with shape ``(1, num_steps, num_input)``; columns are, in order,
      note, bar, key, tempo, velocity.
    * ``y`` with shape ``(1, num_steps, num_output)``; columns are, in order,
      chord, emotion.

    Every bar is split into ``STEPS_PER_BAR`` steps and steps that carry no
    information hold the ``PAD`` token.

    NOTE(review): positions are assumed to use the standard REMI form
    ``"k/16"`` with ``k`` in 1..16, and bars are assumed to be numbered from
    1 in the ``text`` field -- confirm against the files being converted.
    """

    PAD = '<pad>'
    STEPS_PER_BAR = 16  # Quantize one bar into 16 units

    # Event names that make up each stream returned by ``load_remi_data``.
    _STREAM_EVENT_TYPES = {
        "melody": ["Bar", "Position", "Note On"],
        "velocity": ["Bar", "Position", "Note Velocity"],
        "tempo": ["Tempo Value"],
        "chords": ["Bar", "Position", "Chord"],
        "key": ["Bar", "Position", "Key"],
        "emotion": ["Emotion"],
    }

    def __init__(self, num_max_bars=64, num_input=5, num_output=2):
        """Store the array geometry.

        Args:
            num_max_bars: Maximum number of bars kept per song; longer songs
                are truncated.
            num_input: Number of input columns (note, bar, key, tempo,
                velocity). Smaller values keep only the leading columns.
            num_output: Number of output columns (chord, emotion). Smaller
                values keep only the leading columns.
        """
        self.num_max_bars = num_max_bars
        self.num_steps = num_max_bars * self.STEPS_PER_BAR
        self.num_input = num_input  # note, bar, key, tempo, velocity
        self.num_output = num_output  # chord, emotion

    # ------------------------------------------------------------------
    # File handling
    # ------------------------------------------------------------------
    def process_all_files(self, directory):
        """Convert every ``.txt`` file in *directory* and stack the results.

        Files are visited in sorted name order so that the sample order of
        the returned arrays is reproducible across runs and platforms.

        Returns:
            ``(x, y)`` with shapes ``(n_files, num_steps, num_input)`` and
            ``(n_files, num_steps, num_output)``.

        Raises:
            ValueError: If *directory* contains no ``.txt`` file.
        """
        x_data = []
        y_data = []

        for filename in sorted(os.listdir(directory)):
            if not filename.endswith(".txt"):
                continue
            filepath = os.path.join(directory, filename)
            x, y = self.preprocess(self.load_remi_data(filepath))
            x_data.append(x)
            y_data.append(y)

        if not x_data:
            # np.vstack([]) would raise a far less helpful ValueError.
            raise ValueError(f"No .txt REMI files found in {directory!r}")

        print("Conversion completed")
        return np.vstack(x_data), np.vstack(y_data)

    def load_remi_data(self, filepath):
        """Read a REMI file once and split its events into named streams.

        Returns:
            A dict with the keys ``melody``, ``velocity``, ``tempo``,
            ``chords``, ``key`` and ``emotion``; each value is a list of
            event dicts as produced by ``parse_event``.
        """
        lines = self._read_lines(filepath)
        return {
            stream: self._select_events(lines, event_types)
            for stream, event_types in self._STREAM_EVENT_TYPES.items()
        }

    def parse_remi_file(self, filepath, event_types):
        """Return the parsed events of *filepath* whose name is in *event_types*."""
        return self._select_events(self._read_lines(filepath), event_types)

    def _read_lines(self, filepath):
        """Return all lines of *filepath*."""
        with open(filepath, 'r') as file:
            return file.readlines()

    def _select_events(self, lines, event_types):
        """Parse the lines that mention one of *event_types* as their name."""
        markers = [f"name={event_type}" for event_type in event_types]
        return [
            self.parse_event(line)
            for line in lines
            if any(marker in line for marker in markers)
        ]

    def parse_event(self, line):
        """Parse one ``name=..., time=..., value=..., text=...`` line.

        Only the fields present in *line* appear in the result. ``time`` is
        converted to ``int``; a non-integer time (for example ``None``) is
        stored as ``None`` instead of aborting the whole conversion.
        """
        event = {}

        # Extract name
        if "name=" in line:
            event['name'] = line.split("name=")[1].split(",")[0].strip()

        # Extract value
        if "value=" in line:
            event['value'] = line.split("value=")[1].split(",")[0].strip()

        # Extract text (takes the rest of the line, minus the closing paren)
        if "text=" in line:
            event['text'] = line.split("text=")[1].strip().rstrip(')')

        # Extract time
        if "time=" in line:
            raw_time = line.split("time=")[1].split(",")[0].strip().rstrip(')')
            try:
                event['time'] = int(raw_time)
            except ValueError:
                event['time'] = None

        return event

    # ------------------------------------------------------------------
    # Song -> arrays
    # ------------------------------------------------------------------
    def preprocess(self, remi_data):
        """Turn the streams of one song into padded ``x`` and ``y`` arrays.

        Args:
            remi_data: Dict as returned by ``load_remi_data``.

        Returns:
            ``(x, y)`` object arrays of shape ``(1, num_steps, num_input)``
            and ``(1, num_steps, num_output)``. Steps beyond the end of the
            song, and columns without a builder, hold ``PAD``.
        """
        # Calculate the actual length of the song
        total_bars = self.get_total_bars(remi_data['melody'])
        actual_num_steps = min(total_bars * self.STEPS_PER_BAR, self.num_steps)

        # Initialization: Create numpy arrays filled with <pad>
        x_sequence = np.full((self.num_steps, self.num_input), self.PAD, dtype=object)
        y_sequence = np.full((self.num_steps, self.num_output), self.PAD, dtype=object)

        # Column order defines the array layout; zip() below stops at the
        # configured width so a narrower num_input/num_output just drops the
        # trailing columns instead of raising IndexError.
        input_builders = (
            (self.process_melody, 'melody'),
            (self.process_bar, 'melody'),
            (self.process_key, 'key'),
            (self.process_tempo, 'tempo'),
            (self.process_velocity, 'velocity'),
        )
        output_builders = (
            (self.process_chords, 'chords'),
            (self.process_emotion, 'emotion'),
        )

        for column, (build, stream) in zip(range(self.num_input), input_builders):
            x_sequence[:actual_num_steps, column] = build(remi_data[stream], actual_num_steps)
        for column, (build, stream) in zip(range(self.num_output), output_builders):
            y_sequence[:actual_num_steps, column] = build(remi_data[stream], actual_num_steps)

        return (
            x_sequence.reshape(1, self.num_steps, self.num_input),
            y_sequence.reshape(1, self.num_steps, self.num_output),
        )

    def get_total_bars(self, melody_events):
        """Return the largest bar number among the ``Bar`` events (0 if none)."""
        bar_numbers = [int(event['text']) for event in melody_events if event['name'] == 'Bar']
        return max([0] + bar_numbers)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    def _step_index(self, position_value, bar_number):
        """Map a ``"k/16"`` position inside 1-indexed *bar_number* to a step.

        ``"1/16"`` is the first step of the bar and ``"16/16"`` the last, so
        the zero-based offset inside the bar is ``k - 1``. ``Fraction`` keeps
        the arithmetic exact.
        """
        offset_in_bar = Fraction(position_value) * self.STEPS_PER_BAR - 1
        return int(offset_in_bar) + (bar_number - 1) * self.STEPS_PER_BAR

    def _place_events(self, events, value_event, actual_num_steps, convert=None):
        """Write the values of *value_event* events at their step index.

        ``Bar`` and ``Position`` events move the cursor; each *value_event*
        stores its (optionally converted) ``value`` at the cursor. Events
        whose step falls outside ``[0, actual_num_steps)`` are ignored, which
        also prevents negative indices from wrapping to the end of the list.
        """
        sequence = [self.PAD] * actual_num_steps
        current_bar = 1
        current_position = None

        for event in events:
            name = event['name']
            if name == 'Bar':
                current_bar = int(event['text'])
            elif name == 'Position':
                current_position = self._step_index(event['value'], current_bar)
            elif name == value_event:
                value = event['value'] if convert is None else convert(event['value'])
                if current_position is not None and 0 <= current_position < actual_num_steps:
                    sequence[current_position] = value

        return sequence

    def _forward_fill(self, sequence, leading=None):
        """Replace each ``PAD`` with the nearest preceding value, in place.

        Steps before the first value receive *leading* (``PAD`` by default).
        """
        previous = self.PAD if leading is None else leading
        for index, value in enumerate(sequence):
            if value == self.PAD:
                sequence[index] = previous
            else:
                previous = value
        return sequence

    def _first_value(self, events, event_name):
        """Return the ``value`` of the first *event_name* event, else ``PAD``."""
        for event in events or []:
            if event['name'] == event_name:
                return event['value']
        return self.PAD

    # ------------------------------------------------------------------
    # Per-column builders
    # ------------------------------------------------------------------
    def process_melody(self, melody_events, actual_num_steps):
        """Return the per-step pitch (as a string), held until the next note.

        Steps before the first note stay ``PAD``. When several notes share a
        step, the last one wins.
        """
        placed = self._place_events(
            melody_events, 'Note On', actual_num_steps,
            convert=lambda pitch: str(int(pitch)),
        )
        return self._forward_fill(placed)

    def process_bar(self, melody_events, actual_num_steps):
        """Return the per-step bar number (as a string) for every seen bar."""
        bar_sequence = [self.PAD] * actual_num_steps
        for event in melody_events:
            if event['name'] != 'Bar':
                continue
            current_bar = int(event['text'])
            bar_start_index = (current_bar - 1) * self.STEPS_PER_BAR
            # A negative start would turn the slice assignment below into an
            # insertion and change the list length.
            if 0 <= bar_start_index < actual_num_steps:
                end_index = min(bar_start_index + self.STEPS_PER_BAR, actual_num_steps)
                bar_sequence[bar_start_index:end_index] = (
                    [str(current_bar)] * (end_index - bar_start_index)
                )
        return bar_sequence

    def process_key(self, key_events, actual_num_steps):
        """Return the per-step key, held until the next key change.

        Steps before the first key event take that first key, so the whole
        song carries a key whenever at least one key event was placed.
        """
        placed = self._place_events(key_events, 'Key', actual_num_steps)
        first_key = next((value for value in placed if value != self.PAD), self.PAD)
        return self._forward_fill(placed, leading=first_key)

    def process_tempo(self, tempo_events, actual_num_steps):
        """Return the first ``Tempo Value`` repeated for every step."""
        return [self._first_value(tempo_events, 'Tempo Value')] * actual_num_steps

    def process_velocity(self, velocity_events, actual_num_steps):
        """Return the per-step note velocity, held until the next note."""
        placed = self._place_events(velocity_events, 'Note Velocity', actual_num_steps)
        return self._forward_fill(placed)

    def process_chords(self, chord_events, actual_num_steps):
        """Return the per-step chord label, held until the next chord."""
        placed = self._place_events(chord_events, 'Chord', actual_num_steps)
        return self._forward_fill(placed)

    def process_emotion(self, emotion_events, actual_num_steps):
        """Return the first ``Emotion`` label repeated for every step."""
        return [self._first_value(emotion_events, 'Emotion')] * actual_num_steps


# In [None]:
# Usage example: convert a directory of REMI text files into two .npy files.
if __name__ == '__main__':
    num_max_bars = 64
    num_input = 5   # note, bar, key, tempo, velocity
    num_output = 2  # chord, emotion

    # Directory containing the REMI .txt files (fill in before running)
    remi_dir_pop = r''

    # Directory that receives the .npy files (fill in before running)
    output_dir = r''

    # The placeholders above are empty by default; stop with a clear message
    # instead of failing later inside os.makedirs('') / os.listdir('').
    if not remi_dir_pop or not output_dir:
        raise SystemExit("Set 'remi_dir_pop' and 'output_dir' before running this script.")

    if not os.path.exists(output_dir):
        # exist_ok guards against the directory appearing between the check
        # and the creation.
        os.makedirs(output_dir, exist_ok=True)
        print(f"Directory '{output_dir}' created.")

    # POP909 data processing
    preprocessor_pop = IntegratedRemi2Np(num_max_bars=num_max_bars, num_input=num_input, num_output=num_output)
    x_pop, y_pop = preprocessor_pop.process_all_files(remi_dir_pop)

    # Save as npy files
    np.save(os.path.join(output_dir, 'x_pop.npy'), x_pop)
    np.save(os.path.join(output_dir, 'y_pop.npy'), y_pop)
    print("Final np arrays saved successfully.")
