<a href="https://colab.research.google.com/github/CHIN-HUA/nft-marketplace/blob/polygon_and_music/Music%E9%A0%90%E8%99%95%E7%90%86.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import json
import music21 as m21

# Folder that is walked for kern (.krn) files
KERN_DATASET_PATH = "deutschl/test"
# Folder that receives one encoded text file per song
SAVE_DIR = "dataset"
# File that receives all encoded songs joined together
SINGLE_FILE_DATASET = "file_dataset"
# JSON file that receives the symbol -> integer mapping
MAPPING_PATH = "mapping.json"
# Number of "/" delimiter symbols written after each song in the joined file
SEQUENCE_LENGTH = 64

# Allowed note/rest durations, expressed in quarter lengths
ACCEPTABLE_DURATIONS = [
    0.25, # 16th note
    0.5, # 8th note
    0.75, # dotted 8th note
    1.0, # quarter note
    1.5, # dotted quarter note
    2, # half note
    3, # dotted half note
    4 # whole note
]


def load_songs_in_kern(dataset_path):
    """Load every kern (``.krn``) piece found under a folder using music21.

    The folder tree is walked recursively in sorted order, so the returned
    list has the same order on every run regardless of the file system.

    :param dataset_path (str): Path to the root folder of the dataset
    :return songs (list of m21 streams): One parsed piece per ``.krn`` file
    """
    songs = []

    for path, subdirs, files in os.walk(dataset_path):
        # Sorting in place makes os.walk descend into sub-folders in a
        # stable order as well
        subdirs.sort()
        for file in sorted(files):
            # Require the full ".krn" extension: comparing only the last
            # three characters would also accept names such as "notakrn"
            if file.endswith(".krn"):
                songs.append(m21.converter.parse(os.path.join(path, file)))
    return songs


def has_acceptable_durations(song, acceptable_durations):
    """Tell whether every note and rest of a piece has an allowed duration.

    :param song (m21 stream): Piece to inspect
    :param acceptable_durations (list): Allowed durations in quarter length
    :return (bool): True when all durations are allowed, False otherwise
    """
    return all(
        event.duration.quarterLength in acceptable_durations
        for event in song.flat.notesAndRests
    )


def _find_notated_key(song):
    """Return the Key found in the first measure of the first part, or None."""
    first_part = next(iter(song.getElementsByClass(m21.stream.Part)), None)
    if first_part is None:
        return None

    first_measure = next(
        iter(first_part.getElementsByClass(m21.stream.Measure)), None
    )
    if first_measure is None:
        return None

    # Scan the whole measure instead of assuming the key sits at a fixed
    # index: short measures would raise and other layouts would be missed
    return next(
        (element for element in first_measure
         if isinstance(element, m21.key.Key)),
        None,
    )


def transpose(song):
    """Transpose a song to C major (major keys) or A minor (minor keys).

    The key written in the first measure of the first part is used when one
    is present. When none is found, or its mode is neither major nor minor,
    the key is estimated with ``song.analyze("key")`` instead.

    :param song (m21 stream): Piece to transpose
    :return transposed_song (m21 stream): Result of ``song.transpose``
    :raises ValueError: If the resolved key is neither major nor minor
    """
    # Tonic every song is moved to, depending on the mode of its key
    target_tonic_by_mode = {"major": "C", "minor": "A"}

    key = _find_notated_key(song)
    if key is None or key.mode not in target_tonic_by_mode:
        key = song.analyze("key")

    if key.mode not in target_tonic_by_mode:
        # Fail with a clear message rather than an unbound local variable
        raise ValueError(f"Cannot transpose a song with key mode {key.mode!r}")

    # Interval from the current tonic to the target one. E.g., Bmaj -> Cmaj
    interval = m21.interval.Interval(
        key.tonic, m21.pitch.Pitch(target_tonic_by_mode[key.mode])
    )
    transposed_song = song.transpose(interval)
    return transposed_song


def encode_song(song, time_step=0.25):
    """Convert a score into a time-series-like music representation.

    Each item of the encoded sequence stands for ``time_step`` quarter
    lengths. The symbols are: an integer for the MIDI pitch of a note, "r"
    for a rest, and "_" to signal that the previous note/rest is carried over
    into a new time step. Example of an encoding:

        ["r", "_", "60", "_", "_", "_", "72", "_"]

    :param song (m21 stream): Piece to encode
    :param time_step (float): Duration of each time step in quarter length
    :return encoded_song (str): The symbols joined by single spaces
    :raises ValueError: If ``time_step`` is not positive, or the song holds
        an event that is neither a note nor a rest
    """
    if time_step <= 0:
        raise ValueError(f"time_step must be positive, got {time_step!r}")

    encoded_song = []

    for event in song.flat.notesAndRests:

        # handle notes
        if isinstance(event, m21.note.Note):
            symbol = event.pitch.midi # 60
        # handle rests
        elif isinstance(event, m21.note.Rest):
            symbol = "r"
        # Anything else (e.g. a chord) has no symbol in this encoding.
        # Falling through would reuse the previous event's symbol, so fail
        # loudly instead of writing a corrupted sequence.
        else:
            raise ValueError(
                f"Cannot encode event of type {type(event).__name__}; "
                "only single notes and rests are supported"
            )

        # convert the note/rest into time series notation
        steps = int(event.duration.quarterLength / time_step)
        if steps > 0:
            # The first step carries the symbol itself, every further step
            # only marks that the same symbol is being held
            encoded_song.append(symbol)
            encoded_song.extend(["_"] * (steps - 1))

    # cast encoded song to str
    return " ".join(map(str, encoded_song))


def preprocess(dataset_path):
    """Load, filter, transpose and encode all songs, saving one file per song.

    Songs containing a duration outside ACCEPTABLE_DURATIONS are skipped.
    Every remaining song is written to SAVE_DIR in a file named after its
    index in the loaded list, so skipped songs leave gaps in the numbering.

    :param dataset_path (str): Path to the kern dataset
    """

    # load folk songs
    print("Loading songs...")
    songs = load_songs_in_kern(dataset_path)
    print(f"Loaded {len(songs)} songs.")

    # open(..., "w") does not create missing folders, so make sure the
    # output folder exists before the first song is written
    os.makedirs(SAVE_DIR, exist_ok=True)

    for i, song in enumerate(songs):

        # filter out songs that have non-acceptable durations
        if not has_acceptable_durations(song, ACCEPTABLE_DURATIONS):
            continue

        # transpose songs to Cmaj/Amin
        song = transpose(song)

        # encode songs with music time series representation
        encoded_song = encode_song(song)

        # save songs to text file
        save_path = os.path.join(SAVE_DIR, str(i))
        with open(save_path, "w") as fp:
            fp.write(encoded_song)


def load(file_path):
    """Read a text file and return its whole content.

    :param file_path (str): Path of the file to read
    :return (str): Full text of the file
    """
    with open(file_path, "r") as handle:
        return handle.read()


def create_single_file_dataset(dataset_path, file_dataset_path, sequence_length):
    """Generate a file collating all the encoded songs and adding new piece delimiters.

    Every file found under ``dataset_path`` is read in sorted order, so the
    generated dataset is identical from one run to the next.

    :param dataset_path (str): Path to folder containing the encoded songs
    :param file_dataset_path (str): Path of the file that receives all songs
    :param sequence_length (int): Number of "/" delimiter symbols written
        after each song
    :return songs (str): String containing all songs in dataset + delimiters
    """

    new_song_delimiter = "/ " * sequence_length

    # Collect the pieces in a list and join once at the end; growing one
    # string inside the loop copies everything accumulated so far each time
    chunks = []

    # load encoded songs and add delimiters
    for path, subdirs, files in os.walk(dataset_path):
        # Sorting in place also fixes the order in which sub-folders are visited
        subdirs.sort()
        for file in sorted(files):
            song = load(os.path.join(path, file))
            chunks.append(song + " " + new_song_delimiter)

    # remove the space left at the very end of the string
    songs = "".join(chunks)[:-1]

    # save the string that contains the whole dataset
    with open(file_dataset_path, "w") as fp:
        fp.write(songs)

    return songs


def create_mapping(songs, mapping_path):
    """Create a json file that maps the symbols in the song dataset onto integers.

    The vocabulary is sorted before ids are assigned, so the same dataset
    always yields the same mapping.

    :param songs (str): String with all songs
    :param mapping_path (str): Path where to save the mapping
    :return: None
    """
    # identify the vocabulary; a bare set has no stable order between runs,
    # which would give every symbol a different id each time
    vocabulary = sorted(set(songs.split()))

    # create the mapping
    mappings = {symbol: index for index, symbol in enumerate(vocabulary)}

    # save the vocabulary to a json file
    with open(mapping_path, "w") as fp:
        json.dump(mappings, fp, indent=4)


def main():
    """Run the pipeline: encode the songs, merge them, then build the mapping."""
    preprocess(KERN_DATASET_PATH)
    dataset_text = create_single_file_dataset(
        SAVE_DIR,
        SINGLE_FILE_DATASET,
        SEQUENCE_LENGTH,
    )
    create_mapping(dataset_text, MAPPING_PATH)


# Run the pipeline only when this file is executed as a script
if __name__ == "__main__":
    main()


Loading songs...
Loaded 0 songs.
