# Libraries

1. [py_midicsv](https://github.com/timwedde/py_midicsv): Used for reading MIDI files as CSV and vice versa.

2. [mido](https://github.com/mido/mido): Utilized for calculating tempo, tick, signature, and seconds in MIDI files.

3. [configparser](https://github.com/jaraco/configparser): Essential for reading and saving INI files.

4. [pickle](https://docs.python.org/pt-br/3/library/pickle.html): Employed for reading and saving PyTorch datasets.



# Recreating MIDI files

In [2]:
from py_midicsv import midi_to_csv, csv_to_midi, FileWriter
from os import path, scandir
from configparser import ConfigParser
from mido import tick2second, second2tick
from numpy import array, split
from pickle import dump, load
from torch.utils.data import Dataset, DataLoader
from typing import List


class MidiHeroFile:
    """
    This class reads MIDI files and recreates them with a normalized tempo.

    Attributes:
        path_folder (str): The path of the folder that contains the song's information, such as notes.mid and song.ini.
        save_backup (bool): Whether you want to save the .mid backup file or not. Default value is True.

    Methods:
        save(data, filename):
            Saves a .mid file in the current path_folder directory.

        __recalculate_seconds__(index, array):
            Iterates through the MIDI array starting at the given index and calculates the seconds using the real tempo.

        recreate_midi_file():
            Uses the calculated seconds to change the tempo of the .mid file. Also removes all unused data and events.
    """
    def __init__(self, path_folder, save_backup=True):
        self.path_folder = path_folder
        self.save_backup = save_backup
        config = ConfigParser()
        config.read(path.join(path_folder, "song.ini"))
        self.preview_start_time = int(config.get('song', 'preview_start_time'))
        
        self.midi_data = midi_to_csv(path.join(path_folder, "notes.mid"))

        header = self.midi_data[0].replace('\n', '').split(', ')
        self.type = int(header[3])
        self.num_channels = int(header[4])
        self.signature = int(header[5])
    

    def save(self, data, filename):
        midi_object = csv_to_midi(data)
        with open(path.join(self.path_folder, filename), "wb") as output_file:
            midi_writer = FileWriter(output_file)
            midi_writer.write(midi_object)
    

    def __recalculate_seconds__(self, index, array):
        tempo = int(array[index][3].strip())
        position = int(array[index][1].strip())
        seconds = array[index][-1]
        for i in range(index + 1, len(array)):
            current_position = int(array[i][1].strip())
            array[i][-1] = tick2second(current_position - position, 480, tempo) + seconds
        return array


    def recreate_midi_file(self):
        if self.save_backup:
            self.save(self.midi_data, "notes_backup.mid")

        filtered = filter(lambda x: "Tempo" in x or "Note_on_c" in x, self.midi_data)

        data = list(map(lambda x: x.replace('\n', '').split(', ') + [0], filtered))
        sorted_data = sorted(data, key=lambda x: (int(x[1]), int(x[0])))
        for i in range(len(sorted_data)):
            if sorted_data[i][2] == "Tempo":
                sorted_data = self.__recalculate_seconds__(i, sorted_data)

        first_tempo = 624595

        to_save = [
            f'0, 0, Header, {self.type}, {self.num_channels}, {self.signature}\n',
            '1, 0, Start_track\n',
            '1, 0, Title_t, "midi_export"\n',
            '1, 0, Time_signature, 4, 2, 24, 8\n',
            f'1, 0, Tempo, {first_tempo}\n',
            '1, 0, End_track\n',
            '2, 0, Start_track\n',
            '2, 0, Title_t, "EVENTS"\n',
            '2, 0, End_track\n',
            '3, 0, Start_track\n',
            '3, 0, Title_t, "PART GUITAR"\n',
        ]

        for row in sorted_data:
            if row[2] == "Tempo":
                continue
            tick = second2tick(row[-1], self.signature, first_tempo)
            r = row[:-1]
            r[1] = str(tick)

            to_save.append(', '.join(r) + "\n")
             
        last_tick = to_save[-1].split(', ')[1]

        to_save.append(f'3, {last_tick}, End_track\n')
        to_save.append('0, 0, End_of_file\n')

        self.midi_data = to_save
        
        self.save(to_save, "notes.mid")

    def __get_notes_filter_by_color__(self, color):
        return [
            range(60, 97, 12),
            range(61, 98, 12),
            range(62, 99, 12),
            range(63, 100, 12),
            range(64, 101, 12),
        ][color]
    
    def __get_notes_filter_by_difficulty__(self, difficulty):
        return [
            range(60, 65),
            range(72, 77),
            range(84, 89),
            range(96, 101),
        ][difficulty]

    def get_song_size(self):
        filtered_track = list(filter(lambda x: "Note_on_c" in x,  self.midi_data))
        last_tick = int(filtered_track[-1].split(', ')[1])
        return last_tick + (768 - (last_tick % 768))
        


    def get_notes_by_time(self, difficulty):
        filtered_track = list(filter(lambda x: "Note_on_c" in x,  self.midi_data))

        last_tick = int(filtered_track[-1].split(', ')[1])

        song_size = last_tick + (768 - (last_tick % 768))
        song_empty = range(song_size)

        data = list(map(lambda x: x.replace('\n', '').split(', '), filtered_track))
        notes_filter = self.__get_notes_filter_by_difficulty__(difficulty)
        filtered_notes = filter(lambda x: int(x[-2]) in notes_filter, data)
        final_data = [
            list(range(5)), # key down layer
            list(range(5)), # key up layer
        ]
        for color in range(5):
            color_filter = self.__get_notes_filter_by_color__(color)

            key_down_data = filter(lambda x: int(x[-2]) in color_filter and int(x[-1]) == 100, filtered_notes)
            key_down = list(map(lambda x: int(x[1]), key_down_data))
            final_data[0][color] = list(map(lambda x: 1 if x in key_down else 0, song_empty))
            

            key_up_data = filter(lambda x: int(x[-2]) in color_filter and int(x[-1]) == 0, filtered_notes)
            key_up = list(map(lambda x: int(x[1]), key_up_data))
            final_data[1][color] = list(map(lambda x: 1 if x in key_up else 0, song_empty))

        return array(final_data)
        



        

In [3]:
clone_hero_songs_folder = 'Songs'

song_folders = [f.path for f in scandir(clone_hero_songs_folder) if f.is_dir()]


for song_folder in song_folders:
    print(f"MIDI file recreated: {path.basename(song_folder)}")
    mid = MidiHeroFile(song_folder)
    mid.recreate_midi_file()

MIDI file recreated: Audioslave - Cochise
MIDI file recreated: Bad Religion - Infected
MIDI file recreated: Black Sabbath - Iron Man
MIDI file recreated: Blue Öyster Cult - Godzilla
MIDI file recreated: Boston - More Than a Feeling
MIDI file recreated: Burning Brides - Heart Full of Black
MIDI file recreated: Cream - Crossroads
MIDI file recreated: David Bowie - Ziggy Stardust
MIDI file recreated: Deep Purple - Smoke on the Water
MIDI file recreated: Franz Ferdinand - Take Me Out
MIDI file recreated: Helmet - Unsung
MIDI file recreated: Incubus - Stellar
MIDI file recreated: Joan Jett & the Blackhearts - I Love Rock 'n Roll
MIDI file recreated: Judas Priest - You've Got Another Thing Comin'
MIDI file recreated: Megadeth - Symphony of Destruction
MIDI file recreated: Motörhead - Ace of Spades
MIDI file recreated: Ozzy Osbourne - Bark at the Moon
MIDI file recreated: Pantera - Cowboys from Hell
MIDI file recreated: Queen - Killer Queen
MIDI file recreated: Queens of the Stone Age - No On

# Creating dataset

In [4]:
class MIDIHeroDataset(Dataset):
    def __init__(self, name, midi_files: List[MidiHeroFile], from_difficuty=0, to_difficuty=3):
        print(f"Generating {name} dataset with {len(midi_files)} songs...")
        features_full = [data.get_notes_by_time(from_difficuty) for data in midi_files]
        label_full = [data.get_notes_by_time(to_difficuty) for data in midi_files]
        sizes_full = [data.get_song_size() // 768 for data in midi_files]
        self.__size = sum(sizes_full)
        self.data = []
        self.name = name
        self.__unsqueeze_items__(features_full, label_full, sizes_full)
        print("Done!")

    def __unsqueeze_items__(self, features_full, label_full, sizes_full):
        for size_id, size in enumerate(sizes_full):
            features = split(features_full[size_id], indices_or_sections=size, axis=2)
            labels = split(label_full[size_id], indices_or_sections=size, axis=2)
            for s in range(size):
                self.data.append((features[s], labels[s]))

    def __len__(self):
        return self.__size
    
    def __getitem__(self, index):
        return self.data[index]
    
    def __str__(self):
        return f"Dataset: {self.name}\nNumber of Samples: {self.__len__()}"


In [5]:
midi_files = [MidiHeroFile(song_folder) for song_folder in song_folders]

dataset_size = len(midi_files)
train_size = int(len(midi_files) * 0.7)
test_size = int((dataset_size - train_size) * 0.6)
valid_size = dataset_size - train_size - test_size

print(f'''
dataset_size = {dataset_size}
train_size   = {train_size}
test_size    = {test_size}
valid_size   = {valid_size}
''')

train_dataset = MIDIHeroDataset("train", midi_files[:train_size])
test_dataset = MIDIHeroDataset("test", midi_files[train_size: train_size + test_size])
valid_dataset = MIDIHeroDataset("valid", midi_files[train_size + test_size:])


dataset_size = 30
train_size   = 21
test_size    = 5
valid_size   = 4

Generating train dataset with 21 songs...
Done!
Generating test dataset with 5 songs...
Done!
Generating valid dataset with 4 songs...
Done!


# Saving Dataset

In [6]:
with open('train_dataset.pkl', 'wb') as file:
    dump(train_dataset, file)

with open('test_dataset.pkl', 'wb') as file:
    dump(test_dataset, file)

with open('valid_dataset.pkl', 'wb') as file:
    dump(valid_dataset, file)

# Loading Dataset

In [7]:

with open('train_dataset.pkl', 'rb') as file:
    train_dataset = load(file)
    print(train_dataset)

with open('test_dataset.pkl', 'rb') as file:
    test_dataset = load(file)
    print(test_dataset)

with open('valid_dataset.pkl', 'rb') as file:
    test_dataset = load(file)
    print(test_dataset)

Dataset: train
Number of Samples: 4937
Dataset: test
Number of Samples: 1126
Dataset: valid
Number of Samples: 945
