In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# Later cells read the dataset from a path below this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import librosa.feature
import matplotlib.pyplot as plt
import numpy as np
from keras import layers
from tensorflow import keras

def load_melspectrogram(audio_path, plot=False, max_frames=20000):
    """Load an audio file and return a fixed-width mel spectrogram.

    The audio is resampled to 22050 Hz, converted to a mel power spectrogram
    and then either zero-padded or truncated along the time axis so that it
    always has exactly ``max_frames`` frames.

    Args:
        audio_path: Path of the audio file handed to ``librosa.load``.
        plot: When true, display the (padded) spectrogram in dB.
        max_frames: Number of time frames kept. With librosa's default
            ``hop_length`` of 512 at 22050 Hz, 20000 frames is
            20000 * 512 / 22050 s, i.e. about 7.74 minutes of audio.

    Returns:
        ``numpy.ndarray`` of shape ``(max_frames, n_mels)`` (time first).
    """
    y, sr = librosa.load(audio_path, sr=22050)
    melspectrogram_full = librosa.feature.melspectrogram(y=y, sr=sr)

    # Copy only the frames that exist; the rest of the buffer stays zero.
    kept_frames = min(melspectrogram_full.shape[1], max_frames)
    melspectrogram = np.zeros((melspectrogram_full.shape[0], max_frames), dtype=float)
    melspectrogram[:, :kept_frames] = melspectrogram_full[:, :kept_frames]

    if plot:
        # `import librosa.feature` does not guarantee that the display
        # submodule is loaded, so import the plotting helper explicitly.
        # A `from ... import` is used so `librosa` is not rebound locally.
        from librosa.display import specshow

        fig, ax = plt.subplots()
        S_dB = librosa.power_to_db(melspectrogram, ref=np.max)
        img = specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, ax=ax)
        fig.colorbar(img, ax=ax, format='%+2.0f dB')
        ax.set(title='Mel-frequency spectrogram')
        plt.show()
    return melspectrogram.transpose()

In [None]:
from enum import Enum
from typing import List, Optional
import abc


class SectionName(Enum):
    """Section headers that can appear in a beatmap file.

    Each member's value is the literal header text including the square
    brackets (e.g. ``"[General]"``), so it can be compared directly with a
    header line read from the file.
    """
    General = "[General]"
    Editor = "[Editor]"
    Metadata = "[Metadata]"
    Difficulty = "[Difficulty]"
    Events = "[Events]"
    TimingPoints = "[TimingPoints]"
    Colours = "[Colours]"
    HitObjects = "[HitObjects]"


class Section:
    """Base class for objects that are filled from lines of a beatmap file."""

    def value(self, value):
        """Convert a raw text field to the most specific type possible.

        Args:
            value: Raw field text (a ``str``).

        Returns:
            An ``int`` if the text is an integer (including negative ones such
            as ``"-1"``), otherwise a ``float`` if it parses as one, otherwise
            the original string unchanged (surrounding whitespace included).
        """
        text = value.strip()
        # Try int first so that signed integers are not demoted to float.
        try:
            return int(text)
        except ValueError:
            pass
        try:
            return float(text)
        except ValueError:
            return value

    def parse_line(self, line: str):
        """Populate the object from one line; subclasses override this."""
        ...


class HitSample:
    """Custom sample settings attached to a hit object.

    Rendered by ``__str__`` as colon-terminated fields:
    ``normalSet:additionSet:index:volume:`` followed by ``filename:`` when a
    filename is set (i.e. is not ``None``).
    """
    normalSet: int = 0  # SampleSet
    additionSet: int = 0  # SampleSet
    index: int = 0
    volume: int = 0
    filename: Optional[str] = None

    def set(self, normalSet: int, additionSet: int, index: int, volume: int, filename: Optional[str] = ""):
        """Overwrite every field at once; ``filename`` defaults to ``""``."""
        self.normalSet, self.additionSet = normalSet, additionSet
        self.index, self.volume = index, volume
        self.filename = filename

    def __str__(self):
        """Return the colon-terminated text form of the sample."""
        parts = [self.normalSet, self.additionSet, self.index, self.volume]
        if self.filename is not None:
            parts.append(self.filename)
        return "".join(f"{part}:" for part in parts)


class General(Section):
    """Settings read from the ``[General]`` section of a beatmap file.

    Every constructor argument becomes an attribute of the same name.
    ``parse_line`` then overwrites attributes from ``Key: value`` lines.
    """

    def __init__(self,
                 AudioFilename: Optional[str] = None,
                 AudioLeadIn: Optional[int] = 0,
                 AudioHash: Optional[str] = None,
                 PreviewTime: Optional[int] = -1,
                 Countdown: Optional[int] = 1,
                 SampleSet: Optional[str] = "Normal",  # SampleSet.Normal.value
                 StackLeniency: Optional[float] = 0.7,
                 Mode: Optional[int] = 0,
                 LetterboxInBreaks: Optional[int] = 0,
                 StoryFireInFront: Optional[int] = 1,
                 UseSkinSprites: Optional[int] = 0,
                 AlwaysShowPlayfield: Optional[int] = 0,
                 OverlayPosition: Optional[str] = "NoChange",
                 SkinPreference: Optional[str] = None,
                 EpilepsyWarning: Optional[int] = 0,
                 CountdownOffset: Optional[int] = 0,
                 SpecialStyle: Optional[int] = 0,
                 WidescreenStoryboard: Optional[int] = 0,
                 SamplesMatchPlaybackRate: Optional[int] = 0):
        self.AudioFilename = AudioFilename
        self.AudioLeadIn = AudioLeadIn
        self.AudioHash = AudioHash
        self.PreviewTime = PreviewTime
        self.Countdown = Countdown
        self.SampleSet = SampleSet
        self.StackLeniency = StackLeniency
        self.Mode = Mode
        self.LetterboxInBreaks = LetterboxInBreaks
        self.StoryFireInFront = StoryFireInFront
        self.UseSkinSprites = UseSkinSprites
        self.AlwaysShowPlayfield = AlwaysShowPlayfield
        self.OverlayPosition = OverlayPosition
        self.SkinPreference = SkinPreference
        self.EpilepsyWarning = EpilepsyWarning
        self.CountdownOffset = CountdownOffset
        self.SpecialStyle = SpecialStyle
        self.WidescreenStoryboard = WidescreenStoryboard
        self.SamplesMatchPlaybackRate = SamplesMatchPlaybackRate

    def parse_line(self, line: str):
        """Store one ``Key: value`` line as an attribute.

        Only the first ``:`` separates key from value, so values containing
        colons are kept whole. Key and value are stripped of surrounding
        whitespace. Lines without a ``:`` or with an empty key are ignored
        instead of raising.
        """
        key, separator, raw_value = line.partition(':')
        key = key.strip()
        if not separator or not key:
            return
        setattr(self, key, self.value(raw_value.strip()))


class Editor(Section):
    """Settings read from the ``[Editor]`` section of a beatmap file."""

    def __init__(self,
                 Bookmarks: Optional[List[int]] = None,
                 DistanceSpacing: Optional[float] = None,
                 BeatDivisor: Optional[int] = None,
                 GridSize: Optional[int] = None,
                 TimelineZoom: Optional[float] = None):
        self.Bookmarks = Bookmarks
        self.DistanceSpacing = DistanceSpacing
        self.BeatDivisor = BeatDivisor
        self.GridSize = GridSize
        self.TimelineZoom = TimelineZoom

    def parse_line(self, line: str):
        """Store one ``Key: value`` line as an attribute.

        ``Bookmarks`` is split on commas into a list of converted values
        (empty items are dropped). Every other key is stored as a single
        converted value. Lines without a ``:`` or with an empty key are
        ignored instead of raising.
        """
        key, separator, raw_value = line.partition(':')
        key = key.strip()
        if not separator or not key:
            return
        if key == "Bookmarks":
            self.Bookmarks = [self.value(item.strip())
                              for item in raw_value.split(",")
                              if item.strip()]
        else:
            setattr(self, key, self.value(raw_value.strip()))


class Metadata(Section):
    """Descriptive fields read from the ``[Metadata]`` section."""

    # Only these keys are converted to numbers; everything else is free text
    # (a title such as "1234" or "1e5" must stay a string).
    _NUMERIC_KEYS = ("BeatmapID", "BeatmapSetID")

    def __init__(self,
                 Title: Optional[str] = None,
                 TitleUnicode: Optional[str] = None,
                 Artist: Optional[str] = None,
                 ArtistUnicode: Optional[str] = None,
                 Creator: Optional[str] = None,
                 Version: Optional[str] = None,
                 Source: Optional[str] = None,
                 Tags: Optional[List[str]] = None,
                 BeatmapID: Optional[int] = None,
                 BeatmapSetID: Optional[int] = None):

        self.Title = Title
        self.TitleUnicode = TitleUnicode
        self.Artist = Artist
        self.ArtistUnicode = ArtistUnicode
        self.Creator = Creator
        self.Version = Version
        self.Source = Source
        self.Tags = Tags
        self.BeatmapID = BeatmapID
        self.BeatmapSetID = BeatmapSetID

    def parse_line(self, line: str):
        """Store one ``Key:value`` line as an attribute.

        Only the first ``:`` is treated as the separator, so text values that
        contain colons are kept whole. ``Tags`` is split on whitespace into a
        list with no empty entries. Lines without a ``:`` or with an empty key
        are ignored instead of raising.
        """
        key, separator, raw_value = line.partition(':')
        key = key.strip()
        if not separator or not key:
            return
        text = raw_value.strip()
        if key == "Tags":
            self.Tags = text.split()
        elif key in self._NUMERIC_KEYS:
            setattr(self, key, self.value(text))
        else:
            setattr(self, key, text)


class Difficulty(Section):
    """Settings read from the ``[Difficulty]`` section.

    The attributes below are annotations only: each one exists on an instance
    only after a matching ``Key:value`` line has been parsed.
    """
    HPDrainRate: float
    CircleSize: float
    OverallDifficulty: float
    ApproachRate: float
    SliderMultiplier: float
    SliderTickRate: float

    def parse_line(self, line: str):
        """Store one ``Key:value`` line as an attribute.

        Key and value are stripped of surrounding whitespace. Lines without a
        ``:`` or with an empty key are ignored instead of raising.
        """
        key, separator, raw_value = line.partition(':')
        key = key.strip()
        if not separator or not key:
            return
        setattr(self, key, self.value(raw_value.strip()))


class EventParams:
    """Empty base class shared by the event parameter declarations."""
    pass


class Event(Section):
    """Declaration of a single event entry.

    Only annotations are declared: no defaults are assigned and
    ``parse_line`` is not overridden here.
    """
    eventType: str
    startTime: int
    eventParams: List[EventParams]


class Background(EventParams):
    """Parameters of a background event (annotations only, no defaults)."""
    filename: str
    xOffset: int
    yOffset: int


class Video(EventParams):
    """Parameters of a video event (annotations only, no defaults)."""
    # NOTE(review): `Video: 1` is an annotation whose "type" is the literal 1;
    # it does not create a class attribute. Presumably it records the event
    # type code for videos -- confirm against the file format documentation.
    Video: 1
    startTime: int
    filename: str
    xOffset: int
    yOffset: int


class Pause(EventParams):
    """Parameters of a break event (annotations only, no defaults)."""
    # 2:Break TODO check the wiki because the syntax is strange
    # NOTE(review): `Break: 2` is an annotation with the literal 2 as its
    # "type"; it does not create a class attribute.
    Break: 2
    startTime: int
    endTime: int


#  TODO: storyboard events are not modelled yet.
class Storyboard(EventParams):
    """Placeholder for storyboard event parameters; currently empty."""
    pass


class TimingPoint(Section):
    """One comma-separated entry of the ``[TimingPoints]`` section.

    Fields are positional, in the order listed in ``_FIELD_ORDER``. Trailing
    fields that are missing from a line keep their class-level default (when
    one is declared below).
    """
    time: int
    beatLength: float
    meter: int
    sampleSet: int = 1  # SampleSet = SampleSet.Normal.value
    sampleIndex: int = 0
    volume: int = 1
    uninherited: int
    effects: int = 0  # Effect = None
    bpm: int

    # Positional layout of a timing point line.
    _FIELD_ORDER = ("time", "beatLength", "meter", "sampleSet",
                    "sampleIndex", "volume", "uninherited", "effects")

    def parse_line(self, line: str):
        """Fill the fields from one line, then derive ``bpm``.

        Lines with fewer than eight fields are accepted: only the fields that
        are present are assigned.
        """
        members = line.split(",")
        # zip() stops at the shorter sequence, so short lines do not raise.
        for field_name, raw in zip(self._FIELD_ORDER, members):
            setattr(self, field_name, self.value(raw))
        self.calculate_bpm()

    def calculate_bpm(self):
        """Set ``bpm`` to ``round(60000 / beatLength)``.

        ``bpm`` falls back to 0 when ``beatLength`` is missing, zero,
        non-numeric, or produces a value that cannot be rounded (NaN/inf).
        """
        beat_length = getattr(self, "beatLength", None)
        if not isinstance(beat_length, (int, float)) or not beat_length:
            self.bpm = 0
            return
        try:
            self.bpm = round(60000 / beat_length)
        except (ValueError, OverflowError):
            # round() rejects NaN and infinities.
            self.bpm = 0


# TODO check wiki for colours
class ColourObject(Section):
    """Placeholder for an entry of the colours section.

    Only annotations are declared and ``parse_line`` is a no-op, so lines
    passed to it are currently discarded.
    """
    Combo: int
    color: List[int]

    # Not modelled yet:
    # SliderTrackOverride
    # SliderBorder
    def parse_line(self, line):
        """Ignore the line (colour parsing is not implemented)."""
        pass


class HitObject(Section):
    """Fields shared by every hit object: position, time, type and sounds.

    ``hitSample`` is stored as text. When none is supplied, the string form
    of a default ``HitSample`` is used.
    """

    def __init__(self,
                 x: Optional[int] = 0,
                 y: Optional[int] = 0,
                 time: Optional[int] = 0,
                 type: Optional[int] = 0,  # Type
                 hitSound: Optional[int] = 0,
                 hitSample: Optional[str] = None):
        self.x, self.y = x, y
        self.time = time
        self.type = type
        self.hitSound = hitSound
        self.hitSample = str(HitSample()) if hitSample is None else hitSample

    def __str__(self):
        """Return the six common fields joined by commas."""
        fields = (self.x, self.y, self.time, self.type, self.hitSound, self.hitSample)
        return ",".join(f"{field}" for field in fields)

    def get_hit_sample(self, line) -> str:
        """Return ``line`` if it looks like a hit sample, else a default."""
        return line if self.has_hit_sample(line) else "0:0:0:0:0:"

    def has_hit_sample(self, line) -> bool:
        """Return False exactly when ``line`` is an ``int`` or a ``float``."""
        return type(line) not in (int, float)

    def get(self, _type):
        """Look up an instance attribute by name; ``None`` if absent."""
        return vars(self).get(str(_type))

    def get_type(self, _type):
        """Print the kind of object encoded in the ``_type`` bit flags."""
        for mask, label in ((1, "circle"), (2, "slider"), (8, "spinner")):
            if _type & mask:
                print(label)
                break
        else:
            # elif _type & 128: mania (not handled)
            print("unknown type:", _type)

    def is_slider(self, _type) -> bool:
        """Return True when bit value 2 is set in ``_type``."""
        return bool(_type & 2)

    def is_spinner(self, _type) -> bool:
        """Return True when bit value 8 is set in ``_type``."""
        return bool(_type & 8)

    def is_circle(self, _type) -> bool:
        """Return True when bit value 1 is set in ``_type``."""
        return bool(_type & 1)


class Cercle(HitObject):
    """A hit circle: carries only the fields common to all hit objects."""

    def __init__(self,
                 x: Optional[int] = 0,
                 y: Optional[int] = 0,
                 time: Optional[int] = 0,
                 type: Optional[int] = 0,  # Type
                 hitSound: Optional[int] = 0,
                 hitSample: Optional[str] = None):
        super().__init__(x=x, y=y, time=time, type=type,
                         hitSound=hitSound, hitSample=hitSample)

    def parse_line(self, line):
        """Fill the object from one comma-separated line.

        The first five fields are converted and stored in order; the last
        field of the line is interpreted as the hit sample.
        """
        fields = line.split(",")
        for position, attribute in enumerate(("x", "y", "time", "type", "hitSound")):
            setattr(self, attribute, self.value(fields[position]))
        self.hitSample = self.get_hit_sample(self.value(fields[-1]))


class Spinner(HitObject):
    """A spinner: the common hit object fields plus an end time."""
    endTime: int

    def parse_line(self, line):
        """Fill the object from one comma-separated line.

        Fields 0-5 are converted and stored in order (``endTime`` is the
        sixth); the last field of the line is interpreted as the hit sample.
        """
        fields = line.split(",")
        layout = ("x", "y", "time", "type", "hitSound", "endTime")
        for position, attribute in enumerate(layout):
            setattr(self, attribute, self.value(fields[position]))

        self.hitSample = self.get_hit_sample(self.value(fields[-1]))


class CurvePoint:
    """A single slider control point with ``x`` and ``y`` coordinates."""
    x: int
    y: int

    def __str__(self):
        """Return the point formatted as ``x:y``."""
        return "{}:{}".format(self.x, self.y)


class Slider(HitObject):
    """A slider: common hit object fields plus curve and repeat data.

    Expected field order of a line:
    ``x,y,time,type,hitSound,curveType|points,slides,length,
    edgeSounds,edgeSets,hitSample``. The last three fields are optional.
    """
    curveType: str
    curvePoints: List[CurvePoint]
    slides: int
    length: float
    edgeSounds: str
    edgeSets: str

    def parse_line(self, line):
        """Fill the object from one comma-separated slider line.

        Optional trailing fields are read by position, so a line that stops
        after ``edgeSets`` no longer has that field mistaken for the hit
        sample. ``edgeSounds`` and ``edgeSets`` are always set (empty string
        when absent). Control points that are not of the form ``x:y`` are
        skipped.
        """
        members = line.split(",")
        self.x = self.value(members[0])
        self.y = self.value(members[1])
        self.time = self.value(members[2])
        self.type = self.value(members[3])
        self.hitSound = self.value(members[4])

        # Curve description: "<type>|x1:y1|x2:y2|..."
        curve_type, *raw_points = members[5].split('|')
        self.curveType = curve_type
        self.curvePoints = []
        for raw_point in raw_points:
            coordinates = raw_point.split(':')
            if len(coordinates) < 2:
                continue
            curve_point = CurvePoint()
            curve_point.x = self.value(coordinates[0])
            curve_point.y = self.value(coordinates[1])
            self.curvePoints.append(curve_point)

        # Number of slides and pixel length (length is rounded to an int).
        self.slides = int(members[6])
        self.length = int(round(float(members[7])))

        # Optional per-edge sounds and sample sets.
        self.edgeSounds = members[8] if len(members) > 8 else ""
        self.edgeSets = members[9] if len(members) > 9 else ""

        # Optional hit sample. A numeric placeholder makes get_hit_sample
        # return its default text when the field is absent.
        sample_field = self.value(members[10]) if len(members) > 10 else 0
        self.hitSample = self.get_hit_sample(sample_field)


In [None]:
import codecs
import os
import re
from typing import List


class Parser:
    """Line-oriented reader for osu! ``.osu`` beatmap files.

    Call ``parse_file`` (or feed lines to ``parse_line``) and then read the
    populated ``general``, ``editor``, ``metadata``, ``difficulty``,
    ``timing_points`` and ``hit_objects`` attributes. The ``[Events]`` and
    ``[Colours]`` sections are recognised but their lines are not parsed.
    """

    def __init__(self):
        self.file_format = ""
        self.general = General()
        self.editor = Editor()
        self.metadata = Metadata()
        self.difficulty = Difficulty()
        self.events: List[Event] = []
        self.timing_points: List[TimingPoint] = []
        self.colours: List[ColourObject] = []
        self.hit_objects: List[HitObject] = []

        # Header text (brackets included) of the section being read.
        self.osu_section = ""

    def parse_hit_object_type(self, line):
        """Build the hit object matching the type flags of ``line``.

        The fourth comma-separated field is a bit field, see
        https://osu.ppy.sh/wiki/fr/Client/File_formats/Osu_%28file_format%29#type
        Bit value 1 is a circle, 2 a slider and 8 a spinner. Anything else
        (e.g. osu!mania holds, value 128) is reported and parsed as a circle.

        Returns:
            The parsed ``Cercle``, ``Slider`` or ``Spinner`` instance.
        """
        _type = int(line.split(",")[3].strip())
        recognised = True
        if _type & 1:
            hit_object = Cercle()
        elif _type & 2:
            hit_object = Slider()
        elif _type & 8:
            hit_object = Spinner()
        else:
            recognised = False
            hit_object = Cercle()
        hit_object.parse_line(line)
        if not recognised:
            print("unknown type:", _type)
        return hit_object

    def parse_line(self, line: str):
        """Consume one line of the file and update the parser state.

        Blank lines and ``//`` comment lines are skipped. A line consisting
        solely of ``[Name]`` switches the current section. Brackets inside a
        value (e.g. a title containing ``[TV Size]``) do not.
        """
        # A UTF-8 byte-order mark may precede the first line of the file.
        line = line.strip().lstrip("\ufeff").strip()
        if not line or line.startswith("//"):
            return

        header = re.match(r"^\[(\w+)\]$", line)
        if header:
            self.osu_section = header.group(0)
            return
        version = re.match('^osu file format (v[0-9]+)$', line)
        if version:
            self.file_format = version.group(1)
            return

        key_value_sections = {
            SectionName.General.value: self.general,
            SectionName.Editor.value: self.editor,
            SectionName.Metadata.value: self.metadata,
            SectionName.Difficulty.value: self.difficulty,
        }
        target = key_value_sections.get(self.osu_section)
        if target is not None:
            target.parse_line(line)
        elif self.osu_section == SectionName.TimingPoints.value:
            timing_point = TimingPoint()
            timing_point.parse_line(line)
            self.timing_points.append(timing_point)
        elif self.osu_section == SectionName.HitObjects.value:
            self.hit_objects.append(self.parse_hit_object_type(line))
        # [Events] and [Colours] lines are intentionally ignored for now.

    def parse_file(self, file):
        """Parse every line of the file at path ``file``.

        Nothing happens (and nothing is raised) when the path is not an
        existing regular file. The file is decoded as UTF-8, and a leading
        byte-order mark is dropped.
        """
        if not os.path.isfile(file):
            return
        with open(file, 'r', encoding="utf-8-sig") as handle:
            for line in handle:
                self.parse_line(line)

In [None]:
def scale_beatmap(hitpoints: List[HitObject], max_duration_ms: float = 7.739984882842026 * 60 * 1000):
    """Drop the hit objects that start after the end of the audio window.

    Args:
        hitpoints: Hit objects exposing a numeric ``time`` attribute
            (milliseconds).
        max_duration_ms: Inclusive cut-off in milliseconds. The default is
            7.739984882842026 minutes (about 7 min 44 s), the same duration
            noted next to the 20000-frame limit in the spectrogram loader.

    Returns:
        A new list with the objects whose ``time`` is at most the cut-off, in
        their original order.
    """
    return [hit for hit in hitpoints if hit.time <= max_duration_ms]


def load_beatmap_attributes(path, max_hit_object=4000):
    """Parse a beatmap file into a fixed-size feature table.

    Row layout of the returned table (one column per hit object):
    0 x, 1 y, 2 time, 3 type, 4 endTime (spinners),
    5/6, 7/8, 9/10 x/y of the first three slider control points,
    11 slides, 12 length (sliders). Cells that do not apply stay 0.

    Args:
        path: Path of the beatmap file.
        max_hit_object: Number of columns; extra hit objects are dropped.

    Returns:
        ``(data, overall_difficulty)`` where ``data`` is an object array of
        shape ``(13, max_hit_object)``.

    Raises:
        AttributeError: If the file defines no ``OverallDifficulty``.
    """
    parser = Parser()
    parser.parse_file(path)
    data = np.zeros((13, max_hit_object), dtype=object)

    # Trim to the audio window first, then to the table width.
    hitpoints = scale_beatmap(parser.hit_objects)[:max_hit_object]

    for i, o in enumerate(hitpoints):
        data[0][i] = o.x
        data[1][i] = o.y
        data[2][i] = o.time
        data[3][i] = o.type

        if isinstance(o, Spinner):
            data[4][i] = o.endTime
        elif isinstance(o, Slider):
            # A slider may carry fewer than three control points (even none).
            for slot, point in enumerate(o.curvePoints[:3]):
                data[5 + 2 * slot][i] = point.x
                data[6 + 2 * slot][i] = point.y
            data[11][i] = o.slides
            data[12][i] = o.length

    return data, parser.difficulty.OverallDifficulty


def load_beatmaps_and_spectrograms(paths: List, max:int):
    """Load the training triples for the first ``max`` song entries.

    Args:
        paths: Sequence of ``(beatmap_paths, audio_path)`` pairs.
        max: Number of leading entries of ``paths`` to consider.

    Returns:
        ``(arr, spectrograms, diff)``: one transposed feature table, one
        normalised spectrogram and one difficulty value per beatmap. The
        spectrogram of a song is repeated for each of its beatmaps. ``diff``
        is a float array; the other two are lists.
    """
    arr = []
    diff = []
    spectrograms = []
    for i, path in enumerate(paths):
        # Check the limit first so entries past it are not even reported.
        if i >= max:
            break
        beatmaps, audio = path[0], path[1]
        print(audio)
        if not audio or not beatmaps:
            # No audio to analyse or nothing to pair it with: skip the entry
            # rather than fail or compute an unused spectrogram.
            continue
        spectrogram = normalize(load_melspectrogram(audio))
        for beatmap in beatmaps:
            attributes, difficulty = load_beatmap_attributes(beatmap)
            arr.append(attributes.transpose())
            diff.append(difficulty)
            spectrograms.append(spectrogram)
    diff = np.array(diff, dtype=float)
    return arr, spectrograms, diff


def normalize(img):
    """Standardise an array: subtract its mean, divide by its std.

    When the standard deviation is exactly 0 the centred values are divided
    by 0.001 instead.
    """
    centred = img - np.mean(img)
    spread = np.std(img)
    divisor = spread if spread != 0 else 0.001
    return centred / divisor


def contains_any_index(root, a_list):
    """Return the 1-based position of the first item starting with ``root``.

    Returns 0 when no item of ``a_list`` starts with ``root``.
    """
    matches = (position
               for position, candidate in enumerate(a_list, start=1)
               if candidate.startswith(root))
    return next(matches, 0)


def get_paths(dir_path, max=0):
    """Collect the beatmap and audio files of every song folder.

    Args:
        dir_path: Directory whose sub-directories each hold one song.
        max: Maximum number of song folders to return; ``0`` means no limit.

    Returns:
        A list of ``(beatmap_paths, audio_path)`` tuples, one per song folder
        in sorted name order. ``beatmap_paths`` lists the ``.osu`` files and
        ``audio_path`` is the ``.mp3`` file (``""`` when there is none).
        Entries of ``dir_path`` that are not directories are skipped, as are
        files with other extensions.
    """
    file_paths = []

    for entry in sorted(os.listdir(dir_path)):
        # `max == 0` disables the limit instead of stopping after one folder.
        if max and len(file_paths) >= max:
            break
        folder = os.path.join(dir_path, entry)
        if not os.path.isdir(folder):
            continue

        audio = ""
        beatmaps = []
        for file_name in sorted(os.listdir(folder)):
            lowered = file_name.lower()
            if lowered.endswith(".mp3"):
                audio = os.path.join(folder, file_name)
            elif lowered.endswith(".osu"):
                beatmaps.append(os.path.join(folder, file_name))
        file_paths.append((beatmaps, audio))

    return file_paths

In [None]:
from keras.utils.vis_utils import plot_model
base_path = "/content/drive/MyDrive"
paths = get_paths(base_path)
df, spectrograms, diff = load_beatmaps_and_spectrograms(paths, 20)
x_train = spectrograms
x_train = np.array(x_train, dtype=float)
y_train = df
y_train = np.array(y_train, dtype=float)

# Add the start-of-sequence and end-of-sequence tokens.
# Sizes are taken from the data instead of being hard-coded.
num_samples, sequence_length, num_features = y_train.shape
start_of_sequence = np.full(num_features, -1.0)
end_of_sequence = np.full((num_samples, 1, num_features), -2)

# Teacher-forcing input: each sample's own target sequence shifted right by
# one step, with the start token in front. (Previously every row was filled
# from y_train[0] because the sample index was never advanced.)
decoder_input = np.zeros((num_samples, sequence_length + 1, num_features))
decoder_input[:, 0, :] = start_of_sequence
decoder_input[:, 1:, :] = y_train

# Target: the sequence followed by the end token.
y_train = np.append(y_train, end_of_sequence, axis=1)

print("Taille de l'input d'entraînement : " + str(x_train.shape))
print("Taille de l'output d'entraînement : " + str(y_train.shape))

input_dim = 128
decoder_input_shape = 13
latent_dim = 256

# Model's input
input_spectrogram = keras.Input((None, input_dim), name="input_spectrogram")
# Expand the dimension to use 2D CNN.
x = layers.Reshape((-1, input_dim, 1), name="expand_dim")(input_spectrogram)
# Convolution layer 1
x = layers.Conv2D(
    filters=32,
    kernel_size=[11, 41],
    strides=[2, 2],
    padding="same",
    use_bias=False,
    name="conv_1",
)(x)
x = layers.BatchNormalization(name="conv_1_bn")(x)
x = layers.ReLU(name="conv_1_relu")(x)
# Convolution layer 2
x = layers.Conv2D(
    filters=32,
    kernel_size=[11, 21],
    strides=[1, 2],
    padding="same",
    use_bias=False,
    name="conv_2",
)(x)
x = layers.BatchNormalization(name="conv_2_bn")(x)
x = layers.ReLU(name="conv_2_relu")(x)
# Reshape the resulted volume to feed the RNNs layers
x = layers.Reshape((-1, x.shape[-2] * x.shape[-1]))(x)

encoder = keras.layers.LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder(x)

# We discard `encoder_outputs` and only keep the states.
encoder_states = [state_h, state_c]

# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = keras.Input(shape=(None, decoder_input_shape), name="input_teacher_forcing")

# We set up our decoder to return full output sequences,
# and to return internal states as well. We don't use the
# return states in the training model, but we will use them in inference.
decoder_lstm = keras.layers.LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
# NOTE(review): a ReLU output cannot produce negative values, but the
# end-of-sequence token appended to the targets is -2. Consider a linear
# activation or a non-negative end token.
decoder_dense = layers.Dense(13, activation='relu')
decoder_outputs = decoder_dense(decoder_outputs)

# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
model = keras.Model([input_spectrogram, decoder_inputs], decoder_outputs)

model.compile(
    optimizer="adam",
    loss=keras.losses.MeanAbsoluteError(),
)
model.summary()
plot_model(model)


model.fit(
     [x_train, decoder_input],
     y_train,
     batch_size=20,
     epochs=10,
     validation_split=0.2
)

# Save model
model.save("MapCreator")
print("Sauvegarde du modèle terminée")

/content/drive/MyDrive/1151466 MIMI - Nanimo nai Youna/audio.mp3
[-2. -2. -2. -2. -2. -2. -2. -2. -2. -2. -2. -2. -2.]
Taille de l'input d'entraînement : (5, 20000, 128)
Taille de l'output d'entraînement : (5, 4001, 13)
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_spectrogram (InputLayer)  [(None, 20000, 128)  0          []                               
                                ]                                                                 
                                                                                                  
 expand_dim (Reshape)           (None, 20000, 128,   0           ['input_spectrogram[0][0]']      
                                1)                                                                
                                                                        



Sauvegarde du modèle terminée
