In [6]:
import jams
from AudioPreprocessor import AudioPreprocessor
from DatasetPreprocessor import DatasetPreprocessor
import Settings
import matplotlib.pyplot as plt
import librosa
import librosa.display
import numpy as np

datasetPreprocessor = DatasetPreprocessor()
audioPreprocessor = AudioPreprocessor()

In [8]:
w = 192
h = 192

anno_path = './YOLO/_annotations.txt'
px = 1/plt.rcParams['figure.dpi']  # pixel in inches
dpi = 100

In [58]:
def save_image(filename, audio_parts, folder):
    for i in range(len(audio_parts)):
        fig = plt.figure(frameon=False, figsize=(w/dpi, h/dpi), dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        librosa.display.specshow(librosa.amplitude_to_db(np.swapaxes(audio_parts[i], 0, 1), ref=np.max), y_axis='linear', x_axis='time')
        plt.savefig(f'./YOLO/{folder}/{filename}_{i}')
        plt.clf()

def save_anno(filename, parts_borders, chunk_dur, anno_file, folder):

    def to_pixels(time, part_start, chunk_dur):
        return round((time - part_start) / chunk_dur)


    file_anno = Settings.annotations_path + filename + ".jams"
    jam = jams.load(file_anno)

    for i in range(len(parts_borders)):
        anno_file.write(f'.\{folder}\{filename}_{i}.png')
        for string_num in range(6):
            anno = jam.annotations["note_midi"][string_num]
            for note in anno["data"]:
                end = note[0] + note[1]
                if note[0] < parts_borders[i][0] and parts_borders[i][0] < end < parts_borders[i][1]:
                    anno_file.write(f' {0},0,{to_pixels(end, parts_borders[i][0], chunk_dur)},{h-1},0')
                elif end > parts_borders[i][1] and parts_borders[i][0] < note[0] < parts_borders[i][1]:
                    anno_file.write(f' {to_pixels(note[0], parts_borders[i][0], chunk_dur)},0,{191},{h-1},0')
                elif parts_borders[i][0] < note[0] < parts_borders[i][1] and parts_borders[i][0] < end < parts_borders[i][1]:
                    anno_file.write(f' {to_pixels(note[0], parts_borders[i][0], chunk_dur)},0,{to_pixels(end, parts_borders[i][0], chunk_dur)},{h-1},0')
        anno_file.write('\n')


def process_file(filename, anno_file, folder):
    file_audio = Settings.mic_path + filename + "_mic.wav"
    audio_chunks = audioPreprocessor.process_audiofile(file_audio)
    dur = librosa.get_duration(filename = Settings.mic_path + filename + "_mic.wav")
    chunk_dur = dur/audio_chunks.shape[0]

    audio_parts, parts_borders = split_file(audio_chunks, chunk_dur)

    save_image(filename, audio_parts, folder)
    save_anno(filename, parts_borders, chunk_dur, anno_file, folder)

def split_file(audio_chunks, chunk_dur):
    audio_parts = []
    parts_borders = []
    for i in range(0, audio_chunks.shape[0], 96):
        if i + 192 > audio_chunks.shape[0]:
            last_part = np.zeros((192,192))
            last_part[0:audio_chunks.shape[0]-i] = audio_chunks[i:audio_chunks.shape[0]]    
            audio_parts.append(last_part)
            parts_borders.append([i*chunk_dur,(i+192)*chunk_dur])
            break
        else:
            audio_parts.append(np.array(audio_chunks[i:i+192]))
            parts_borders.append([i*chunk_dur,(i+192)*chunk_dur])
    return audio_parts, parts_borders


def split_files(files):
    train_files = []
    test_files = []
    val_files = []
    for i in files:
        result = np.random.choice(3, p=[0.8, 0.1, 0.1])
        if result == 0:
            train_files.append(i)
        elif result == 1:
            test_files.append(i)
        else:
            val_files.append(i)
    return train_files, test_files, val_files

def process_files(test=False):
    files = datasetPreprocessor.get_all_filenames()
    train_files, test_files, val_files = split_files(files)
    if test:
        with open('./YOLO/rofl/_annotations.txt', 'w') as anno_file:
            process_file('00_BN1-129-Eb_comp' , anno_file, 'rofl')
    else:
        with open('./YOLO/train/_annotations.txt', 'w') as anno_file:
            for file in train_files:
                process_file(file, anno_file, 'train')
        with open('./YOLO/test/_annotations.txt', 'w') as anno_file:
            for file in test_files:
                process_file(file, anno_file, 'test')
        with open('./YOLO/val/_annotations.txt', 'w') as anno_file:
            for file in val_files:
                process_file(file, anno_file, 'val')        


In [59]:
process_files(test=True)

<Figure size 192x192 with 0 Axes>

<Figure size 192x192 with 0 Axes>

<Figure size 192x192 with 0 Axes>

<Figure size 192x192 with 0 Axes>

<Figure size 192x192 with 0 Axes>

<Figure size 192x192 with 0 Axes>

<Figure size 192x192 with 0 Axes>

<Figure size 192x192 with 0 Axes>

<Figure size 192x192 with 0 Axes>

<Figure size 192x192 with 0 Axes>

In [7]:
last_part = [[0]*192]*192
last_part[0:audio_chunks.shape[0]-i] = audio_chunks[i:audio_chunks.shape[0]]

(np.array([[0]*192]*192)).shape

(192, 192)