In [2]:
import torch.utils.data as data
import os
import numpy as np
import json

class Dataset(data.Dataset):
    """Pairs one-hot encoded tab annotations with preprocessed audio arrays.

    Tab files are read from ``<data_dir_path>/tab`` and audio files from a
    mode-dependent sub-directory (``raw_wave``, ``stft``, ``mel`` or ``cqt``).
    Both directory listings are sorted and then paired by position, so the
    two directories are assumed to contain matching file stems
    (NOTE(review): confirm against the preprocessing step).

    Args:
        data_dir_path: Root directory of the preprocessed data.
        mode: ``"wave"``, ``"stft"`` or ``"mel"``; any other value selects
            the ``cqt`` directory.
        verbose: When true (the default), print each audio path and its
            arrays while loading.
    """

    def __init__(self, data_dir_path="preprocessed/", mode="wave", verbose=True):
        self.data_dir_path = data_dir_path
        self.mode = mode
        self.verbose = verbose
        self.tab_data_paths = self.get_tab_data_paths()
        self.audio_data_paths = self.get_audio_data_paths()
        # Maps effect names to the short codes looked up in the tab JSON.
        # Commented-out entries are effects that are currently disabled.
        self.effect_list = {
            # "dead" : "ded",
            # "ghost_note" : "gst",
            "harmonic" : "har",
            "vibrato" : "vib",
            "bend1" : "bn1",
            "bend2" : "bn2",
            "bend3" : "bn3",
            # "slide" : "sld",
            # "hammer" : "h_p",
            "trill" : "trl",
            # "palm_mute" : "brm",
            # "staccato" : "stc",
            # "slap_effect" : "slp"
            "tie" : "tie"
        }
        # Position of a code in this list (+1) is its one-hot column.
        self.effect_idx = list(self.effect_list.values())
        # Keep the combined data on the instance; previously it was bound to
        # a local variable and lost as soon as __init__ returned.
        self.samples = self.conmine_tab_and_audio_data()

    def __len__(self):
        """Return the number of tab/audio pairs that were loaded."""
        return len(self.samples)

    def __getitem__(self, index):
        """Return the ``{"tab": ..., "audio": ...}`` dict stored under ``index``."""
        return self.samples[index]

    def get_tab_data_paths(self):
        """Return the sorted paths of every entry in ``<data_dir_path>/tab``."""
        tab_dir_path = os.path.join(self.data_dir_path, "tab")
        # os.listdir returns entries in arbitrary order; sort so the pairing
        # with the audio listing is deterministic.
        return [
            os.path.join(tab_dir_path, file)
            for file in sorted(os.listdir(tab_dir_path))
        ]

    def get_audio_data_paths(self):
        """Return the sorted paths of every entry in the audio directory for ``self.mode``."""
        mode_to_dir = {"wave": "raw_wave", "stft": "stft", "mel": "mel"}
        # Any unrecognised mode falls back to the "cqt" directory.
        audio_dir_path = os.path.join(
            self.data_dir_path, mode_to_dir.get(self.mode, "cqt")
        )
        return [
            os.path.join(audio_dir_path, file)
            for file in sorted(os.listdir(audio_dir_path))
        ]

    def encode_onehot_position(self, data):
        """One-hot encode fret positions into a ``(6, 21)`` array.

        ``data`` maps 1-based string numbers to either ``"-"`` (column 0) or
        a fret number ``n`` (column ``n + 1``). Rows for strings that are not
        present in ``data`` stay all-zero.
        """
        position = np.zeros((6, 21))
        for string in data:
            if data[string] == "-":
                fret = 0
            else:
                fret = int(data[string]) + 1
            position[int(string)-1][fret] = 1
        return position

    def encode_onehot_effects(self, data):
        """One-hot encode effect codes into a ``(6, len(effect_idx) + 1)`` array.

        ``data`` maps 1-based string numbers to either ``"-"`` (column 0) or
        an effect code from ``self.effect_idx`` (column ``index + 1``).

        Raises:
            ValueError: If a code is not present in ``self.effect_idx``.
        """
        # Column 0 means "no effect", so the width is one more than the
        # number of enabled effects. A fixed width of 6 overflowed once more
        # than five effects were enabled.
        effects = np.zeros((6, len(self.effect_idx) + 1))
        for string in data:
            if data[string] == "-":
                effect = 0
            else:
                effect = self.effect_idx.index(data[string]) + 1
            effects[int(string)-1][effect] = 1
        return effects

    def encode_onehot_ties(self, data):
        """One-hot encode tie flags into a ``(6, 2)`` array.

        ``data`` maps 1-based string numbers to ``"-"`` (column 0) or any
        other value (column 1).
        """
        ties = np.zeros((6, 2))
        for string in data:
            if data[string] == "-":
                tie = 0
            else:
                tie = 1
            ties[int(string)-1][tie] = 1
        return ties

    def encode_tab_data(self, data_path):
        """Load a tab JSON file and encode every entry after the first.

        The first element of the JSON list is skipped; each remaining
        element must provide ``"position"``, ``"effects"`` and ``"ties"``.

        Returns:
            A list of dicts holding the three encoded arrays per entry.
        """
        # load tab json data
        with open(data_path, "r") as f:
            tab_json_data = json.load(f)
        all_tab = []
        # Element 0 is treated as a header and is not encoded.
        for entry in tab_json_data[1:]:
            all_tab.append({
                "position" : self.encode_onehot_position(entry["position"]),
                "effects" : self.encode_onehot_effects(entry["effects"]),
                "ties" : self.encode_onehot_ties(entry["ties"])
            })
        return all_tab

    def encode_audio_data(self, data_path):
        """Load the arrays stored at ``data_path``.

        For an ``.npz`` archive the arrays are read into a plain dict keyed
        by array name and the archive is closed before returning. Anything
        else ``np.load`` produces is returned unchanged.
        """
        loaded = np.load(data_path)
        if self.verbose:
            print(data_path)
        if not hasattr(loaded, "files"):
            # Not an archive (e.g. a single .npy array): nothing to close.
            return loaded
        # The archive object keeps the file open until it is closed, so copy
        # the arrays out and release the handle.
        with loaded as archive:
            audio_data = {key: archive[key] for key in archive.files}
        if self.verbose:
            for key in audio_data:
                print(audio_data[key])
        return audio_data

    def conmine_tab_and_audio_data(self):
        """Encode every tab file together with the audio file at the same index.

        Returns:
            A dict mapping the pair index to ``{"tab": ..., "audio": ...}``.
        """
        output = {}
        for tab_number, tab_data_path in enumerate(self.tab_data_paths):
            audio_data_path = self.audio_data_paths[tab_number]
            output[tab_number] = {
                "tab" : self.encode_tab_data(tab_data_path),
                "audio" : self.encode_audio_data(audio_data_path)
            }
        return output

In [3]:
# Build the dataset with the default location and audio representation
# spelled out, so the cell shows which directory and mode are loaded.
dataset = Dataset(data_dir_path="preprocessed/", mode="wave")

preprocessed/raw_wave\test.npz
[-7.5400909e-05 -3.8015994e-04 -3.6436823e-04 ... -1.9202343e-03
 -2.2865944e-03  0.0000000e+00]
preprocessed/raw_wave\track1.npz
[0. 0. 0. ... 0. 0. 0.]


preprocessed/raw_wave\track11.npz
[0. 0. 0. ... 0. 0. 0.]
preprocessed/raw_wave\track22.npz
[0. 0. 0. ... 0. 0. 0.]
preprocessed/raw_wave\track26.npz
[0. 0. 0. ... 0. 0. 0.]
preprocessed/raw_wave\track6.npz
[0. 0. 0. ... 0. 0. 0.]
preprocessed/raw_wave\track9.npz
[0. 0. 0. ... 0. 0. 0.]


In [7]:
# Inspect one raw-wave archive: for every array stored in it, show the
# array's shape and a 100-sample window starting at index 11000.
audio_data = np.load("preprocessed/raw_wave/track1.npz")
for name in audio_data:
    samples = audio_data[name]
    print(samples.shape)
    print(samples[11000:11100])

(2076640,)
[-0.40441516 -0.20730835 -0.14985488 -0.21438111 -0.31380218 -0.35219866
 -0.36863843 -0.39668226 -0.27920356 -0.03729172  0.18684599  0.29000407
  0.22556412  0.13012543  0.07630409  0.04809059 -0.01338648 -0.10819913
 -0.11855996 -0.03977787  0.07675742  0.24016793  0.3440159   0.2664843
  0.09843533 -0.03134301 -0.0611284  -0.02907925 -0.02285722 -0.01479624
  0.04607621  0.11752041  0.15418424  0.17111874  0.20723847  0.26002508
  0.3045125   0.30283517  0.21231605  0.08031681  0.02536909  0.08837258
  0.16700383  0.16513439  0.07138973 -0.08380874 -0.24217425 -0.32954925
 -0.3219058  -0.25932962 -0.17846128 -0.1026732  -0.03791142  0.01246394
  0.04416192  0.07208603  0.10650379  0.13943939  0.16278672  0.19060689
  0.22806576  0.2687213   0.30174303  0.30977646  0.28646123  0.20215367
  0.04918964 -0.06659036 -0.03833349  0.00851639 -0.04518853 -0.14157286
 -0.24539205 -0.3241592  -0.34767148 -0.30819902 -0.21921112 -0.15112941
 -0.15682785 -0.20038496 -0.25343856 -0.3