In [1]:
import os
import pywt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
import pytorch_lightning as pl
from torch.utils.data import DataLoader, Dataset
from torchaudio.pipelines import HDEMUCS_HIGH_MUSDB
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchaudio
import numpy as np
import random
from pytorch_lightning.loggers import WandbLogger
import wandb
import auraloss
import collections
from tqdm import tqdm
import pretty_midi
import matplotlib.pyplot as plt
from torchaudio.pipelines import HDEMUCS_HIGH_MUSDB
print(torch.cuda.is_available())
import plotly.graph_objects as go
from torch.optim import lr_scheduler
from IPython.display import Audio
from torchaudio.transforms import Fade
import musdb
import museval
import gc
import pandas as pd
import sklearn
from sklearn.metrics import recall_score, precision_score, f1_score, jaccard_score, accuracy_score,zero_one_loss

True


In [8]:
class Track:
    def __init__(self, name, midi_path, drum_path, mix_path, adtof_path):
        self.name = name
        self.midi_path = midi_path
        self.drum_path = drum_path
        self.mix_path = mix_path
        self.targets = {'drums': '', 'bass': ''}
        self.rate = 44100
        self.subset = 'test'
        self.adtof_path = adtof_path

In [9]:
class AudioData:
    def __init__(self, audio):
        self.audio = audio

In [10]:
seed_value = 3407
torch.manual_seed(seed_value)
random.seed(seed_value)
np.random.seed(seed_value)
torch.cuda.manual_seed(seed_value)
torch.cuda.manual_seed_all(seed_value)
torch.set_float32_matmul_precision('high')

In [11]:
mix_folder = 'D:/Github/phd-drum-sep/data/MDBDrums-master/MDB Drums/audio/full_mix/'
mixes = os.listdir(mix_folder)
mixes = [mix_folder + m for m in mixes]

drum_folder = 'D:/Github/phd-drum-sep/data/MDBDrums-master/MDB Drums/audio/drum_only/'
drum = os.listdir(drum_folder)
drum = [drum_folder + d for d in drum]

beats_folder = 'D:/Github/phd-drum-sep/data/MDBDrums-master/MDB Drums/annotations/beats/'
beats = os.listdir(beats_folder)
beats = [beats_folder + b for b in beats]#

class_folder = 'D:/Github/phd-drum-sep/data/MDBDrums-master/MDB Drums/annotations/subclass/'
classes = os.listdir(class_folder)
classes = [class_folder + c for c in classes]

midi_folder = 'D:/Github/phd-drum-sep/data/MDBDrums-master/MDB Drums/midi/'
midis = os.listdir(midi_folder)
midis = [midi_folder + m for m in midis]

In [12]:
all_tracks = []
for idx, val in tqdm(enumerate(classes)):

    name = val.replace('D:/Github/phd-drum-sep/data/MDBDrums-master/MDB Drums/annotations/subclass/', '')
    name = name.replace('_subclass.txt', '')

    adtof_path = f"D:/Github/phd-drum-sep/data/MDBDrums-master/MDB Drums/audio/adtof/{name}_MIX.wav.mid"

    t = Track(name, midis[idx], drum[idx], mixes[idx], adtof_path)
    all_tracks.append(t)

23it [00:00, ?it/s]


In [13]:
def turn_transcription_into_roll(transcription, frames):
    # Determine your sampling frequency (frames per second)
    fs = 44100
    
    piano_roll_length = int(frames)
    
    # Initialize the piano roll array
    piano_roll = np.zeros((64, piano_roll_length))
    
    # Fill in the piano roll array
    for note in transcription.instruments[0].notes:
        # Convert start and end times to frame indices
        start_frame = int(np.floor(note.start * fs))
        end_frame = int(np.ceil(note.end * fs))
        
        # Set the corresponding frames to 1 (or note.velocity for a velocity-sensitive representation)
        piano_roll[note.pitch, start_frame:end_frame] = 1  # Or use note.velocity
        
    roll = np.vstack([piano_roll[35:36, :], piano_roll[38:39, :], piano_roll[42:43, :], piano_roll[47:48, :], piano_roll[49:50, :]])
    return roll

In [14]:
def load_audio(path):
    audio_tensors = []
    waveform, _ = torchaudio.load(path)
    return waveform

def load_roll(path, frames):
    transcription = pretty_midi.PrettyMIDI(path)
    roll = turn_transcription_into_roll(transcription, frames)

    return torch.from_numpy(roll).float()


In [19]:
out_dir = f"D:/Github/phd-drum-sep/model-as-adt/results_adtof/"
rows = []
for track in tqdm(all_tracks):

    mixture_tensor = load_audio(track.mix_path)
    shape = mixture_tensor.shape[1]
    
    snippet_length = (mixture_tensor.shape[1] // (44100 * 4)) * (44100 * 4)
    mixture_tensor = mixture_tensor[:, :snippet_length]

    roll_tensor = load_roll(track.midi_path, shape)
    roll_tensor = roll_tensor[:, :snippet_length]

    adtof_tensor = load_roll(track.adtof_path, shape) 
    adtof_tensor = adtof_tensor[:, :snippet_length]
    
    chunk_len = int(44100 * 4)

    for start in range(0, snippet_length, chunk_len):
        i = start // chunk_len
        end = start + chunk_len

        drum_chunk_ = roll_tensor[:, start:end].numpy()
        proposed_transcription_ = adtof_tensor[:, start:end].numpy()

        for drum in range(5):
            drum_chunk = drum_chunk_[drum, :]
            proposed_transcription = proposed_transcription_[drum, :]
            recall = recall_score(drum_chunk, proposed_transcription, average='weighted', zero_division=0)
            precision = precision_score(drum_chunk, proposed_transcription, average='weighted', zero_division=0)
            f1 = f1_score(drum_chunk, proposed_transcription, average='weighted')
            jaccard = jaccard_score(drum_chunk, proposed_transcription, average='weighted')
            acc = accuracy_score(drum_chunk, proposed_transcription)
            rows.append([track.name, i, drum, recall, precision, f1, jaccard, acc])

        drum_chunk = drum_chunk_.flatten()
        proposed_transcription = proposed_transcription_.flatten()
        recall = recall_score(drum_chunk, proposed_transcription, average='weighted', zero_division=0)
        precision = precision_score(drum_chunk, proposed_transcription, average='weighted', zero_division=0)
        f1 = f1_score(drum_chunk, proposed_transcription, average='weighted')
        jaccard = jaccard_score(drum_chunk, proposed_transcription, average='weighted')
        acc = accuracy_score(drum_chunk, proposed_transcription)
        rows.append([track.name, i, 'all', recall, precision, f1, jaccard, acc])
            
df_results = pd.DataFrame(rows, columns=['track_name', 'slice', 'drum', 'recall', 'precision', 'f1', 'jaccard', 'accuracy'])
df_results.to_csv(f"D:/Github/phd-drum-sep/model-as-adt/results_final/adtof.csv")
    

    

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [08:23<00:00, 21.88s/it]


OSError: Cannot save file into a non-existent directory: 'D:\Github\phd-drum-sep\model-as-adt\results_adtof'

In [None]:
df_results