In [2]:
import argparse
import model
import data
import torch
import time
from pathlib import Path
import tqdm
import json
import utils
import sklearn.preprocessing
import numpy as np
import random
from git import Repo
import os
import copy


tqdm.monitor_interval = 0

In [2]:
def train(args, unmix, device, train_sampler, optimizer):
    """Run one pass over ``train_sampler`` and return the average loss.

    Args:
        args: parsed options; only ``args.quiet`` is read (it disables the
            progress bar).
        unmix: model being optimized. It is called on the first element of
            each batch, and its ``transform`` method is applied to the second
            element to obtain the regression target.
        device: device each batch is moved to before the forward pass.
        train_sampler: iterable yielding ``(x, y)`` batches.
        optimizer: optimizer; zeroed and stepped once per batch.

    Returns:
        The ``avg`` attribute of the ``utils.AverageMeter`` that accumulated
        the per-batch MSE loss.
    """
    meter = utils.AverageMeter()
    unmix.train()
    progress = tqdm.tqdm(train_sampler, disable=args.quiet)
    for inputs, targets in progress:
        progress.set_description("Training batch")
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        estimate = unmix(inputs)
        reference = unmix.transform(targets)
        batch_loss = torch.nn.functional.mse_loss(estimate, reference)
        batch_loss.backward()
        optimizer.step()

        # The meter weights each batch by the size of dim 1 of the target.
        meter.update(batch_loss.item(), reference.size(1))
    return meter.avg

In [3]:
def valid(args, unmix, device, valid_sampler):
    """Evaluate ``unmix`` on ``valid_sampler`` and return the average loss.

    The model is switched to eval mode and gradients are disabled for the
    whole pass.

    Args:
        args: accepted for signature parity with ``train``; not read here.
        unmix: model to evaluate; also provides ``transform`` for the target.
        device: device each batch is moved to.
        valid_sampler: iterable yielding ``(x, y)`` batches.

    Returns:
        The ``avg`` attribute of the ``utils.AverageMeter`` that accumulated
        the per-batch MSE loss.
    """
    meter = utils.AverageMeter()
    unmix.eval()
    with torch.no_grad():
        for inputs, targets in valid_sampler:
            inputs = inputs.to(device)
            targets = targets.to(device)
            estimate = unmix(inputs)
            reference = unmix.transform(targets)
            batch_loss = torch.nn.functional.mse_loss(estimate, reference)
            meter.update(batch_loss.item(), reference.size(1))
    return meter.avg

In [4]:
def get_statistics(args, dataset):
    """Fit a ``StandardScaler`` on the spectrograms of ``dataset``.

    A deep copy of ``dataset`` is reconfigured (one sample per track, no
    augmentations, no random chunks, no fixed sequence duration) so the
    caller's dataset object is left untouched.

    Args:
        args: parsed options; ``args.nfft``, ``args.nhop`` and ``args.quiet``
            are read.
        dataset: indexable dataset whose items unpack into ``(x, y)``.

    Returns:
        Tuple ``(mean, std)``: ``scaler.mean_`` and ``scaler.scale_`` floored
        at ``1e-4`` times its own maximum.
    """
    scaler = sklearn.preprocessing.StandardScaler()

    to_spectrogram = torch.nn.Sequential(
        model.STFT(n_fft=args.nfft, n_hop=args.nhop),
        model.Spectrogram(mono=True)
    )

    dataset_scaler = copy.deepcopy(dataset)
    overrides = (
        ("samples_per_track", 1),
        ("augmentations", None),
        ("random_chunks", False),
        ("seq_duration", None),
    )
    for attribute, value in overrides:
        setattr(dataset_scaler, attribute, value)

    progress = tqdm.tqdm(range(len(dataset_scaler)), disable=args.quiet)
    for index in progress:
        x, _ = dataset_scaler[index]
        progress.set_description("Compute dataset statistics")
        # Add a leading axis of size 1 before the transform.
        X = to_spectrogram(x[None, ...])
        scaler.partial_fit(np.squeeze(X))

    # set initial input scaler values
    floor = 1e-4*np.max(scaler.scale_)
    std = np.maximum(scaler.scale_, floor)
    return scaler.mean_, std

In [5]:
# Command-line options for the trainer. Defaults are chosen so the notebook
# can run without any arguments.
parser = argparse.ArgumentParser(description='Open Unmix Trainer')

# which target do we want to train?
parser.add_argument('--target', type=str, default='vocals',
                    help='target source (will be passed to the dataset)')

# Dataset parameters
parser.add_argument('--dataset', type=str, default="musdb",
                    choices=[
                        'musdb', 'aligned', 'sourcefolder',
                        'trackfolder_var', 'trackfolder_fix'
                    ],
                    help='Name of the dataset.')
parser.add_argument('--root', type=str, help='root path of dataset',
                    default='../rec_data/')
parser.add_argument('--output', type=str, default="../out_unmix/",
                    help='provide output path base folder name')
parser.add_argument('--model', type=str, help='Path to checkpoint folder',
                    default='../checkpoint_unmix')

# Training parameters
parser.add_argument('--epochs', type=int, default=1000)
parser.add_argument('--batch-size', type=int, default=16)
parser.add_argument('--lr', type=float, default=0.001,
                    help='learning rate, defaults to 1e-3')
parser.add_argument('--patience', type=int, default=140,
                    help='maximum number of epochs to train (default: 140)')
parser.add_argument('--lr-decay-patience', type=int, default=80,
                    help='lr decay patience for plateau scheduler')
parser.add_argument('--lr-decay-gamma', type=float, default=0.3,
                    help='gamma of learning rate scheduler decay')
parser.add_argument('--weight-decay', type=float, default=0.00001,
                    help='weight decay')
parser.add_argument('--seed', type=int, default=42, metavar='S',
                    help='random seed (default: 42)')


# Model parameters
# The two help fragments are adjacent string literals, which Python joins with
# no separator; the explicit ", " keeps the rendered help readable.
parser.add_argument('--seq-dur', type=float, default=6.0,
                    help='Sequence duration in seconds, '
                    'value of <=0.0 will use full/variable length')
parser.add_argument('--unidirectional', action='store_true', default=False,
                    help='Use unidirectional LSTM instead of bidirectional')
parser.add_argument('--nfft', type=int, default=4096,
                    help='STFT fft size and window size')
parser.add_argument('--nhop', type=int, default=1024,
                    help='STFT hop size')
parser.add_argument('--hidden-size', type=int, default=512,
                    help='hidden size parameter of dense bottleneck layers')
parser.add_argument('--bandwidth', type=int, default=16000,
                    help='maximum model bandwidth in hertz')
parser.add_argument('--nb-channels', type=int, default=2,
                    help='set number of channels for model (1, 2)')
parser.add_argument('--nb-workers', type=int, default=0,
                    help='Number of workers for dataloader.')

# Misc parameters
parser.add_argument('--quiet', action='store_true', default=False,
                    help='less verbose during training')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')



_StoreTrueAction(option_strings=['--no-cuda'], dest='no_cuda', nargs=0, const=True, default=False, type=None, choices=None, help='disables CUDA training', metavar=None)

In [7]:
# parse_known_args returns unrecognized arguments instead of exiting, so
# arguments in sys.argv that this parser does not define are tolerated.
args, _ = parser.parse_known_args()

# CUDA is used only when it is available and not disabled with --no-cuda.
use_cuda = not args.no_cuda and torch.cuda.is_available()
print("Using GPU:", use_cuda)
print("Using Torchaudio: ", utils._torchaudio_available())
# Worker count and pinned memory are only passed to the loaders on GPU runs.
dataloader_kwargs = {'num_workers': args.nb_workers, 'pin_memory': True} if use_cuda else {}

# Locate the enclosing git checkout from the current working directory rather
# than from a machine-specific absolute path, so the notebook also runs on
# other machines. Assumes the notebook is started from inside the repository.
repo_dir = os.path.abspath(os.getcwd())
repo = Repo(repo_dir, search_parent_directories=True)
commit = repo.head.commit.hexsha[:7]  # abbreviated hash of the current HEAD

# Seed torch's and Python's random number generators.
torch.manual_seed(args.seed)
random.seed(args.seed)

device = torch.device("cuda" if use_cuda else "cpu")


Using GPU: True
Using Torchaudio:  False


In [7]:
# Build the training and validation datasets; `args` is rebound to the value
# returned by load_datasets.
# NOTE(review): the recorded output of this cell is an argparse usage message
# followed by `SystemExit: 2`. Presumably load_datasets parses the command line
# again and rejects the kernel's own arguments; confirm in data.load_datasets.
train_dataset, valid_dataset, args = data.load_datasets(parser, args)

usage: ipykernel_launcher.py [-h] [--target TARGET]
                             [--dataset {musdb,aligned,sourcefolder,trackfolder_var,trackfolder_fix}]
                             [--root ROOT] [--output OUTPUT] [--model MODEL]
                             [--epochs EPOCHS] [--batch-size BATCH_SIZE]
                             [--lr LR] [--patience PATIENCE]
                             [--lr-decay-patience LR_DECAY_PATIENCE]
                             [--lr-decay-gamma LR_DECAY_GAMMA]
                             [--weight-decay WEIGHT_DECAY] [--seed S]
                             [--seq-dur SEQ_DUR] [--unidirectional]
                             [--nfft NFFT] [--nhop NHOP]
                             [--hidden-size HIDDEN_SIZE]
                             [--bandwidth BANDWIDTH]
                             [--nb-channels NB_CHANNELS]
                             [--nb-workers NB_WORKERS] [--quiet] [--no-cuda]
                             [--is_wav]
                        

SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [8]:
# Create the output directory (and any missing parents) if it does not exist.
target_path = Path(args.output)
target_path.mkdir(parents=True, exist_ok=True)

# Training batches are shuffled and use the configured batch size.
train_sampler = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    **dataloader_kwargs
)
# Validation items are served one at a time, in dataset order.
valid_sampler = torch.utils.data.DataLoader(valid_dataset, batch_size=1, **dataloader_kwargs)

NameError: name 'train_dataset' is not defined

In [9]:
# Display the training dataset object.
# NOTE(review): the recorded run raised NameError here, so the name was never
# bound in that session (the cell that assigns it had failed).
train_dataset

NameError: name 'train_dataset' is not defined

In [None]:
# NOTE(review): action='store_true' can only set the value to True and the
# default is already True, so args.iswav is True whether or not --iswav is given.
# NOTE(review): the usage text recorded earlier in this notebook lists the flag
# as `--is_wav`; confirm which spelling the data module expects.
parser.add_argument('--iswav', action='store_true', default=True, help='loads wav instead of STEMS')

In [None]:
# Use parse_known_args, as the setup cells in this notebook do: parse_args()
# exits with an error on any argument in sys.argv that the parser does not
# define, whereas parse_known_args returns those leftovers instead.
args, _ = parser.parse_known_args()

In [None]:
import pathlib

In [1]:
import torch.nn as nn

In [1]:
import tqdm

In [2]:
# Progress bar over range(1, 100); the recorded output shows 99 steps in total.
t = tqdm.trange(1,100)

  0%|          | 0/99 [00:00<?, ?it/s]

In [None]:
t.set_postfix

In [1]:
from utils import load_audio

In [None]:
load_audio.

In [1]:
import museval

In [1]:
museval.eval_mus_track

NameError: name 'museval' is not defined

In [2]:
import argparse
import model
import data
import torch
import time
from pathlib import Path
import tqdm
import json
import utils
import sklearn.preprocessing
import numpy as np
import random
from git import Repo
import os
import copy


tqdm.monitor_interval = 0


def train(args, unmix, device, train_sampler, optimizer):
    """Run one pass over ``train_sampler`` and return the average loss.

    Args:
        args: parsed options; only ``args.quiet`` is read (it disables the
            progress bar).
        unmix: model being optimized. It is called on the first element of
            each batch, and its ``transform`` method is applied to the second
            element to obtain the regression target.
        device: device each batch is moved to before the forward pass.
        train_sampler: iterable yielding ``(x, y)`` batches.
        optimizer: optimizer; zeroed and stepped once per batch.

    Returns:
        The ``avg`` attribute of the ``utils.AverageMeter`` that accumulated
        the per-batch MSE loss.
    """
    meter = utils.AverageMeter()
    unmix.train()
    progress = tqdm.tqdm(train_sampler, disable=args.quiet)
    for inputs, targets in progress:
        progress.set_description("Training batch")
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        estimate = unmix(inputs)
        reference = unmix.transform(targets)
        batch_loss = torch.nn.functional.mse_loss(estimate, reference)
        batch_loss.backward()
        optimizer.step()

        # The meter weights each batch by the size of dim 1 of the target.
        meter.update(batch_loss.item(), reference.size(1))
    return meter.avg


def valid(args, unmix, device, valid_sampler):
    """Evaluate ``unmix`` on ``valid_sampler`` and return the average loss.

    The model is switched to eval mode and gradients are disabled for the
    whole pass.

    Args:
        args: accepted for signature parity with ``train``; not read here.
        unmix: model to evaluate; also provides ``transform`` for the target.
        device: device each batch is moved to.
        valid_sampler: iterable yielding ``(x, y)`` batches.

    Returns:
        The ``avg`` attribute of the ``utils.AverageMeter`` that accumulated
        the per-batch MSE loss.
    """
    meter = utils.AverageMeter()
    unmix.eval()
    with torch.no_grad():
        for inputs, targets in valid_sampler:
            inputs = inputs.to(device)
            targets = targets.to(device)
            estimate = unmix(inputs)
            reference = unmix.transform(targets)
            batch_loss = torch.nn.functional.mse_loss(estimate, reference)
            meter.update(batch_loss.item(), reference.size(1))
    return meter.avg


def get_statistics(args, dataset):
    """Fit a ``StandardScaler`` on the spectrograms of ``dataset``.

    A deep copy of ``dataset`` is reconfigured (one sample per track, no
    augmentations, no random chunks, no fixed sequence duration) so the
    caller's dataset object is left untouched.

    Args:
        args: parsed options; ``args.nfft``, ``args.nhop`` and ``args.quiet``
            are read.
        dataset: indexable dataset whose items unpack into ``(x, y)``.

    Returns:
        Tuple ``(mean, std)``: ``scaler.mean_`` and ``scaler.scale_`` floored
        at ``1e-4`` times its own maximum.
    """
    scaler = sklearn.preprocessing.StandardScaler()

    # StandardScaler.partial_fit only accepts 2-D input, so the spectrogram is
    # requested with mono=True.
    # NOTE(review): assumes model.Spectrogram(mono=True) leaves a size-1
    # channel axis that np.squeeze below removes; confirm in model.py.
    spec = torch.nn.Sequential(
        model.STFT(n_fft=args.nfft, n_hop=args.nhop),
        model.Spectrogram(mono=True)
    )

    dataset_scaler = copy.deepcopy(dataset)
    dataset_scaler.samples_per_track = 1
    dataset_scaler.augmentations = None
    dataset_scaler.random_chunks = False
    dataset_scaler.seq_duration = None
    pbar = tqdm.tqdm(range(len(dataset_scaler)), disable=args.quiet)
    for ind in pbar:
        x, _ = dataset_scaler[ind]
        pbar.set_description("Compute dataset statistics")
        # Add a leading axis of size 1 before the transform.
        X = spec(x[None, ...])
        scaler.partial_fit(np.squeeze(X))

    # set initial input scaler values
    std = np.maximum(
        scaler.scale_,
        1e-4*np.max(scaler.scale_)
    )
    return scaler.mean_, std


In [4]:
# Command-line options for the trainer. Defaults are chosen so the notebook
# can run without any arguments.
parser = argparse.ArgumentParser(description='Open Unmix Trainer')

# which target do we want to train?
parser.add_argument('--target', type=str, default='vocals',
                    help='target source (will be passed to the dataset)')

# Dataset parameters
parser.add_argument('--dataset', type=str, default="aligned",
                    choices=[
                        'musdb', 'aligned', 'sourcefolder',
                        'trackfolder_var', 'trackfolder_fix'
                    ],
                    help='Name of the dataset.')
parser.add_argument('--root', type=str, help='root path of dataset', default='../rec_data/')
parser.add_argument('--output', type=str, default="../out_unmix/",
                    help='provide output path base folder name')
parser.add_argument('--model', type=str, help='Path to checkpoint folder')

# Training parameters
parser.add_argument('--epochs', type=int, default=1000)
parser.add_argument('--batch-size', type=int, default=16)
# The help text states the default that is actually configured (0.01).
parser.add_argument('--lr', type=float, default=0.01,
                    help='learning rate, defaults to 1e-2')
parser.add_argument('--patience', type=int, default=140,
                    help='maximum number of epochs to train (default: 140)')
parser.add_argument('--lr-decay-patience', type=int, default=80,
                    help='lr decay patience for plateau scheduler')
parser.add_argument('--lr-decay-gamma', type=float, default=0.999,
                    help='gamma of learning rate scheduler decay')
parser.add_argument('--weight-decay', type=float, default=0.00001,
                    help='weight decay')
parser.add_argument('--seed', type=int, default=42, metavar='S',
                    help='random seed (default: 42)')

# Model parameters
# The two help fragments are adjacent string literals, which Python joins with
# no separator; the explicit ", " keeps the rendered help readable.
parser.add_argument('--seq-dur', type=float, default=6.0,
                    help='Sequence duration in seconds, '
                    'value of <=0.0 will use full/variable length')
parser.add_argument('--unidirectional', action='store_true', default=False,
                    help='Use unidirectional LSTM instead of bidirectional')
parser.add_argument('--nfft', type=int, default=4096,
                    help='STFT fft size and window size')
parser.add_argument('--nhop', type=int, default=1024,
                    help='STFT hop size')
parser.add_argument('--hidden-size', type=int, default=512,
                    help='hidden size parameter of dense bottleneck layers')
parser.add_argument('--bandwidth', type=int, default=16000,
                    help='maximum model bandwidth in hertz')
parser.add_argument('--nb-channels', type=int, default=2,
                    help='set number of channels for model (1, 2)')
parser.add_argument('--nb-workers', type=int, default=8,
                    help='Number of workers for dataloader.')

# Misc parameters
parser.add_argument('--quiet', action='store_true', default=False,
                    help='less verbose during training')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')

# Unrecognized command-line arguments are returned separately and discarded.
args, _ = parser.parse_known_args()

# CUDA is used only when it is available and not disabled with --no-cuda.
use_cuda = not args.no_cuda and torch.cuda.is_available()
print("Using GPU:", use_cuda)
print("Using Torchaudio: ", utils._torchaudio_available())

# Worker count and pinned memory are only passed to the loaders on GPU runs.
if use_cuda:
    dataloader_kwargs = {'num_workers': args.nb_workers, 'pin_memory': True}
else:
    dataloader_kwargs = {}

# The git commit lookup is disabled in this cell, so `commit` is not defined
# here. The disabled version was:
# repo_dir = os.path.abspath(os.path.dirname(__file__))
# repo = Repo(repo_dir)
# commit = repo.head.commit.hexsha[:7]

# Seed torch's and Python's random number generators.
torch.manual_seed(args.seed)
random.seed(args.seed)

device = torch.device("cuda" if use_cuda else "cpu")


Using GPU: True
Using Torchaudio:  False


In [6]:
# Build the training and validation datasets; `args` is rebound to the value
# returned by load_datasets.
# NOTE(review): the recorded run raised `ArgumentError: argument --input-file:
# conflicting option string`. Presumably load_datasets registers extra options
# on `parser`, and this parser object had already been passed to it once;
# re-create the parser before calling again. Confirm in data.load_datasets.
train_dataset, valid_dataset, args = data.load_datasets(parser, args)

ArgumentError: argument --input-file: conflicting option string: --input-file

In [None]:
train_

In [11]:
stft_f = torch.randn(2,3,5)

In [25]:
# Random 5-D stand-in tensor for the shape experiments in the following cells.
# presumably laid out as (samples, channels, bins, frames, real/imag) — TODO confirm against model.STFT
stft_f = torch.randn(1,2,2049,5000,2)

In [26]:
stft_f.shape


torch.Size([1, 2, 2049, 5000, 2])

In [27]:
# Average over dim 1 while keeping it as a size-1 axis (recorded shape:
# [1, 1, 2049, 5000, 2]). The result is stored in `a`; `stft_f` is unchanged.
a=torch.mean(stft_f,1,keepdim=True)

In [28]:
a.shape

torch.Size([1, 1, 2049, 5000, 2])

In [29]:
# Swap dims 2 and 3. The name is rebound, so running this cell a second time
# swaps the axes back.
stft_f = stft_f.transpose(2, 3)


In [30]:
# Sum of squares over the last axis, which removes that axis (recorded shape
# afterwards: [1, 2, 5000, 2049]). Rebinds stft_f, so the cell is not idempotent.
stft_f = stft_f.pow(2).sum(-1)

In [31]:
stft_f.shape

torch.Size([1, 2, 5000, 2049])

In [21]:
# Average over dim 1, keeping it as a size-1 axis.
# NOTE(review): this cell's execution count (21) is lower than that of the cell
# which re-creates stft_f (25), and the shape recorded after it still has
# 2 entries in dim 1. The recorded outputs therefore do not reflect this cell.
stft_f = torch.mean(stft_f, 1, keepdim=True)

In [32]:
stft_f.shape

torch.Size([1, 2, 5000, 2049])

In [33]:
# Reorder the axes so that former dim 2 comes first; the result is kept in `k`
# (recorded shape: [5000, 1, 2, 2049]).
k = stft_f.permute(2,0,1,3)

In [34]:
k.shape


torch.Size([5000, 1, 2, 2049])

In [1]:
import torch

In [4]:
import scipy.signal

In [24]:
# 100 samples drawn with np.random.randn, used as a test signal for peak finding.
# NOTE(review): no NumPy seed is set in the visible cells, so values differ per run.
# The inner parentheses are redundant: (100) is just the integer 100.
a = np.random.randn((100))

In [28]:
# find_peaks returns a tuple (peak_indices, properties); b[0] holds the indices
# of the local maxima of `a` that satisfy the height=0.7 condition.
b = scipy.signal.find_peaks(a, height=0.7)

In [29]:
b[0]

array([ 5,  7, 26, 32, 50, 53, 56, 60, 69, 71, 74, 77, 82, 87, 90, 92, 94,
       96, 98])

In [30]:
a

array([ 0.53071896, -1.1036617 , -0.40160848, -1.14950537, -0.37462194,
        1.5922571 ,  0.58171191,  0.85255842, -0.08164452,  0.11810969,
       -1.36122751, -1.75818078, -0.93611475, -0.07888777, -0.45290947,
       -0.19270839,  0.31505188,  0.58493627,  0.28739222,  0.60990167,
       -1.24741701,  0.39521685, -2.20564813, -0.63203404,  0.58645671,
        0.41597205,  1.850311  ,  0.06861904,  0.44558096, -1.52554165,
       -0.01792022, -0.02869859,  1.72709509,  1.37722648, -0.92499077,
        0.39369934, -0.19574454, -0.29916275, -0.66679583, -1.28462171,
        0.63253362, -0.40714917, -0.66344988,  0.17892144,  0.68713829,
       -0.81843673,  0.04856925, -0.18903787, -1.05907236, -0.06054354,
        0.7490058 , -1.64417144,  0.81804329,  1.05734784,  0.38137929,
       -1.53598169,  1.08593923, -0.35388386, -0.50031001, -0.30774408,
        1.48581909,  0.44517201, -1.45478934, -2.36063491, -0.47860069,
        0.68089923, -0.602861  , -1.00018192, -0.45593662,  0.94

In [31]:
# Binarize `a` in place: 1 at the peak indices stored in b[0], 0 elsewhere.
# Two indexed assignments replace the per-element `x in b[0]` membership test,
# which rescanned the whole index array for every element.
a[:] = 0
a[b[0]] = 1
        

In [32]:
a

array([0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
       0., 1., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 1., 0., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0.])

In [75]:
def make_bin(x, thresh):
    """Return a binary peak mask for a 1-D tensor.

    ``scipy.signal.find_peaks`` is run on the values of ``x`` with
    ``height=thresh``. The result holds 1 at every detected peak index and 0
    everywhere else.

    Args:
        x: 1-D tensor that supports ``x.numpy()``. It is left unmodified; the
            mask is written to a freshly allocated tensor instead of the
            buffer that ``x.numpy()`` shares with ``x``.
        thresh: forwarded to ``find_peaks`` as its ``height`` argument.

    Returns:
        A new tensor with the same shape and dtype as ``x``.
    """
    peak_idx, _ = scipy.signal.find_peaks(x.numpy(), height=thresh)
    mask = torch.zeros_like(x)
    # Set all peaks with one indexed assignment instead of testing every
    # position for membership in the peak list.
    mask[torch.as_tensor(peak_idx, dtype=torch.long)] = 1
    return mask

In [38]:
a = np.random.randn((111))

In [39]:
b = torch.from_numpy(a)

In [76]:
make_bin(b, thresh=0.6)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
        0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0.,
        0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 0.,
        1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
        1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0.,
        0., 1., 0.], dtype=torch.float64)

In [67]:
# Print the integers 0..110, one per line.
# NOTE(review): scratch cell; it leaves 111 lines of output in the saved
# notebook and leaves `i` bound to 110 in the kernel namespace.
for i in range(111):
    print(i)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
