In [1]:
# Step up one directory so the relative paths used later in this notebook
# ('data', 'musicrecognition/model_430k_steps.pth') are resolved from there.
# NOTE: not idempotent - re-running this cell moves up one more level.
%cd ..

/home/horstl/git/Music-recognition


In [2]:
from pathlib import Path
import random
import torch
from torch.utils.data import DataLoader

from musicrecognition.train import get_song_paths
from musicrecognition.audio_dataset import AudioDataset
from musicrecognition.augmentation import get_augmenter
from musicrecognition.spectrogram import get_spectrogram_func
from musicrecognition.data_collate import create_collate_fn
from musicrecognition.model import LSTMNetwork

# Seed Python's and torch's RNGs so the shuffled train/test split is reproducible.
random.seed(42)
torch.manual_seed(42)

BATCH_SIZE = 6
TEST_SIZE = 0.2  # fraction of songs held out for testing: 80% train / 20% test
SONG_SAMPLE_RATE = 44100  # Most songs in the dataset seem to have a sample-rate of 44100
MIN_AUDIO_LENGTH = 10  # presumably seconds - TODO confirm against create_collate_fn
MAX_AUDIO_LENGTH = 30  # presumably seconds - TODO confirm against create_collate_fn
LATENT_SPACE_SIZE = 48
DATA_ROOT = Path('data')

# Prefer the second GPU (the device this notebook was originally run on), but
# degrade gracefully on hosts with a single GPU or no GPU at all instead of
# failing at the first `.to(device)` call.
if torch.cuda.device_count() > 1:
    device = torch.device('cuda:1')
elif torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')



In [3]:
# Gather all song files and shuffle them; the leading TEST_SIZE share becomes
# the test split, everything after it the train split.
songs_paths = get_song_paths(DATA_ROOT / 'songs')
random.shuffle(songs_paths)
split_index = round(len(songs_paths) * TEST_SIZE)
test_paths = songs_paths[:split_index]
train_paths = songs_paths[split_index:]

test_set = AudioDataset(test_paths, SONG_SAMPLE_RATE)

# Batches are assembled by a collate function built from an augmenter
# (fed with the background-noise directory) and a spectrogram function.
augmenter = get_augmenter(DATA_ROOT / 'background_noises')
spectrogram_func = get_spectrogram_func(SONG_SAMPLE_RATE)
collate_fn = create_collate_fn(
    augmenter,
    MIN_AUDIO_LENGTH,
    MAX_AUDIO_LENGTH,
    SONG_SAMPLE_RATE,
    spectrogram_func,
)

test_loader = DataLoader(
    test_set,
    batch_size=BATCH_SIZE,
    num_workers=12,
    collate_fn=collate_fn,
)



In [4]:
# Build the network and restore the trained weights.
# NOTE(review): 256, 128 and 2 are passed positionally; their meaning
# (presumably input size, hidden size and layer count) should be confirmed
# against LSTMNetwork's signature.
model = LSTMNetwork(256, 128, LATENT_SPACE_SIZE, 2)

# map_location remaps the stored tensors onto `device`, so the checkpoint loads
# regardless of which device it was saved from (and without first allocating
# memory on that original device).
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
state_dict = torch.load('musicrecognition/model_430k_steps.pth', map_location=device)
model.load_state_dict(state_dict)

# Inference mode for layers such as dropout / batch norm, then move to the target device.
model.eval()
model = model.to(device)

In [16]:
# Start iterating the test loader; `iter_loader` is reused by later cells to
# pull further batches.
iter_loader = iter(test_loader)

# Each batch is an (anchors, positives) pair. Dimensions 1 and 2 are swapped
# before the tensors are handed to the model
# (presumably to put the time axis before the feature axis - TODO confirm).
anchors, positives = next(iter_loader)
anchors, positives = anchors.transpose(1, 2).to(device), positives.transpose(1, 2).to(device)

# Pure inference: disable autograd so no graph is built and no activations are
# kept alive on the device.
with torch.no_grad():
    latent_anchors = model(anchors).cpu()
    latent_positives = model(positives).cpu()

In [36]:
# Pull the next batch from the running iterator and embed it.
anchors, positives = next(iter_loader)
anchors, positives = anchors.transpose(1, 2).to(device), positives.transpose(1, 2).to(device)

# Pure inference: disable autograd so no graph is built and no activations are
# kept alive on the device.
with torch.no_grad():
    latent_anchors = model(anchors).cpu()
    latent_positives = model(positives).cpu()

# Pick one sample of the batch and print the squared Euclidean distance between
# its anchor embedding and (a) its own positive embedding, (b) the positive
# embeddings of every other sample in the batch, which serve as negatives.
pos_id = 0
latent_positive = latent_positives[pos_id]
latent_anchor = latent_anchors[pos_id]
print(f"positive: {((latent_anchor-latent_positive)**2).sum(axis=0).item()}")
for i in range(len(latent_positives) - 1):
    # Walk the batch cyclically starting right after pos_id, so pos_id itself is skipped.
    latent_negative = latent_positives[(i + pos_id + 1) % len(latent_positives)]
    distance = ((latent_anchor-latent_negative)**2).sum(axis=0)
    print(f"negative: {distance.item()}")

positive: 30.24498748779297
negative: 31.706796646118164
negative: 59.96929931640625
negative: 79.36145782470703
negative: 108.03630065917969
negative: 177.42984008789062
