In [110]:
import librosa
import numpy as np
import torch

from src.constants import SAMPLE_RATE, WINDOW_LENGTH

HOP_LENGTH = 20
HOP_LENGTH = int(HOP_LENGTH / 1000 * SAMPLE_RATE)
SAMPLE_RATE = 16000

audio, _ = librosa.load("input/j3ko.wav", sr=SAMPLE_RATE)
audio_l = len(audio)
audio = np.pad(audio, WINDOW_LENGTH // 2, mode="constant")


audio = torch.from_numpy(audio).float()
audio_steps = audio_l // HOP_LENGTH + 1

seq_len = 2.55
seq_len = int(seq_len * SAMPLE_RATE)

chunk_duration = 3 
chunk_samples = int(chunk_duration * SAMPLE_RATE)
chunk_overlap = int(chunk_duration // 2 * SAMPLE_RATE)
chunks = []
for i in range(0, len(audio) + chunk_overlap, chunk_samples):
    st = max(0, i - chunk_overlap) 
    chunk = audio[st:st + chunk_overlap]
    if len(chunk) < chunk_samples:
        pad_size = chunk_samples - len(chunk)
        chunk = torch.nn.functional.pad(chunk, (0, pad_size), 'constant', 0)
    chunks.append(chunk)


datas = []

for chunk in chunks :
    data = []
    for i in range(len(chunk) // seq_len):
        begin_t = i * seq_len
        end_t = begin_t + seq_len + WINDOW_LENGTH
        data.append(
            chunk[begin_t:end_t],
        )
    data.append(
        chunk[-seq_len - WINDOW_LENGTH :],
    )
    datas.append(data)

datas = torch.from_numpy(np.array(datas)).float()

In [111]:
datas.shape

torch.Size([7, 2, 42848])

In [112]:
from src.model import E2E


model = torch.load('runs/Pitch_FL6_0/model.pt')
model.eval()
model = model.cuda()

In [113]:
datas.shape

torch.Size([7, 2, 42848])

In [114]:
results = []
overlap_results = []
i = 0
for data in datas:
    data = data.cuda()
    with torch.no_grad():
        result = model(data)
        # if (i > 0):
        #     matrix1 = results[i-1] 
        #     matrix2 = result[1]
        #     overlap_results.append(sum_overlap(matrix1, matrix2, chunk_overlap)) 
        results.append(result[1].cpu().numpy())
    i += 1

results = torch.from_numpy(np.array(results)).float()

In [115]:
# for result in results :
#     data = []
#     for i in range(len(chunk) // seq_len):
#         begin_t = i * seq_len
#         end_t = begin_t + seq_len + WINDOW_LENGTH
#         data.append(
#             chunk[begin_t:end_t],
#         )
#     data.append(
#         chunk[-seq_len - WINDOW_LENGTH :],
#     )
#     datas.append(data)

In [116]:
from src.utils import to_local_average_cents

cents_pred_chunck = []

for result in results:
    print(result.shape)
    cents_pred = to_local_average_cents(result.view(-1,360).cpu().numpy(),None, 0 )
    cents_pred_chunck.append(cents_pred)

torch.Size([2, 128, 360])
torch.Size([2, 128, 360])
torch.Size([2, 128, 360])
torch.Size([2, 128, 360])
torch.Size([2, 128, 360])
torch.Size([2, 128, 360])
torch.Size([2, 128, 360])


In [117]:
def __sum_overlap(matrix1, matrix2, overlap):
    matrix1 = np.pad(matrix1, (0, overlap), mode='constant')
    matrix2 = np.pad(matrix2, (overlap, 0), mode='constant')
    result = matrix2 + matrix1
    result[overlap:-overlap] =  result[overlap:-overlap]/2
    return result

In [118]:
def sum_overlap(matrix1, matrix2, overlap):
    matrix1 = matrix1[-overlap:]
    matrix2 = matrix2[:overlap]
    result = matrix2 + matrix1
    result[overlap:-overlap] = result[overlap:-overlap] / 2
    return result

In [119]:
# freq_pred = np.array(
#             [
#                 10 * (2 ** (cent_pred / 1200)) if cent_pred else 0
#                 for cent_pred in cents_pred
#             ]
#         )
freqs = []
freq_result = np.zeros(len(cents_pred_chunck[0]) * (len(cents_pred_chunck) - 1)) 
overlap_freqs = []
for i, cents_pred in enumerate(cents_pred_chunck):
    freq = np.array(
        [10 * (2 ** (cent / 1200)) if cent else 0 for cent in cents_pred]
    )
    overlap = len(freq) // 2
    start = max(0, i*overlap)
    if (i == 0):
        mean_freq = freq
        freq_result[start : start + len(freq)] += mean_freq
    else:
        mean_freq = sum_overlap(freqs[i-1],freq, overlap)
        print(f'start: {start} stop: {start + overlap}')
        freq_result[start : start + overlap] += mean_freq
        if (i == len(cents_pred_chunck) - 1):
            freq_result[start + overlap :] += freq[overlap:]
    freqs.append(freq)

start: 128 stop: 256
start: 256 stop: 384
start: 384 stop: 512
start: 512 stop: 640
start: 640 stop: 768
start: 768 stop: 896


ValueError: operands could not be broadcast together with shapes (640,) (128,) (640,) 

In [107]:
import pandas as pd

In [108]:
df = pd.DataFrame({"frequency": freq_result})

In [109]:
df.to_csv('f0.csv', index=False)