In [1]:
import math
import numpy
import torch
import torchaudio
torchaudio.set_audio_backend("soundfile")
import matplotlib.pyplot as plt



In [56]:
# Load the recording. torchaudio.load yields a (waveform, sample rate) pair;
# only the first row of the waveform (channel 0 in torchaudio's
# channels-first layout) is kept for the analysis below.
waveform, sampleRate = torchaudio.load("Samples/a1.wav")
tensor = waveform[0]
# Drop the temporary name so only `tensor` and `sampleRate` remain bound.
del waveform

In [57]:
# Pitch search configuration.
expectedPitch = 95.   # expected fundamental frequency; sampleRate / expectedPitch is the period in samples
searchRange = 0.2     # relative tolerance around the expected period (+/- 20 %)


def _scaledPeriod(scale):
    """Return the expected period length in samples multiplied by `scale`, rounded down."""
    return math.floor(scale * sampleRate / expectedPitch)


# Longest period considered; also used as the analysis frame length.
batchSize = _scaledPeriod(1. + searchRange)
# Shortest period considered.
lowerSearchLimit = _scaledPeriod(1. - searchRange)

In [58]:
# Pitch-synchronous segmentation.
#
# Starting at sample 0, repeatedly take a frame of `batchSize` samples, look
# for rising zero crossings between `lowerSearchLimit` and `batchSize`, and
# pick the crossing offset whose shifted frame has the smallest squared
# difference to the reference frame. That offset is recorded as the period
# length (`delta`) and the frame start advances by it.
#
# Result: `deltas`, a 1-D integer tensor with one period length per step.

signalLength = tensor.size()[0]
# sample[i - 1] is read below, so the search must not start before index 1
# (index 0 would silently wrap around to the last sample of the frame).
searchStart = max(lowerSearchLimit, 1)
# Used when a frame contains no rising zero crossing in the search range.
fallbackDelta = math.floor(sampleRate / expectedPitch)

batchStart = 0
detectedDeltas = []  # collected in a list; converted to a tensor once at the end
while batchStart + batchSize <= signalLength - batchSize:

    # Contiguous reference frame. A plain slice is used because
    # linspace(start, start + batchSize, batchSize) includes its end point
    # and therefore skips one sample when truncated to integer indices.
    sample = tensor[batchStart:batchStart + batchSize]

    # Rising zero crossings: previous sample negative, current sample positive.
    rising = (sample[searchStart - 1:-1] < 0) & (sample[searchStart:] > 0)
    zeroTransitions = torch.nonzero(rising).flatten() + searchStart

    error = math.inf
    delta = fallbackDelta
    for candidate in zeroTransitions.tolist():
        shiftedStart = batchStart + candidate
        shiftedSample = tensor[shiftedStart:shiftedStart + batchSize]
        newError = torch.sum(torch.pow(sample - shiftedSample, 2)).item()
        if error > newError:
            delta = candidate
            error = newError

    detectedDeltas.append(delta)
    batchStart += delta

deltas = torch.tensor(detectedDeltas, dtype = int)

In [59]:
# Turn the per-period lengths into absolute sample positions:
# borders[0] == 0 and borders[k + 1] == borders[k] + deltas[k].
nBatches = deltas.shape[0]
borders = torch.cat([torch.zeros(1, dtype = int), torch.cumsum(deltas, 0)])

In [76]:
# Per-period spectral analysis.
#
# For every usable period i (borders[i] .. borders[i+1]):
#   * a Hann-windowed segment spanning the period plus one period length on
#     each side is transformed to a power spectrum (|FFT|^2), and
#   * the period itself is transformed with a plain FFT.
# Both transforms use n = the shortest detected period so all rows have the
# same length. The FFT of each period is then divided by the average power
# spectrum to give its "excitation", and both sets are stored as deviations
# from their respective averages.
#
# Periods 0, 1 and the last one are skipped, as in the original loop bounds,
# because the three-period window would reach outside the analysed range.
frameIndices = range(2, nBatches - 1)
nFrames = len(frameIndices)
if nFrames == 0:
    raise ValueError("Need at least 4 detected pitch periods to compute spectra")

# Plain Python int so it can be used as a size / FFT length.
minDelta = int(torch.min(deltas))

# One row per analysed period (previously an extra, never-written zero row
# was included and biased the averages).
Spectrums = torch.zeros(nFrames, minDelta)
Excitations = torch.zeros(nFrames, minDelta, dtype = torch.complex64)
for row, i in enumerate(frameIndices):
    lowerBorder = int(borders[i])
    upperBorder = int(borders[i+1])
    period = upperBorder - lowerBorder

    # Three-period segment centred on the current period, taken as a
    # contiguous slice instead of truncated floating-point linspace indices.
    Window = torch.hann_window(3 * period)
    sample = tensor[lowerBorder - period:upperBorder + period]
    sample = Window * sample
    Spectrums[row] = torch.pow(torch.fft.fft(sample, n = minDelta).abs(), 2)
    # TODO: implement pitch-adaptive spectral smoothing

    # Raw FFT of this period only; kept per row so that every excitation is
    # derived from its own period rather than from the last one processed.
    batch = tensor[lowerBorder:upperBorder]
    Excitations[row] = torch.fft.fft(batch, n = minDelta)

AverageSpectrum = torch.mean(Spectrums, 0)
Spectrums = Spectrums - AverageSpectrum

# Normalise every period's FFT by the average power spectrum first, then
# take the mean over the complete set before subtracting it.
Excitations = Excitations / AverageSpectrum
AverageExcitation = torch.mean(Excitations, 0)
Excitations = Excitations - AverageExcitation

In [82]:
# Recombine the averaged spectrum with the averaged excitation and write the
# real part of the product to disk as a single-channel 32-bit PCM WAV file.
# NOTE(review): both factors are derived from torch.fft.fft outputs in the
# previous cell, so finalCurve looks like frequency-domain data - confirm
# whether an inverse FFT is intended before saving it as audio.
finalCurve = torch.mul(AverageSpectrum, AverageExcitation)
torchaudio.save(
    "output_Averaged.wav",
    finalCurve.real.unsqueeze(0),
    sampleRate,
    format="wav",
    encoding="PCM_S",
    bits_per_sample=32,
)