In [2]:
import os
import torch
import torchaudio
from IPython.display import Audio
import numpy as np
from scipy.fft import fft, ifft
import matplotlib.pyplot as plt
import pandas as pd
import scipy
import random

In [3]:
wav_path = '/Users/ginoprasad/autotune/m4a_files/solo_no_background.m4a'

In [4]:
wav, sample_rate = torchaudio.load(wav_path)
wav = wav[0]
wav = wav[400000:]
Audio(wav, rate=sample_rate)

In [5]:
wav_slice = wav[:50000]
Audio(wav_slice, rate=sample_rate)

# Step 1: distance

Ideally: $x_t - x_{t+T} = 0$ for all t

$d_t (\tau) = \sum_{j=t+1}^{t+W} (x_j - x_{j+\tau})^2$

$= r_t(0) + r_{t+\tau}(0) - 2r_t(\tau)$

Where $r_t(\tau) = \sum_{j=t+1}^{t+W} x_j x_{j+\tau}$

In [73]:
W = 10000

In [74]:
def randints(n, k):
    return [random.randint(0, n-1) for _ in range(k)]

In [75]:
def d(t):
    autocorrelation = scipy.signal.convolve(wav_slice.numpy()[t:], wav_slice[t:t+W].numpy()[::-1], mode='valid')
    energy = scipy.signal.convolve(wav_slice.numpy()[t:] * wav_slice.numpy()[t:], np.ones(W), mode='valid')
    distance = (energy + energy[0]) - (2 * autocorrelation)
    return distance

In [92]:
def normalized_d(t):
    sum_ = 0
    ret = []
    distance = d(t)
    for tau, dist in enumerate(distance):
        if tau == 0:
            ret.append(1)
        else:
            ret.append(dist / ((1/tau) * sum_))
        sum_ += dist
    return np.array(ret), distance

In [134]:
def parabolic_interpolation(y):
    x = np.array(range(len(y)))
    x_squared = x ** 2
    ones = np.ones(len(y))

    mat = np.transpose(np.array([x_squared, x, ones]))
    a, b, c = np.matmul(np.linalg.inv(np.matmul(np.transpose(mat), mat)), np.transpose(mat)).dot(y)
    return -(b / (2 * a))

In [143]:
def pitch(x_threshold=5, y_threshold=0.25, t=0, width=3):
    ls, dist = normalized_d(t)
    minimum = None
    for x, val in enumerate(ls):
        if x > x_threshold and val < y_threshold and ls[x+1] > val:
            minimum = x
            break
    minimum = minimum-width + parabolic_interpolation(dist[minimum-width:minimum+width+1]) # parabolic interpolation
    return sample_rate / minimum, minimum

In [153]:
def pitch_predict(iterations=30):
    pitch_candidates = []
    for t in randints(len(wav_slice) - W, iterations):
        pitch_candidates.append(pitch(t=t)[0])
    median = np.median(pitch_candidates)
    return median if median in pitch_candidates else pitch_candidates[0]

In [193]:
freq = pitch_predict()
freq

546.6511034433393

In [198]:
base = np.arange(0, 2 ** 18).astype(np.float64)
amplitude = 1
def get_frequency(frequency):
    c = (frequency * 2 * np.pi) / sample_rate
    wavelet_ = amplitude * np.sin(c * base)
    return wavelet_

In [199]:
wav_slice = wav[:50000]
Audio(wav_slice, rate=sample_rate)

In [200]:
Audio(get_frequency(freq), rate=sample_rate)

In [201]:
Audio(wav_slice, rate=sample_rate*(554.36/freq))

In [202]:
Audio(get_frequency(554.36), rate=sample_rate)

In [205]:
torchaudio.Resample(sample_rate, sample_rate*(554.36/freq), dtype=waveform.dtype)

AttributeError: module 'torchaudio' has no attribute 'Resample'

In [212]:
sample_rate

32000

In [210]:
int(sample_rate * 554.36/freq)

32451

In [245]:
precision = 10000

In [246]:
resampled = torchaudio.functional.resample(wav_slice, orig_freq=precision, new_freq=int(precision * freq/554.36))

In [247]:
len(resampled)

49300

In [248]:
len(wav_slice)

50000

In [249]:
Audio(resampled, rate=sample_rate)