# インポート

In [1]:
# import packages

# python
import os
from glob import glob
import math

# numpt
import numpy as np

# scipy
import scipy.signal

# bokeh
from bokeh.plotting import output_notebook, figure, show
output_notebook()

# local
from WavData import WavData
import FileUtil
import SignalProcessingUtil as spu
import NotebookUtil as nu

# データディレクトリ内のファイルを列挙

In [2]:
# enumerate
top_dir = os.environ.get('PWD')
print(f'top_dir={top_dir}')
wav_files = glob(os.path.join(top_dir, 'data/*.wav'))

# dump
wav_files

top_dir=/kick_extractor


['/kick_extractor/data/blue_eyes.wav',
 '/kick_extractor/data/Freaking Tight - Alex Prospect_2.wav',
 '/kick_extractor/data/last_goodbye_mob.wav',
 '/kick_extractor/data/SHOT ME DOWN - Alex Prospect 2_2.wav',
 '/kick_extractor/data/Tremor - Alex Prospect & WILSXN_2.wav',
 '/kick_extractor/data/02 Dreamer (7iva & saqwz Remix).wav',
 '/kick_extractor/data/satellite.wav',
 '/kick_extractor/data/happy_days_refrain_20180421_2.wav',
 '/kick_extractor/data/save_a_life.wav',
 '/kick_extractor/data/Heaven 2017 - Alex Prospect_2.wav',
 '/kick_extractor/data/stay_young.wav',
 '/kick_extractor/data/Never Forget You - Alex Prospect_2.wav',
 '/kick_extractor/data/look_back.wav',
 '/kick_extractor/data/WDYWFM - Alex Prospect_2.wav',
 '/kick_extractor/data/Need U 100% - Alex Prospect & Spyro_2.wav',
 '/kick_extractor/data/all_about_elysium.wav',
 '/kick_extractor/data/INTOXICATED - Alex Prospect & Spyro_2.wav',
 '/kick_extractor/data/show_me_the_way.wav',
 '/kick_extractor/data/Up 2 No Good - Alex Pro

# 入力ファイルをロード

In [3]:
# TODO 決め打ち
src_path = '/kick_extractor/data/Need U 100% - Alex Prospect & Spyro_2.wav'

# load
wav_data = FileUtil.load_wav_file(src_path)

# plot
nu.plot_wav_data({'original waveform': wav_data})

  sample_rate, samples = wavfile.read(src_path)


# pre-process wav data

In [4]:
# TODO 決め打ちのパラメータ
beat_per_minute = 170
beat_length_in_sec = 60 / 170
peak_filter_initial_frequency_in_hz = 80
peak_filter_bandwidth_in_hz = 100 #200

# 先頭 1/8 音符分を切り出し
beat_length_in_sample = beat_length_in_sec * wav_data.sample_rate
wav_data_truncated = WavData(wav_data.sample_rate, wav_data.samples[:,:int(beat_length_in_sample/2)])

# モノラルにミックスダウン
wav_data_mono = WavData(wav_data_truncated.sample_rate, np.mean(wav_data_truncated.samples, axis=0))

# 最も大きい正弦波以外の周波数帯をカットオフして「キレイ」にする
wav_data_mono_smoothed_max = np.zeros_like(wav_data_mono.samples)
wav_data_mono_smoothed_min = np.zeros_like(wav_data_mono.samples)
for f in [f for f in np.geomspace(40, 240, 256)]:
    for q in [q for q in np.linspace(100, 200, 25)]:
        wav_data_mono_filtered = spu.apply_peak_filter(wav_data_mono, peak_filter_bandwidth_in_hz, f).samples
        wav_data_mono_smoothed_max = np.fmax(wav_data_mono_smoothed_max, wav_data_mono_filtered)
        wav_data_mono_smoothed_min = np.fmin(wav_data_mono_smoothed_min, wav_data_mono_filtered)
wav_data_mono_smoothed = WavData(wav_data_mono.sample_rate, wav_data_mono_smoothed_max + wav_data_mono_smoothed_min)

# 結果をプロット
nu.plot_wav_data({'original waveform': wav_data_mono, 'smoothed waveform': wav_data_mono_smoothed})

# 極値を探索

In [5]:
# 極値検出関数
def detect_next_extrema(wav_data: WavData, base_position_in_sample: int, filter_frequency_in_hz: float, direction: int):
    # wav_data_filtered = spu.apply_peak_filter(wav_data, peak_filter_bandwidth_in_hz, filter_frequency_in_hz)
    # extremas = scipy.signal.argrelextrema(wav_data_filtered.samples, np.greater)[0]
    extremas = scipy.signal.argrelextrema(wav_data.samples, np.greater)[0]
    base_position_extremas_index = np.abs(np.asarray(extremas) - base_position_in_sample).argmin()
    next_position_extremas_index = base_position_extremas_index + direction
    if not 0 <= next_position_extremas_index < len(extremas):
        return None
    next_position_in_sample = extremas[next_position_extremas_index]
    next_filter_period_in_sample = abs(base_position_in_sample - next_position_in_sample) * 2.0 # 隣の極値までの距離（サンプル数）は半周期分なので２倍する
    next_filter_period_in_sec = next_filter_period_in_sample / wav_data.sample_rate
    next_filter_frequency_in_hz = 1.0 / next_filter_period_in_sec
    return (next_position_in_sample, next_filter_frequency_in_hz)

# 種となる最初の極値を検出
wav_data_mono_smoothed_abs = WavData(wav_data_mono_smoothed.sample_rate, np.abs(wav_data_mono_smoothed.samples))
initial_extrema_position_in_sample = np.argmax(wav_data_mono_smoothed_abs.samples)

# 初期極値から左（過去）に向けて極値を探索
left_side_extrema_positions_in_sample = []
current_extrema_position_in_sample = initial_extrema_position_in_sample
current_preak_filter_frequency_in_hz = peak_filter_initial_frequency_in_hz
while True:
    detection_result = detect_next_extrema(wav_data_mono_smoothed_abs, current_extrema_position_in_sample, current_preak_filter_frequency_in_hz, -1)
    if detection_result is None:
        break
    current_extrema_position_in_sample, current_preak_filter_frequency_in_hz = detection_result
    if len(left_side_extrema_positions_in_sample) > 0 and current_extrema_position_in_sample > left_side_extrema_positions_in_sample[0]:
        break
    left_side_extrema_positions_in_sample.append(current_extrema_position_in_sample)

# 初期極値から右（未来）に向けて極値を探索
right_side_extrema_positions_in_sample = []
current_extrema_position_in_sample = initial_extrema_position_in_sample
current_preak_filter_frequency_in_hz = peak_filter_initial_frequency_in_hz
while True:
    detection_result = detect_next_extrema(wav_data_mono_smoothed_abs, current_extrema_position_in_sample, current_preak_filter_frequency_in_hz, +1)
    if detection_result is None:
        break
    current_extrema_position_in_sample, current_preak_filter_frequency_in_hz = detection_result
    if len(right_side_extrema_positions_in_sample) > 0 and current_extrema_position_in_sample < right_side_extrema_positions_in_sample[-1]:
        break
    right_side_extrema_positions_in_sample.append(current_extrema_position_in_sample)

# 結合
extrema_positions_in_sample = np.asarray( list(reversed(left_side_extrema_positions_in_sample)) + [initial_extrema_position_in_sample] + right_side_extrema_positions_in_sample )

# プロット
extrema_pulse = WavData(wav_data_mono_smoothed.sample_rate, np.zeros_like(wav_data_mono_smoothed.samples))
for i in extrema_positions_in_sample:
    extrema_pulse.samples[i] = wav_data_mono_smoothed.samples[i]
nu.plot_wav_data({'extrema': extrema_pulse, 'smoothed waveform': wav_data_mono_smoothed})

# 極端な外れ値を除外
- 外れ値リストの中央を始点として外れ値を探索する
- 極値の極性が隣接極値と反対になってない＝反対になってない隣接極値を外れ値とみなす
- 外れ値で挟まれた閉区間の外側を外れ値として除外する

In [6]:
# 「どちらか一方の隣接極値と符号が同一なら True 」な配列を生成(is_outlier)
extrema_signes = wav_data_mono_smoothed.samples[extrema_positions_in_sample] > 0.0
extrema_signes_shift_left = np.roll(extrema_signes, -1)
extrema_signes_shift_left[-1] = extrema_signes[-1]
extrema_signes_shift_right = np.roll(extrema_signes, 1)
extrema_signes_shift_right[0] = extrema_signes[0]
is_outlier = ( extrema_signes == extrema_signes_shift_left ) | ( extrema_signes == extrema_signes_shift_right )

# 中央から左（過去）方向に向けて外れ値を探索
initial_extrema_index = int(extrema_positions_in_sample.size / 2)
outlier_index_left = initial_extrema_index
outlier_index_right = initial_extrema_index
while ( 0 < outlier_index_left ) and ( not is_outlier[outlier_index_left] ):
    outlier_index_left = outlier_index_left - 1
while ( outlier_index_right < is_outlier.size - 1 ) and ( not is_outlier[outlier_index_right] ):
    outlier_index_right = outlier_index_right + 1

# 切り出し
inlier_extrema_positions_in_samples = extrema_positions_in_sample[outlier_index_left:outlier_index_right]

# プロット
extrema_pulse = WavData(wav_data_mono_smoothed.sample_rate, np.zeros_like(wav_data_mono_smoothed.samples))
for i in inlier_extrema_positions_in_samples:
    extrema_pulse.samples[i] = wav_data_mono_smoothed.samples[i]
nu.plot_wav_data({'extrema': extrema_pulse, 'smoothed waveform': wav_data_mono_smoothed})

# ゼロクロス点（サブサンプル精度）を抽出

In [7]:
# 全てのゼロクロス点を抽出
zero_cross_point_in_samples = np.asarray( np.nonzero( ( wav_data_mono_smoothed.samples[:-1] > 0.0 ) ^ ( np.roll(wav_data_mono_smoothed.samples, -1)[:-1] > 0.0 ) ) ).flatten()

# インライア近傍のゼロクロス点を抽出
inliner_zero_cross_point_in_samples = zero_cross_point_in_samples[(inlier_extrema_positions_in_samples[0] < zero_cross_point_in_samples) & (zero_cross_point_in_samples < inlier_extrema_positions_in_samples[-1])]

# サブサンプルオフセットを推定
#
# ゼロクロス点前後のサンプルを通る直線は
# y = (samples[i+1] - samples[i]) * x + samples[i]
# これを y = 0 として x について解くと
# x = -samples[i] / (samples[i+1] - samples[i])
# これをベクトル演算で頑張ると↓になる
slope = wav_data_mono_smoothed.samples[inliner_zero_cross_point_in_samples+1] - wav_data_mono_smoothed.samples[inliner_zero_cross_point_in_samples]
intercept = wav_data_mono_smoothed.samples[inliner_zero_cross_point_in_samples]
subsample_offset = -intercept / slope

# サブサンプル精度のゼロクロス点を計算
inliner_zero_cross_points_in_subsamples = inliner_zero_cross_point_in_samples + subsample_offset

# ダンプ
# TODO 波形とエンベロープのオーバーレイ表示機能が必要
print(inliner_zero_cross_points_in_subsamples)

[ 346.77556752  355.38964762  392.04334086  411.58433028  474.98827843
  555.94932336  643.54680436  759.13109855  877.27241211 1003.81325463
 1136.00567464 1272.66321745 1421.34665202 1584.85029741 1753.96435412
 1939.86510464 2144.32080307 2361.76872189 2588.71646926 2831.68664083
 3092.31307688 3377.26950664 3682.81830113 3991.63796243 4323.7762758
 4676.69611978 5048.72903784 5376.71852906 5688.17587325 5993.82939034
 6301.64243385 6681.5406425  7084.97243591]


# 極値位置をサブサンプル精度化
- 半周期の正弦波を当てはめる
- 当てはめ結果の「時の山のてっぺん」を新しいサンプル位置として採用する

In [8]:
# TODO
inlier_extrema_positions_in_subsamples = inlier_extrema_positions_in_samples

# ゼロクロスと極値をマージ

In [9]:
# DEBUG
#inlier_control_points_in_subsamples = np.sort( np.concatenate([inlier_extrema_positions_in_subsamples, inliner_zero_cross_points_in_subsamples]) )
inlier_control_points_in_subsamples = np.sort( np.concatenate([inlier_extrema_positions_in_samples, inliner_zero_cross_points_in_subsamples]) )

# TODO 波形とエンベロープのオーバーレイ表示機能が必要
print(inliner_zero_cross_points_in_subsamples)
nu.plot_envelope({'control points': np.vstack((inlier_control_points_in_subsamples, wav_data_mono_smoothed.samples[inlier_control_points_in_subsamples.astype(np.int32)]))}, is_log_scale=False)


[ 346.77556752  355.38964762  392.04334086  411.58433028  474.98827843
  555.94932336  643.54680436  759.13109855  877.27241211 1003.81325463
 1136.00567464 1272.66321745 1421.34665202 1584.85029741 1753.96435412
 1939.86510464 2144.32080307 2361.76872189 2588.71646926 2831.68664083
 3092.31307688 3377.26950664 3682.81830113 3991.63796243 4323.7762758
 4676.69611978 5048.72903784 5376.71852906 5688.17587325 5993.82939034
 6301.64243385 6681.5406425  7084.97243591]


# ゼロクロスと極値で比較
- 正弦波のフィッティングでサブサンプル精度化するのはダメダメだった

In [10]:
def plot_frequency(points_in_subsamples):
    # 極値をもとに周期を計算（半周期なので２倍する）
    inlier_extrema_periods_in_samples = (np.roll(points_in_subsamples, -1) - points_in_subsamples)[:-1] * 2

    # 周波数に変換
    inlier_extrema_preiods_in_sec = inlier_extrema_periods_in_samples / wav_data_mono_smoothed.sample_rate
    inlier_extrema_frequency_in_hz = 1.0 / inlier_extrema_preiods_in_sec

    # プロット
    nu.plot_envelope({'freq': np.vstack((points_in_subsamples[:-1], inlier_extrema_frequency_in_hz))}, is_log_scale=True)
    
plot_frequency(inlier_extrema_positions_in_samples)
plot_frequency(inlier_extrema_positions_in_subsamples)
plot_frequency(inliner_zero_cross_points_in_subsamples)
plot_frequency(inlier_control_points_in_subsamples)

# 粗い周波数推移を推定
- 極値周辺の周波数を隣接極値との距離を元に粗く推定

In [11]:
# 極値をもとに周期を計算（半周期なので２倍する）
inlier_extrema_periods_in_samples = (np.roll(inlier_control_points_in_subsamples, -1) - inlier_control_points_in_subsamples)[:-1] * 2

# 周波数に変換
inlier_extrema_preiods_in_sec = inlier_extrema_periods_in_samples / wav_data_mono_smoothed.sample_rate
inlier_extrema_frequency_in_hz = 1.0 / inlier_extrema_preiods_in_sec

# プロット
nu.plot_envelope({'freq': np.vstack((inlier_control_points_in_subsamples[:-1], inlier_extrema_frequency_in_hz))}, is_log_scale=True)

# 粗い周波数推移を使って波形を再スムージング

# 詳細な周波数推移を推定
- ゼロ交差点で囲われた「山」に正弦波を当てはめて厳密な極値位置と周波数と最大振幅を推定

# 周波数推移に式を当てはめ
- 周波数の推移データに対して何らかの数式的なモデルを当てはめる
- それによりモデルのパラメータを得る
- 多分 RANSAC か LMeds

# 当てはめ結果を元に波形を再構築
- モデルと推定したパラメータに基づいて正弦波を再生
- ファイルとして保存する
- 1/8 音符ジャストのタイミングで振幅が 0 になるように開始位相を補正する