# インポート

In [1]:
# import packages

# python
import os
from glob import glob
import math
from typing import Dict

# numpt
import numpy as np

# scipy
import scipy.signal

# bokeh
from bokeh.plotting import output_notebook, figure, show
output_notebook()

# local
from WavData import WavData
import FileUtil
import SignalProcessingUtil as spu
import NotebookUtil as nu

# データディレクトリ内のファイルを列挙

In [2]:
# enumerate
top_dir = os.environ.get('PWD')
print(f'top_dir={top_dir}')
wav_files = glob(os.path.join(top_dir, 'data/*.wav'))

# dump
wav_files

top_dir=/kick_extractor


['/kick_extractor/data/blue_eyes.wav',
 '/kick_extractor/data/Freaking Tight - Alex Prospect_2.wav',
 '/kick_extractor/data/last_goodbye_mob.wav',
 '/kick_extractor/data/SHOT ME DOWN - Alex Prospect 2_2.wav',
 '/kick_extractor/data/Tremor - Alex Prospect & WILSXN_2.wav',
 '/kick_extractor/data/02 Dreamer (7iva & saqwz Remix).wav',
 '/kick_extractor/data/satellite.wav',
 '/kick_extractor/data/happy_days_refrain_20180421_2.wav',
 '/kick_extractor/data/save_a_life.wav',
 '/kick_extractor/data/Heaven 2017 - Alex Prospect_2.wav',
 '/kick_extractor/data/stay_young.wav',
 '/kick_extractor/data/Never Forget You - Alex Prospect_2.wav',
 '/kick_extractor/data/look_back.wav',
 '/kick_extractor/data/WDYWFM - Alex Prospect_2.wav',
 '/kick_extractor/data/Need U 100% - Alex Prospect & Spyro_2.wav',
 '/kick_extractor/data/all_about_elysium.wav',
 '/kick_extractor/data/INTOXICATED - Alex Prospect & Spyro_2.wav',
 '/kick_extractor/data/show_me_the_way.wav',
 '/kick_extractor/data/Up 2 No Good - Alex Pro

# 入力ファイルをロード

In [3]:
# TODO 決め打ち
src_path = '/kick_extractor/data/WDYWFM - Alex Prospect_2.wav'

# load
wavdata = FileUtil.load_wav_file(src_path)

# plot
nu.plot( nu.describe_wavdata('original waveform', wavdata) )

  sample_rate, samples = wavfile.read(src_path)


# pre-process wav data

In [4]:
# TODO 決め打ちのパラメータ
beat_per_minute = 170
beat_length_in_sec = 60 / 170
peak_filter_initial_frequency_in_hz = 80
peak_filter_bandwidth_in_hz = 100 #200
upsampling_rate = 1

# 先頭 1/8 音符分を切り出し
beat_length_in_sample = beat_length_in_sec * wavdata.sample_rate
wavdata_truncated = WavData(wavdata.sample_rate, wavdata.samples[:,:int(beat_length_in_sample/2)])

# モノラルにミックスダウン
wavdata_mono = WavData(wavdata_truncated.sample_rate, np.mean(wavdata_truncated.samples, axis=0))

# 最も大きい正弦波以外の周波数帯をカットオフして「キレイ」にする
wavdata_mono_smoothed_max = np.zeros_like(wavdata_mono.samples)
wavdata_mono_smoothed_min = np.zeros_like(wavdata_mono.samples)
for f in [f for f in np.geomspace(40, 240, 256)]:
    for q in [q for q in np.linspace(100, 200, 16)]:
        wavdata_mono_filtered = spu.apply_peak_filter(wavdata_mono, peak_filter_bandwidth_in_hz, f).samples
        wavdata_mono_smoothed_max = np.fmax(wavdata_mono_smoothed_max, wavdata_mono_filtered)
        wavdata_mono_smoothed_min = np.fmin(wavdata_mono_smoothed_min, wavdata_mono_filtered)
wavdata_mono_smoothed = WavData(wavdata_mono.sample_rate, wavdata_mono_smoothed_max + wavdata_mono_smoothed_min)

# アップサンプリング
samples_prepro = scipy.signal.resample(wavdata_mono_smoothed.samples, wavdata_mono_smoothed.length_in_samples()*upsampling_rate)
wavdata_prepro = WavData(wavdata_mono_smoothed.sample_rate*upsampling_rate, samples_prepro)

# 結果をプロット
nu.plot(
    nu.describe_wavdata('original waveform', wavdata_mono) +
    nu.describe_wavdata('smoothed waveform', wavdata_mono_smoothed) +
    nu.describe_wavdata('pre-processed', wavdata_prepro)
)

# 極値を探索

In [5]:
# 結合
min_positions_in_samples = scipy.signal.argrelmin(wavdata_prepro.samples, order=4)
max_positions_in_samples = scipy.signal.argrelmax(wavdata_prepro.samples, order=4)
raw_extrema_positions_in_sample = np.sort(np.asarray((min_positions_in_samples,max_positions_in_samples)).flatten())
print(type(raw_extrema_positions_in_sample))

# プロット
nu.plot(
    nu.describe_dot_on_wavdata('extrema', wavdata_prepro, raw_extrema_positions_in_sample) +
    nu.describe_wavdata('wavdata_prepro', wavdata_prepro)
)

<class 'numpy.ndarray'>


# 同符号の極値をグループとみなして代表点で置き換える

In [6]:
# for ループで頑張る
extrema_positions_in_sample = []
position_buffer = []
value_buffer = []
for position in raw_extrema_positions_in_sample:
    value = wavdata_prepro.samples[position]
    sign = value > 0.0
    if len(position_buffer) != 0:
        last_position = position_buffer[-1]
        last_value = value_buffer[-1]
        last_sign = last_value > 0.0
        if sign != last_sign:
            averaged_position = np.average(position_buffer, weights=value_buffer)
            extrema_positions_in_sample.append(averaged_position)
            position_buffer.clear()
            value_buffer.clear()
    position_buffer.append(position)
    value_buffer.append(value)
extrema_positions_in_sample = np.asarray(extrema_positions_in_sample).astype(np.int32)
    
# プロット
nu.plot(
    nu.describe_dot_on_wavdata('extrema', wavdata_prepro, extrema_positions_in_sample) +
    nu.describe_wavdata('wavdata_prepro', wavdata_prepro)
)

# 極端な外れ値を除外
- 外れ値リストの中央を始点として外れ値を探索する
- 極値の極性が隣接極値と反対になってない＝反対になってない隣接極値を外れ値とみなす
- 外れ値で挟まれた閉区間の外側を外れ値として除外する

In [7]:
# 「どちらか一方の隣接極値と符号が同一なら True 」な配列を生成(is_outlier)
extrema_signes = wavdata_prepro.samples[extrema_positions_in_sample] > 0.0
extrema_signes_shift_left = np.roll(extrema_signes, -1)
extrema_signes_shift_left[-1] = extrema_signes[-1]
extrema_signes_shift_right = np.roll(extrema_signes, 1)
extrema_signes_shift_right[0] = extrema_signes[0]
is_outlier = ( extrema_signes == extrema_signes_shift_left ) | ( extrema_signes == extrema_signes_shift_right )

# 中央から左（過去）方向に向けて外れ値を探索
initial_extrema_index = int(len(extrema_positions_in_sample) / 2)
outlier_index_left = initial_extrema_index
outlier_index_right = initial_extrema_index
while ( 0 < outlier_index_left ) and ( not is_outlier[outlier_index_left] ):
    outlier_index_left = outlier_index_left - 1
while ( outlier_index_right < is_outlier.size - 1 ) and ( not is_outlier[outlier_index_right] ):
    outlier_index_right = outlier_index_right + 1

# 切り出し
inlier_extrema_positions_in_samples = extrema_positions_in_sample[outlier_index_left:outlier_index_right]

# プロット
nu.plot(
    nu.describe_dot_on_wavdata('extrema', wavdata_prepro, inlier_extrema_positions_in_samples) +
    nu.describe_wavdata('wavdata_prepro', wavdata_prepro)
)

# ゼロクロス点（サブサンプル精度）を抽出

In [8]:
# 全てのゼロクロス点を抽出
zero_cross_point_in_samples = np.asarray( np.nonzero( ( wavdata_prepro.samples[:-1] > 0.0 ) ^ ( np.roll(wavdata_prepro.samples, -1)[:-1] > 0.0 ) ) ).flatten()

# インライア近傍のゼロクロス点を抽出
inliner_zero_cross_point_in_samples = zero_cross_point_in_samples[(inlier_extrema_positions_in_samples[0] < zero_cross_point_in_samples) & (zero_cross_point_in_samples < inlier_extrema_positions_in_samples[-1])]

# サブサンプルオフセットを推定
#
# ゼロクロス点前後のサンプルを通る直線は
# y = (samples[i+1] - samples[i]) * x + samples[i]
# これを y = 0 として x について解くと
# x = -samples[i] / (samples[i+1] - samples[i])
# これをベクトル演算で頑張ると↓になる
slope = wavdata_prepro.samples[inliner_zero_cross_point_in_samples+1] - wavdata_prepro.samples[inliner_zero_cross_point_in_samples]
intercept = wavdata_prepro.samples[inliner_zero_cross_point_in_samples]
subsample_offset = -intercept / slope

# サブサンプル精度のゼロクロス点を計算
inliner_zero_cross_points_in_subsamples = inliner_zero_cross_point_in_samples + subsample_offset

# プロット
nu.plot(
    nu.describe_dot_on_wavdata('zero-cross', wavdata_prepro, inliner_zero_cross_points_in_subsamples) +
    nu.describe_wavdata('wavdata_prepro', wavdata_prepro)
)

# 極値位置をサブサンプル精度化
- トゥルーピーク（インターサンプルピーク）を得る

In [9]:
def extrema_position_to_subsample(samples: np.ndarray, position_in_samples: int):
    '''
    position_in_samples をサブサンプル精度化して返却する。
    '''
    # エイリアス
    magnitude_left = abs(samples[position_in_samples-1])
    magnitude_center = abs(samples[position_in_samples])
    magnitude_right = abs(samples[position_in_samples+1])
    magnitude_1st = magnitude_center
    magnitude_2nd = max(magnitude_left, magnitude_right)
    magnitude_3rd = min(magnitude_left, magnitude_right)
    # サブサンプル単位のオフセットを計算
    subsample_offset = 0.5 * (magnitude_2nd - magnitude_3rd) / (magnitude_1st - magnitude_3rd)
    if magnitude_left > magnitude_right:
        return position_in_samples - subsample_offset
    else:
        return position_in_samples + subsample_offset

# サブサンプル精度化
inlier_extrema_positions_in_subsamples = np.array([extrema_position_to_subsample(wavdata_prepro.samples, position_in_samples) for position_in_samples in inlier_extrema_positions_in_samples])

# プロット
nu.plot(
    nu.describe_dot_on_wavdata('extrema', wavdata_prepro, inlier_extrema_positions_in_samples) +
    nu.describe_dot_on_wavdata('extrema(sub)', wavdata_prepro, inlier_extrema_positions_in_subsamples) +
    nu.describe_wavdata('wavdata_prepro', wavdata_prepro)
)

# EDA 用にゼロクロスと極値をマージ

In [10]:
# DEBUG
inlier_control_points_in_subsamples = np.sort( np.concatenate([inlier_extrema_positions_in_subsamples, inliner_zero_cross_points_in_subsamples]) )

# TODO 波形とエンベロープのオーバーレイ表示機能が必要
# プロット
nu.plot(
    nu.describe_dot_on_wavdata('control point', wavdata_prepro, inlier_control_points_in_subsamples) +
    nu.describe_wavdata('wavdata_prepro', wavdata_prepro),
    beat_per_minute=beat_per_minute
)

# ゼロクロスと極値で比較
- 正弦波のフィッティングでサブサンプル精度化するのはダメダメだった

In [11]:
nu.plot(
    nu.describe_frequency('extrema(subsamples)', inlier_extrema_positions_in_subsamples, wavdata_prepro.sample_rate, 2) +
    nu.describe_frequency('zero-cross(subsamples)', inliner_zero_cross_points_in_subsamples, wavdata_prepro.sample_rate, 2) +
    nu.describe_frequency('ctrl(subsamples)', inlier_control_points_in_subsamples, wavdata_prepro.sample_rate, 4),
    is_log_scale=True
)
nu.plot(
    nu.describe_frequency('extrema(subsamples)', inlier_extrema_positions_in_subsamples, wavdata_prepro.sample_rate, 2) +
    nu.describe_frequency('zero-cross(subsamples)', inliner_zero_cross_points_in_subsamples, wavdata_prepro.sample_rate, 2),
    is_log_scale=True
)
nu.plot(
    nu.describe_frequency('ctrl(subsamples)', inlier_control_points_in_subsamples, wavdata_prepro.sample_rate, 4),
    is_log_scale=True
)

# ゼロクロスベース・極値ベースの周波数遷移を合成

In [12]:
# 単純に
inlier_control_points_in_hz = np.concatenate([
    spu.to_frequency(inlier_extrema_positions_in_subsamples, wavdata_prepro.sample_rate, 2),
    spu.to_frequency(inliner_zero_cross_points_in_subsamples, wavdata_prepro.sample_rate, 2)
])
inlier_control_points_in_subsamples = np.concatenate([inlier_extrema_positions_in_subsamples[:-1], inliner_zero_cross_points_in_subsamples[:-1]])

# 並び順を修正
sort_indices = np.argsort(inlier_control_points_in_subsamples)
inlier_control_points_in_hz = inlier_control_points_in_hz[sort_indices]
inlier_control_points_in_subsamples = inlier_control_points_in_subsamples[sort_indices]

# 位置情報を秒単位に修正
inlier_control_points_in_sec = inlier_control_points_in_subsamples / wavdata_prepro.sample_rate

# プロット
nu.plot(
    nu.describe_scatter('frquency(Hz)', inlier_control_points_in_hz, inlier_control_points_in_sec),
    is_log_scale=True,
    beat_per_minute=beat_per_minute
)

# 周波数推移に式を当てはめ
- 周波数の推移データに対して何らかの数式的なモデルを当てはめる
- それによりモデルのパラメータを得る
- 多分 RANSAC か LMeds

# 当てはめ結果を元に波形を再構築
- モデルと推定したパラメータに基づいて正弦波を再生
- ファイルとして保存する
- 1/8 音符ジャストのタイミングで振幅が 0 になるように開始位相を補正する