In [None]:
from datetime import datetime
from pathlib import Path

# Initial settings: every tunable value used by the cells below lives here.

# Directory that is searched recursively for WAVE files.
wav_dir = Path(r"\\nas-t\開発用\マイクロフォン異常検知\実験データ\20240228倉庫\サブマイク側")
# Centre frequencies handed to ln.make_oct_masks; also used (as strings) for the dataset columns.
center_freqs = [250, 500, 1000, 2000, 4000]
# Passed as `tap` to ln.make_oct_masks and ln.detect_diff_data.
tap = 1024
# Passed as `percentile` to ln.detect_diff_data.
percentile = 90
# Passed as `mean_time_sec` to ln.detect_diff_data (presumably an averaging window in seconds -- confirm).
mean_time_sec = 0.1
# Dataset column / dictionary key that the plotting and inspection cells look at.
target_freq = "250"


In [None]:
# Collect the WAV files

import glob
import os

# Recursive pattern: any *.WAV file at any depth below wav_dir.
wav_pattern = os.path.join(wav_dir, '**', '*.WAV')
wav_file_list = glob.glob(wav_pattern, recursive=True)

# Show how many files were found, followed by the full list of paths.
print(len(wav_file_list))
print(wav_file_list)

In [None]:
# Derive the anomaly-detection parameters from the recorded audio.
#
# Produces `dataset`: one row per WAV file, one column per centre frequency
# (column labels are the string form of `center_freqs`).

import pandas as pd
import learning_test_func as ln

columns = list(map(str,center_freqs))

# Gather the per-file rows first and build the DataFrame once afterwards.
# Growing a frame with pd.concat inside the loop is quadratic, concatenating
# onto an empty frame raises a FutureWarning on recent pandas, and it left
# every row with the same index label (0).
rows = []
for file in wav_file_list:
    fs, signal = ln.wav_load(file)
    # The masks take `fs` as an input, so they are rebuilt for every file.
    oct_freq_masks = ln.make_oct_masks(center_freqs, tap, fs)
    data_list = ln.detect_diff_data(signal, tap, oct_freq_masks, fs, mean_time_sec, percentile)
    rows.append(data_list)

# NOTE: `oct_freq_masks` from the last iteration is reused by later cells for
# its keys, so it is undefined when `wav_file_list` is empty.
dataset = pd.DataFrame(rows, columns=columns)

In [None]:
# Remove outliers with the Smirnov-Grubbs test

# Keys come from the masks built in the dataset cell; they are used both as
# dictionary keys and as column labels of `dataset`.
band_keys = list(oct_freq_masks.keys())

o_dataset = {f'{freq}': [] for freq in band_keys}
for freq in band_keys:
    band_values = list(dataset[freq])
    # 0.05 is forwarded to ln.smirnov_grubbs (presumably the significance level -- confirm).
    kept, out = ln.smirnov_grubbs(band_values, 0.05)
    o_dataset[freq] = kept
    print(out)


In [None]:
# Transform the data toward a normal distribution, compute the confidence
# interval of the mean, then map the result back to the original scale.
#
# Outputs, all keyed like `oct_freq_masks`:
#   t_dataset[freq] : transformed samples as a flat list
#   t_stats[freq]   : array [mean, lower, upper] in the transformed space
#   inv_stats[freq] : the same three values mapped back to the original scale

from sklearn.preprocessing import PowerTransformer, MinMaxScaler
import itertools
import numpy as np
from scipy import stats

mm = MinMaxScaler()
pt = PowerTransformer(method="yeo-johnson",standardize=False,copy=True)

t_dataset = {f'{freq}': list() for freq in oct_freq_masks.keys()}
t_stats = {f'{freq}': list() for freq in oct_freq_masks.keys()}
inv_stats = {f'{freq}': list() for freq in oct_freq_masks.keys()}
for freq in oct_freq_masks.keys():
    # Single feature column (assumes o_dataset[freq] is a flat sequence of numbers).
    samples = np.asarray(o_dataset[freq], dtype=float).reshape(-1, 1)

    # Both transformers are refit for every key, so the inverse mapping below
    # has to happen inside this loop while they still hold this key's fit.
    transformed = pt.fit_transform(mm.fit_transform(samples))

    n = len(transformed)
    dof = n-1
    mean = float(transformed.mean())
    # Standard error of the mean (sample standard deviation, ddof=1).
    scale = float(transformed.std(ddof=1)) / np.sqrt(n)
    data = stats.t(loc=mean, scale=scale, df=dof)
    # The level is passed positionally: SciPy renamed this keyword from
    # `alpha` to `confidence`, so `alpha=` fails on current releases.
    bottom, upper = data.interval(0.95)

    t_dataset[freq] = transformed.ravel().tolist()
    t_stats[freq] = np.array([mean, bottom, upper])

    # Undo the transforms in reverse order of application: Yeo-Johnson first,
    # then min-max scaling. Previously the power-transform inverse was
    # computed and immediately overwritten, so the min-max inverse was applied
    # to values that were still in the power-transformed space.
    unpowered = pt.inverse_transform(t_stats[freq].reshape(-1, 1))
    inv_stats[freq] = mm.inverse_transform(unpowered).reshape(-1)



In [None]:
# Draw the transformed dataset together with its mean and the confidence
# interval of the mean

from matplotlib import pyplot as plt

# t_stats holds [mean, lower bound, upper bound] for each key.
mean_ci = t_stats[target_freq]
m, b, u = mean_ci[0], mean_ci[1], mean_ci[2]

fig, ax = plt.subplots(figsize=(5, 5))
ax.hist(t_dataset[target_freq], bins=15, ec='white')
# Dashed line at the mean, shaded band across the interval.
ax.axvline(m, color='orange', linestyle='dashed', linewidth=1)
ax.axvspan(b, u, color="lightgreen", alpha=0.3)
ax.set_ylabel("count")

mean_text = f'{m:.3g}'
legend_labels = [
    f"Dataset mean = {mean_text}",
    f"mean interval = {b:.3g} to {u:.3g}",
]
ax.legend(legend_labels, loc='upper left')


In [None]:
# Inspect the distribution of the derived parameters (before outlier removal)

from seaborn_analyzer import hist

# Summary statistics of the target column, as returned by ln.get_statics.
raw_values = list(dataset[target_freq])
n, std, se, mean, sk, ku = ln.get_statics(raw_values)
print(f"サンプルサイズ={n} 平均={mean} 標準偏差={std} 誤差={se} 歪度={sk} 尖度={ku}")

# Normality plot, then fit each candidate distribution and tabulate the scores.
candidate_dists = ['norm', 'gamma', 'lognorm', 'cauchy', 't']
hist.plot_normality(dataset, x=target_freq, rounddigit=2)
all_params, all_scores = hist.fit_dist(dataset, x=target_freq, dist=candidate_dists)
df_scores = pd.DataFrame(all_scores).T
df_scores


In [None]:
# Inspect the distribution after outlier removal

# Use the same helper module as the earlier cells. This cell used to rebind
# `ln` to `learning_test`, which silently switched modules for any cell run
# afterwards; `get_statics` is already called through `learning_test_func`
# in the preceding cell, so that module provides it.
import learning_test_func as ln
# Imported here as well so the cell does not rely on an earlier cell for `hist`.
from seaborn_analyzer import hist

n,std,se,mean,sk,ku = ln.get_statics(o_dataset[target_freq])
print(f"サンプルサイズ={n} 平均={mean} 標準偏差={std} 誤差={se} 歪度={sk} 尖度={ku}")

# Wrap the filtered values in a one-column frame; the column label is 0.
o_df = pd.DataFrame(o_dataset[target_freq],copy=True)
hist.plot_normality(o_df, x=0, rounddigit=2)
all_params, all_scores = hist.fit_dist(o_df, x=0, dist=['norm', 'gamma', 'lognorm','cauchy','t'])
df_scores = pd.DataFrame(all_scores).T
df_scores

In [None]:
# Inspect the distribution after the normalising transform

from seaborn_analyzer import hist

# Summary statistics of the transformed samples, as returned by ln.get_statics.
transformed_values = t_dataset[target_freq]
n, std, se, mean, sk, ku = ln.get_statics(transformed_values)
print(f"サンプルサイズ={n} 平均={mean} 標準偏差={std} 誤差={se} 歪度={sk} 尖度={ku}")

# One-column frame (column label 0) for the seaborn_analyzer helpers.
t_df = pd.DataFrame(transformed_values, copy=True)
candidate_dists = ['norm', 'gamma', 'lognorm', 'cauchy', 't']
hist.plot_normality(t_df, x=0, rounddigit=2)
all_params, all_scores = hist.fit_dist(t_df, x=0, dist=candidate_dists)
df_scores = pd.DataFrame(all_scores).T
df_scores