In [None]:
from datetime import datetime
from pathlib import Path

# Settings

## Directory that holds the WAVE files
wav_dir = Path(r"D:\Diana\新千歳空港\WAVE")
## Measurement station ID
stid = "CH53"
## Time at which calibration was completed (the WAV search starts from here)
calibrated_time = datetime(2022, 12, 1, 0, 0, 0)
## Sample size: number of WAV files to collect
sample_size = 385

## Analysis parameters passed to the learning_test_func helpers
center_freqs = [250, 500, 1000, 2000, 4000]  # band center frequencies (presumably Hz -- confirm)
tap = 1024
percentile = 90
mean_time_sec = 0.1


In [None]:
# WAVファイルを集める

from datetime import timedelta
import learning_test_func as ln

# Walk forward from the calibration time, asking ln.get_event_time for one
# event at a time, until sample_size paths are collected or the lookup fails.
wav_file_list = []
search_from = calibrated_time
while len(wav_file_list) < sample_size:
    found_time, found_path = ln.get_event_time(stid, wav_dir, search_from)
    if found_time:
        wav_file_list.append(found_path)
        # Resume one second after the event just found so it is not returned again.
        search_from = found_time + timedelta(seconds=1)
        continue
    # A falsy event time ends the collection early.
    print("失敗")
    break

print(len(wav_file_list))


In [None]:
# 異常検知パラメータを求める
import pandas as pd
import learning_test_func as ln
import matplotlib.pyplot as plt

# Build the anomaly-detection dataset: one row per WAV file, one column per
# center frequency (column labels are the stringified frequencies).
columns = list(map(str, center_freqs))
rows = []
for file in wav_file_list:
    fs, signal = ln.wav_load(file)
    # The masks take fs as an input, so they are rebuilt for every file.
    oct_freq_masks = ln.make_oct_masks(center_freqs, tap, fs)
    rows.append(ln.detect_diff_data(signal, tap, oct_freq_masks, fs, mean_time_sec, percentile))

if not rows:
    # Without this guard an empty file list surfaces later as a confusing
    # NameError on oct_freq_masks (or silently reuses a stale value).
    raise ValueError("wav_file_list is empty: no WAV files to analyse")

# Create the frame once from the collected rows instead of pd.concat inside
# the loop: linear instead of quadratic, no concatenation with an empty
# object-dtype frame, and a unique 0..n-1 index instead of all zeros.
dataset = pd.DataFrame(rows, columns=columns)

# One histogram per band, laid out in a single row.
band_keys = list(oct_freq_masks.keys())
fig = plt.figure(figsize=(20, 3))
for i, freq in enumerate(band_keys, start=1):
    ax = fig.add_subplot(1, len(band_keys), i)
    ax.hist(dataset[freq], bins=15)
    ax.set_title(freq)
    ax.set_xlim(-1.5, 1.5)

fig.tight_layout()
plt.show()


In [None]:
# 外れ値の除外 smirnovgrubbs検定
import pandas as pd
# Run ln.smirnov_grubbs on every band's column. The first return value is
# stored per band in o_dataset; the second is printed (presumably the values
# that were rejected as outliers -- confirm against learning_test_func).
o_dataset = {f"{band}": [] for band in oct_freq_masks.keys()}
for band in oct_freq_masks.keys():
    # 0.2 is passed as the second argument (presumably the significance level -- confirm).
    kept, rejected = ln.smirnov_grubbs(list(dataset[band]), 0.2)
    o_dataset[band] = kept
    print(rejected)



In [None]:
# 分布の表示

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import kurtosis, skew

def get_statics(data_list):
    """Return summary statistics of a sample, each rounded to 3 decimals.

    Args:
        data_list: One-dimensional sequence of numbers (list, array or Series).

    Returns:
        Tuple ``(n, std, se, mean, sk, ku)``:
            n    -- number of elements (``len(data_list)``),
            std  -- sample standard deviation (``ddof=1``),
            se   -- standard error, ``std / sqrt(n)``,
            mean -- arithmetic mean,
            sk   -- skewness (``scipy.stats.skew``),
            ku   -- kurtosis (``scipy.stats.kurtosis``).
    """
    n = len(data_list)
    raw_std = np.std(data_list, ddof=1)
    std = round(raw_std, 3)
    # Derive the standard error from the unrounded deviation; dividing the
    # already-rounded value would round twice and can shift the last digit.
    se = round(raw_std / np.sqrt(n), 3)
    mean = round(np.mean(data_list), 3)
    sk = round(skew(data_list), 3)
    ku = round(kurtosis(data_list), 3)

    return n, std, se, mean, sk, ku

# Compare each band's distribution before (top row) and after (bottom row)
# outlier removal, printing the summary statistics of both.
band_keys = list(oct_freq_masks.keys())
n_bands = len(band_keys)
fig = plt.figure(figsize=(20, 6))
for i, freq in enumerate(band_keys, start=1):
    # Top row: raw values from dataset.
    ax_before = fig.add_subplot(2, n_bands, i)
    ax_before.hist(dataset[freq], bins=15)
    ax_before.set_title(freq)
    ax_before.set_xlim(-1.5, 1.5)
    n, std, se, mean, sk, ku = get_statics(dataset[freq])
    print(f"{str(freq)}:外れ値除去前:size={n} 平均={mean} 標準偏差={std} 誤差={se} 歪度={sk} 尖度={ku}")

    # Bottom row: values kept in o_dataset.
    ax_after = fig.add_subplot(2, n_bands, i + n_bands)
    ax_after.hist(o_dataset[freq], bins=15)
    ax_after.set_title(freq)
    ax_after.set_xlim(-1.5, 1.5)
    n, std, se, mean, sk, ku = get_statics(o_dataset[freq])
    print(f"{str(freq)}:外れ値除去後:size={n} 平均={mean} 標準偏差={std} 誤差={se} 歪度={sk} 尖度={ku}\n")

fig.tight_layout()
plt.show()

In [None]:
# スケーリング
from sklearn.preprocessing import MinMaxScaler, PowerTransformer, RobustScaler
import itertools

# Apply a power transform to each band's outlier-free values and compare the
# distribution before (top row) and after (bottom row) the transform.
rb = RobustScaler(with_centering=False, with_scaling=True)  # alternative scaler, currently not applied
pt = PowerTransformer(standardize=False)

band_keys = list(oct_freq_masks.keys())
n_bands = len(band_keys)
fig = plt.figure(figsize=(20, 6))

t_dataset = {}
for i, freq in enumerate(band_keys, start=1):
    # scikit-learn transformers expect a 2-D (n_samples, n_features) input,
    # so the band is reshaped into a single column and flattened afterwards.
    column = np.asarray(o_dataset[freq], dtype=float).reshape(-1, 1)
    # To try the robust scaler instead, use rb.fit_transform(column) here.
    t_dataset[freq] = pt.fit_transform(column).ravel().tolist()

    # Top row: values before the transform.
    ax_before = fig.add_subplot(2, n_bands, i)
    ax_before.hist(o_dataset[freq], bins=15)
    ax_before.set_title(freq)
    ax_before.set_xlim(-3, 3)
    n, std, se, mean, sk, ku = get_statics(o_dataset[freq])
    print(f"{str(freq)}:除去後:平均={mean} 標準偏差={std} 誤差={se} 歪度={sk} 尖度={ku}")

    # Bottom row: values after the transform.
    ax_after = fig.add_subplot(2, n_bands, i + n_bands)
    ax_after.hist(t_dataset[freq], bins=15)
    ax_after.set_title(freq)
    ax_after.set_xlim(-3, 3)
    n, std, se, mean, sk, ku = get_statics(t_dataset[freq])
    print(f"{str(freq)}:変換後:平均={mean} 標準偏差={std} 誤差={se} 歪度={sk} 尖度={ku}\n")

fig.tight_layout()
plt.show()

In [None]:
from seaborn_analyzer import hist
# Normality check and distribution fitting for a single band.
target_freq = "4000"

# Single-column frames (column label 0) for the chosen band.
o_df = pd.DataFrame(o_dataset[target_freq], copy=True)
t_df = pd.DataFrame(t_dataset[target_freq], copy=True)

hist.plot_normality(t_df, x=0, rounddigit=2)

# Candidate distributions handed to hist.fit_dist for the transformed data.
candidate_dists = ['norm', 'gamma', 'lognorm', 'cauchy', 't']
all_params, all_scores = hist.fit_dist(t_df, x=0, dist=candidate_dists)
df_scores = pd.DataFrame(all_scores).transpose()
print(df_scores)