# 機械学習による音響分類と特徴抽出

このノートブックでは、機械学習を使用した音響信号の分類と特徴抽出について学習します。
音響の特徴量抽出から簡単な分類器の構築まで実践的に学びます。

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import seaborn as sns
from scipy import signal
from pydub import AudioSegment
from pydub.generators import Sine, Square, Sawtooth
import warnings
warnings.filterwarnings('ignore')

# Font configuration for matplotlib figures.
# NOTE(review): the original comment labelled this as a Japanese font setting,
# but DejaVu Sans presumably has no Japanese glyphs — confirm. The plot labels
# visible in this notebook are written in English.
plt.rcParams['font.family'] = 'DejaVu Sans'

## 1. 音響データセットの生成

In [None]:
def generate_audio_dataset(n_samples_per_class=50, random_state=None):
    """
    Generate a synthetic, labelled dataset of four kinds of 1-second signals.

    Every loop iteration appends one example of each class, so the returned
    rows are interleaved as 0, 1, 2, 3, 0, 1, ...:

    * 0 - exponentially decaying sine with a random frequency plus light noise
    * 1 - white noise passed through a 4th-order Butterworth low-pass filter
    * 2 - chirp sweeping from 100 Hz to 500 Hz plus noise
    * 3 - decaying sum of 200/300/400/500 Hz sines plus light noise

    Parameters
    ----------
    n_samples_per_class : int, default 50
        Number of examples to create for each of the four classes.
    random_state : int or None, default None
        Seed for a private ``np.random.RandomState``. With ``None`` the
        function draws from NumPy's global random state, as it did before
        this parameter existed.

    Returns
    -------
    samples : ndarray of float, shape (4 * n_samples_per_class, 1000)
    labels : ndarray of int, shape (4 * n_samples_per_class,)
    """
    sample_rate = 1000
    duration = 1.0  # seconds
    nyquist = sample_rate / 2
    t = np.linspace(0, duration, int(sample_rate * duration), False)

    # Either the global NumPy random module or a private, seeded generator;
    # both expose the same uniform()/randn() calls.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # The sine frequency used to be drawn from 200-800 Hz, but anything above
    # the Nyquist frequency (sample_rate / 2) aliases back into the band below
    # it, so cap the range at Nyquist.
    max_sine_freq = min(800, nyquist)

    # --- Loop-invariant pieces, built once instead of on every iteration ---
    sine_envelope = np.exp(-t * 2)  # decay applied to the sine class
    b, a = signal.butter(4, 0.3)    # low-pass filter that shapes the noise class
    # The noise-free chirp is the same for every example; examples of this
    # class differ only in their additive noise.
    f0, f1 = 100, 500
    chirp_clean = signal.chirp(t, f0, duration, f1)
    # NOTE: at sample_rate = 1000 the 500 Hz partial is sin(pi * k) at sample
    # k, i.e. numerically ~0, so it adds almost nothing to the chord.
    chord_clean = np.zeros_like(t)
    for f in (200, 300, 400, 500):
        chord_clean += np.sin(2 * np.pi * f * t) * np.exp(-t * 1.5)

    samples = []
    labels = []

    for _ in range(n_samples_per_class):
        # 1. Decaying sine with a random frequency
        freq = rng.uniform(200, max_sine_freq)
        signal_sine = np.sin(2 * np.pi * freq * t) * sine_envelope
        signal_sine += 0.05 * rng.randn(len(signal_sine))
        samples.append(signal_sine)
        labels.append(0)

        # 2. Low-pass filtered white noise
        signal_noise = signal.filtfilt(b, a, rng.randn(len(t)))
        samples.append(signal_noise)
        labels.append(1)

        # 3. Chirp (frequency sweep) plus noise
        signal_chirp = chirp_clean + 0.1 * rng.randn(len(t))
        samples.append(signal_chirp)
        labels.append(2)

        # 4. Chord (sum of partials) plus light noise
        signal_chord = chord_clean + 0.05 * rng.randn(len(t))
        samples.append(signal_chord)
        labels.append(3)

    if not samples:
        # Keep a 2-D sample array and integer labels even when nothing was
        # generated, so callers such as np.bincount still work.
        return np.empty((0, len(t))), np.empty(0, dtype=int)

    return np.array(samples), np.array(labels)

# Human-readable names for the integer labels 0..3
class_names = ['Instrument', 'Noise', 'Chirp', 'Chord']

# Build the dataset with 100 examples for each class
X, y = generate_audio_dataset(100)

print(f"データセットサイズ: {X.shape}")
print(f"クラス分布: {np.bincount(y)}")

## 2. サンプル音響データの可視化

In [None]:
# Show one example per class: waveform in the top row, spectrum in the bottom row
fig, axes = plt.subplots(2, 4, figsize=(16, 8))

for class_idx in range(4):
    # Index of the first dataset row labelled with this class
    sample_idx = np.where(y == class_idx)[0][0]
    sample_data = X[sample_idx]

    time_ax = axes[0, class_idx]
    freq_ax = axes[1, class_idx]

    # Time domain
    time_ax.plot(sample_data)
    time_ax.set_title(f'{class_names[class_idx]} - Time Domain')
    time_ax.set_ylabel('Amplitude')
    time_ax.grid(True)

    # Frequency domain (periodogram on a log-scaled y axis)
    freqs, psd = signal.periodogram(sample_data, fs=1000)
    freq_ax.semilogy(freqs, psd)
    freq_ax.set_title(f'{class_names[class_idx]} - Frequency Domain')
    freq_ax.set_xlabel('Frequency (Hz)')
    freq_ax.set_ylabel('PSD')
    freq_ax.grid(True)
    freq_ax.set_xlim(0, 500)

plt.tight_layout()
plt.show()

## 3. 音響特徴量の抽出

In [None]:
def extract_features(audio_samples, fs=1000):
    """
    Compute a 14-element feature vector for every signal in ``audio_samples``.

    Features, in column order: mean, standard deviation, maximum, minimum,
    RMS, zero-crossing rate, spectral centroid, spectral bandwidth, 85 %
    spectral roll-off frequency, dominant frequency, number of spectral
    peaks, skewness, kurtosis and energy (sum of squares).

    Parameters
    ----------
    audio_samples : iterable of 1-D array-like
        The signals to analyse; each one is converted to a float array.
    fs : float, default 1000
        Sampling frequency passed to ``scipy.signal.periodogram``.

    Returns
    -------
    ndarray of shape (n_signals, 14)
        One row per input signal. An empty input gives shape ``(0, 14)``.
        For silent or constant signals the features that would otherwise be
        undefined (centroid, bandwidth, skewness, kurtosis) are set to 0.0
        rather than NaN.
    """
    # Local import (the notebook does not import scipy.stats at the top),
    # done once per call instead of once per signal.
    from scipy.stats import skew, kurtosis

    n_features = 14
    features = []

    for sample in audio_samples:
        sample = np.asarray(sample, dtype=float)

        # --- 1. Time-domain features ---
        feature_vector = [
            np.mean(sample),
            np.std(sample),
            np.max(sample),
            np.min(sample),
            np.sqrt(np.mean(sample ** 2)),  # RMS
        ]

        # Zero-crossing rate: number of sign changes between neighbouring
        # samples, divided by the signal length.
        zero_crossings = np.where(np.diff(np.signbit(sample)))[0]
        feature_vector.append(len(zero_crossings) / len(sample))

        # --- 2. Frequency-domain features ---
        freqs, psd = signal.periodogram(sample, fs=fs)
        total_power = np.sum(psd)

        if total_power > 0:
            # Power-weighted mean frequency and spread around it
            spectral_centroid = np.sum(freqs * psd) / total_power
            spectral_bandwidth = np.sqrt(
                np.sum(((freqs - spectral_centroid) ** 2) * psd) / total_power
            )
        else:
            # No spectral power at all: avoid 0/0 -> NaN.
            spectral_centroid = 0.0
            spectral_bandwidth = 0.0
        feature_vector.append(spectral_centroid)
        feature_vector.append(spectral_bandwidth)

        # Spectral roll-off: lowest frequency below which 85 % of the
        # cumulative power lies.
        cumsum_psd = np.cumsum(psd)
        rolloff_idx = np.where(cumsum_psd >= 0.85 * cumsum_psd[-1])[0][0]
        feature_vector.append(freqs[rolloff_idx])

        # Dominant frequency and peak count, using only peaks that reach at
        # least 10 % of the largest PSD value.
        peaks, _ = signal.find_peaks(psd, height=np.max(psd) * 0.1)
        if len(peaks) > 0:
            dominant_freq = freqs[peaks[np.argmax(psd[peaks])]]
            feature_vector.append(dominant_freq)
            feature_vector.append(len(peaks))
        else:
            feature_vector.extend([0, 0])

        # --- 3. Statistical features ---
        # skew/kurtosis can be non-finite for constant input; map those
        # results to 0.0 so downstream scalers/classifiers never see NaN.
        skewness = float(skew(sample))
        kurt = float(kurtosis(sample))
        feature_vector.append(skewness if np.isfinite(skewness) else 0.0)
        feature_vector.append(kurt if np.isfinite(kurt) else 0.0)

        # Energy
        feature_vector.append(np.sum(sample ** 2))

        features.append(feature_vector)

    if not features:
        # Keep the result 2-D even when there were no signals.
        return np.empty((0, n_features))

    return np.array(features)

# Column names, in the order extract_features emits them
feature_names = [
    'Mean', 'Std', 'Max', 'Min', 'RMS', 'ZCR',
    'SpectralCentroid', 'SpectralBandwidth', 'SpectralRolloff',
    'DominantFreq', 'NumPeaks', 'Skewness', 'Kurtosis', 'Energy',
]

# Run feature extraction over the whole dataset
features = extract_features(X)

print(f"特徴量の形状: {features.shape}")
print(f"特徴量名: {feature_names}")

## 4. 特徴量の可視化と分析

In [None]:
# Per-feature histograms: one subplot per feature with the classes overlaid
fig, axes = plt.subplots(3, 5, figsize=(20, 12))
axes = axes.ravel()

# zip stops at the shorter sequence, so features beyond the grid are skipped
for i, (ax, feature_name) in enumerate(zip(axes, feature_names)):
    for class_idx, class_name in enumerate(class_names):
        class_features = features[y == class_idx, i]
        ax.hist(class_features, alpha=0.7, label=class_name, bins=20)

    ax.set_title(feature_name)
    ax.legend()
    ax.grid(True, alpha=0.3)

# Remove the last subplot when the grid has more cells than there are features
if len(axes) > len(feature_names):
    fig.delaxes(axes[-1])

plt.tight_layout()
plt.show()

In [None]:
# Correlation analysis between features
import pandas as pd

# Feature table with the class label attached as an extra column
df_features = pd.DataFrame(features, columns=feature_names)
df_features['class'] = y

# Pairwise correlations over the feature columns only (label column excluded)
correlation_matrix = df_features.drop(columns='class').corr()

plt.figure(figsize=(12, 10))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=0.5,
)
plt.title('Feature Correlation Matrix')
plt.tight_layout()
plt.show()

## 5. PCAによる次元削減と可視化

In [None]:
# Standardise the features
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# PCA with all components kept
pca = PCA()
features_pca = pca.fit_transform(features_scaled)

# Quantities shared by the three panels below
variance_ratio = pca.explained_variance_ratio_
cumulative_variance = np.cumsum(variance_ratio)
component_numbers = range(1, len(variance_ratio) + 1)

plt.figure(figsize=(15, 5))

# Panel 1: variance explained by each component
plt.subplot(1, 3, 1)
plt.plot(component_numbers, variance_ratio, 'bo-')
plt.title('PCA Explained Variance Ratio')
plt.xlabel('Principal Component')
plt.ylabel('Explained Variance Ratio')
plt.grid(True)

# Panel 2: cumulative explained variance with a 95 % reference line
plt.subplot(1, 3, 2)
plt.plot(component_numbers, cumulative_variance, 'ro-')
plt.axhline(y=0.95, color='k', linestyle='--', alpha=0.7)
plt.title('Cumulative Explained Variance')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.grid(True)

# Panel 3: samples projected onto the first two components, coloured by class
plt.subplot(1, 3, 3)
colors = ['red', 'blue', 'green', 'orange']
for class_idx, (class_name, color) in enumerate(zip(class_names, colors)):
    class_indices = y == class_idx
    plt.scatter(
        features_pca[class_indices, 0],
        features_pca[class_indices, 1],
        c=color, label=class_name, alpha=0.7,
    )

plt.xlabel(f'PC1 ({variance_ratio[0]:.2%} variance)')
plt.ylabel(f'PC2 ({variance_ratio[1]:.2%} variance)')
plt.title('PCA Visualization (2D)')
plt.legend()
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# First position where the cumulative ratio reaches 0.95, as a 1-based count
print(f"95%の分散を説明するのに必要な成分数: {np.argmax(cumulative_variance >= 0.95) + 1}")

## 6. 機械学習による分類

In [None]:
# Split the *unscaled* features first, then fit the scaler on the training
# part only. Fitting the scaler on the full dataset before splitting lets
# test-set statistics leak into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, y, test_size=0.3, random_state=42, stratify=y
)

# `scaler` is deliberately re-bound here: it is the scaler that matches the
# classifier trained below and must be used for any new samples fed to it.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Train the random forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

# Predict on the held-out set
y_pred = rf_classifier.predict(X_test)

# Per-class precision / recall / F1
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=class_names))

# Confusion matrix
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# Feature importances, sorted from most to least important
feature_importance = rf_classifier.feature_importances_
sorted_indices = np.argsort(feature_importance)[::-1]

plt.figure(figsize=(12, 6))
plt.bar(range(len(feature_importance)), feature_importance[sorted_indices])
plt.xticks(range(len(feature_importance)),
           [feature_names[i] for i in sorted_indices], rotation=45)
plt.title('Feature Importance (Random Forest)')
plt.ylabel('Importance')
plt.tight_layout()
plt.show()

## 7. 新しい音響データの分類予測

In [None]:
# Generation of new test samples
def generate_test_sample(signal_type='mystery', random_state=None):
    """
    Generate one new 1000-sample test signal.

    Parameters
    ----------
    signal_type : str, default 'mystery'
        ``'mystery'`` mixes a decaying 250 Hz sine, a weaker decaying 300 Hz
        sine and additive noise. Any other value produces a plain 400 Hz
        sine with light noise.
    random_state : int or None, default None
        Seed for a private ``np.random.RandomState``. With ``None`` the noise
        is drawn from NumPy's global random state, as it was before this
        parameter existed.

    Returns
    -------
    ndarray of shape (1000,)
    """
    t = np.linspace(0, 1, 1000, False)
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    if signal_type == 'mystery':
        # Composite signal built from several ingredients
        instrument_part = np.sin(2 * np.pi * 250 * t) * np.exp(-t * 2)
        noise_part = 0.3 * rng.randn(len(t))
        chord_part = 0.5 * np.sin(2 * np.pi * 300 * t) * np.exp(-t * 1.5)
        signal_data = instrument_part + noise_part + chord_part
    else:
        # Simple test signal: steady tone plus light noise
        signal_data = np.sin(2 * np.pi * 400 * t) + 0.1 * rng.randn(len(t))

    return signal_data

# Generate each test signal, classify it, and plot its waveform and spectrum
test_samples = []
test_types = ['mystery', 'simple']

fig, axes = plt.subplots(2, 2, figsize=(15, 10))
axes = axes.ravel()

for i, signal_type in enumerate(test_types):
    test_sample = generate_test_sample(signal_type)
    test_samples.append(test_sample)

    # Same feature extraction and scaling as used for the training data
    test_features = extract_features([test_sample])
    test_features_scaled = scaler.transform(test_features)

    # Predicted label and per-class probabilities for this single sample
    prediction = rf_classifier.predict(test_features_scaled)[0]
    probabilities = rf_classifier.predict_proba(test_features_scaled)[0]

    # Each sample gets two neighbouring panels: waveform, then spectrum
    wave_ax = axes[i*2]
    spectrum_ax = axes[i*2+1]

    wave_ax.plot(test_sample)
    wave_ax.set_title(f'Test Sample {i+1} ({signal_type})\nPredicted: {class_names[prediction]}')
    wave_ax.set_ylabel('Amplitude')
    wave_ax.grid(True)

    freqs, psd = signal.periodogram(test_sample, fs=1000)
    spectrum_ax.semilogy(freqs, psd)
    spectrum_ax.set_title('Frequency Spectrum')
    spectrum_ax.set_xlabel('Frequency (Hz)')
    spectrum_ax.set_ylabel('PSD')
    spectrum_ax.grid(True)
    spectrum_ax.set_xlim(0, 500)

    print(f"\nTest Sample {i+1} ({signal_type}):")
    print(f"Predicted Class: {class_names[prediction]}")
    print("Class Probabilities:")
    for class_name, prob in zip(class_names, probabilities):
        print(f"  {class_name}: {prob:.3f}")

plt.tight_layout()
plt.show()

## 8. 学習曲線とモデル性能の分析

In [None]:
from sklearn.model_selection import learning_curve, validation_curve

# Learning curve: 5-fold CV scores at ten training-set fractions (0.1 .. 1.0)
# NOTE(review): scikit-learn documents random_state as used only when
# shuffle=True, so it presumably has no effect on this call — confirm.
train_sizes, train_scores, val_scores = learning_curve(
    rf_classifier, features_scaled, y, cv=5,
    train_sizes=np.linspace(0.1, 1.0, 10), random_state=42
)

# Mean and spread across folds for every training-set size
train_mean = train_scores.mean(axis=1)
train_std = train_scores.std(axis=1)
val_mean = val_scores.mean(axis=1)
val_std = val_scores.std(axis=1)

plt.figure(figsize=(15, 5))

# Panel 1: learning curve with +/- one standard deviation bands
plt.subplot(1, 3, 1)
plt.plot(train_sizes, train_mean, 'o-', color='blue', label='Training Score')
plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, alpha=0.1, color='blue')
plt.plot(train_sizes, val_mean, 'o-', color='red', label='Validation Score')
plt.fill_between(train_sizes, val_mean - val_std, val_mean + val_std, alpha=0.1, color='red')

plt.xlabel('Training Set Size')
plt.ylabel('Accuracy Score')
plt.title('Learning Curve')
plt.legend()
plt.grid(True)

# Panel 2: validation curve (score as a function of the number of trees)
plt.subplot(1, 3, 2)
param_range = [10, 20, 50, 100, 200, 500]
train_scores_val, val_scores_val = validation_curve(
    RandomForestClassifier(random_state=42), features_scaled, y,
    param_name='n_estimators', param_range=param_range, cv=5
)

train_mean_val = train_scores_val.mean(axis=1)
val_mean_val = val_scores_val.mean(axis=1)

plt.plot(param_range, train_mean_val, 'o-', color='blue', label='Training Score')
plt.plot(param_range, val_mean_val, 'o-', color='red', label='Validation Score')
plt.xlabel('Number of Estimators')
plt.ylabel('Accuracy Score')
plt.title('Validation Curve (n_estimators)')
plt.legend()
plt.grid(True)
plt.xscale('log')

# Panel 3: highest predicted probability per test sample, grouped by true class
plt.subplot(1, 3, 3)
test_probs = rf_classifier.predict_proba(X_test)
max_probs = test_probs.max(axis=1)
predictions = rf_classifier.predict(X_test)

for class_idx, class_name in enumerate(class_names):
    class_mask = y_test == class_idx
    class_probs = max_probs[class_mask]
    plt.hist(class_probs, alpha=0.7, label=class_name, bins=20)

plt.xlabel('Maximum Prediction Probability')
plt.ylabel('Frequency')
plt.title('Prediction Confidence Distribution')
plt.legend()
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Summary figures for the classifier trained earlier
print(f"\nFinal Model Performance:")
print(f"Training Accuracy: {rf_classifier.score(X_train, y_train):.3f}")
print(f"Test Accuracy: {rf_classifier.score(X_test, y_test):.3f}")
print(f"Average Prediction Confidence: {np.mean(max_probs):.3f}")