<a href="https://colab.research.google.com/github/yezzzzin/LIEON_preprocessing/blob/main/vsa.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install opensmile

Collecting opensmile
  Downloading opensmile-2.5.0-py3-none-manylinux_2_17_x86_64.whl (996 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m996.2/996.2 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting audobject>=0.6.1 (from opensmile)
  Downloading audobject-0.7.11-py3-none-any.whl (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting audinterface>=0.7.0 (from opensmile)
  Downloading audinterface-1.2.1-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.8/66.8 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting audeer>=1.18.0 (from audinterface>=0.7.0->opensmile)
  Downloading audeer-2.0.0-py3-none-any.whl (39 kB)
Collecting audformat<2.0.0,>=1.0.1 (from audinterface>=0.7.0->opensmile)
  Downloading audformat-1.1.3-py3-none-any.whl (141 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m141.7/141

In [None]:
# 필요한 라이브러리 및 모듈 import
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import opensmile
import pandas as pd
import os

In [None]:
# Initialise the openSMILE feature extractor.
# It is configured with the GeMAPSv01b feature set at the Functionals level;
# the resulting module-level `smile` object is used by extract_vsa_features().
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.GeMAPSv01b,
    feature_level=opensmile.FeatureLevel.Functionals
)

In [None]:
# MFCC and VSA feature-extraction helpers
def preprocess_and_extract_mfcc_vsa(audio_data, sr=48000, n_mfcc=13):
    """Compute MFCCs for an audio signal.

    Despite the name, only MFCCs are computed here; the VSA features are
    produced by a separate function.

    Args:
        audio_data: Audio samples, forwarded to ``librosa.feature.mfcc`` as ``y``.
        sr: Sampling rate of ``audio_data`` in Hz (defaults to 48000).
        n_mfcc: Number of MFCC coefficients to compute (defaults to 13).

    Returns:
        Whatever ``librosa.feature.mfcc`` returns for the given arguments.
    """
    return librosa.feature.mfcc(y=audio_data, sr=sr, n_mfcc=n_mfcc)

def extract_vsa_features(audio_data, sr=48000):
    """Extract VSA features from an audio signal with the shared extractor.

    Args:
        audio_data: Audio samples handed to ``smile.process_signal``.
        sr: Sampling rate of ``audio_data`` in Hz (defaults to 48000).

    Returns:
        The result of ``smile.process_signal(audio_data, sr)``, where ``smile``
        is the module-level openSMILE extractor.
    """
    return smile.process_signal(audio_data, sr)

In [None]:
# Pre-process a voice-phisher / victim wav file and extract MFCC and VSA features
def process_audio(audio_file):
    """Load an audio file, pre-emphasise it and extract MFCC and VSA features.

    Args:
        audio_file: Path of the audio file to load with ``librosa.load``.

    Returns:
        A ``(mfcc_features, vsa_features)`` tuple as produced by
        ``preprocess_and_extract_mfcc_vsa`` and ``extract_vsa_features``.
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    y, sr = librosa.load(audio_file, sr=None)

    # Apply librosa's pre-emphasis filter.
    # NOTE(review): this is a pre-emphasis step, not a noise-reduction
    # algorithm such as spectral subtraction; add a real denoiser here if
    # denoising is required.
    y_emphasized = librosa.effects.preemphasis(y)

    # Pass the real sampling rate along. Without it both extractors fall back
    # to their 48000 Hz default, which is wrong for files recorded at any
    # other rate.
    mfcc_features = preprocess_and_extract_mfcc_vsa(y_emphasized, sr=sr)
    vsa_features = extract_vsa_features(y_emphasized, sr=sr)

    return mfcc_features, vsa_features

In [None]:
# Paths of the voice phisher's and the victim's wav files.
# NOTE(review): these look like placeholder paths — point them at the real
# merged recordings before running the cells below.
voice_file = '/path/to/merged_voice_file.wav'
victim_file = '/path/to/merged_victim_file.wav'

In [None]:
# Process the voice phisher's and the victim's wav files.
# Each process_audio() call returns an (MFCC features, VSA features) pair.
voice_mfcc, voice_vsa = process_audio(voice_file)
victim_mfcc, victim_vsa = process_audio(victim_file)

In [None]:
# Save the extracted features as CSV files
# Wrap every feature set in a DataFrame first ...
voice_mfcc_df = pd.DataFrame(voice_mfcc)
voice_vsa_df = pd.DataFrame(voice_vsa)
victim_mfcc_df = pd.DataFrame(victim_mfcc)
victim_vsa_df = pd.DataFrame(victim_vsa)

# ... then write each table to its own file, leaving the index out.
for _frame, _csv_path in (
    (voice_mfcc_df, '/path/to/voice_mfcc.csv'),
    (voice_vsa_df, '/path/to/voice_vsa.csv'),
    (victim_mfcc_df, '/path/to/victim_mfcc.csv'),
    (victim_vsa_df, '/path/to/victim_vsa.csv'),
):
    _frame.to_csv(_csv_path, index=False)

In [None]:
# Visualisation
def _plot_mfcc(mfcc, audio_path, title):
    """Show an MFCC matrix as a heat map with a time axis.

    Args:
        mfcc: MFCC matrix to display.
        audio_path: Path of the audio file the MFCCs were computed from; its
            sampling rate is used to scale the time axis.
        title: Figure title.
    """
    plt.figure(figsize=(10, 4))
    # specshow falls back to sr=22050 when no rate is given, which mislabels
    # the time axis for recordings at any other rate. The audio was loaded at
    # its native rate, so read that rate from the file.
    librosa.display.specshow(
        mfcc, sr=librosa.get_samplerate(audio_path), x_axis='time'
    )
    plt.colorbar()
    plt.title(title)
    plt.tight_layout()
    plt.show()


def _plot_vsa_histogram(vsa, title):
    """Show the distribution of the VSA feature values as one histogram.

    Args:
        vsa: VSA feature table (one column per feature).
        title: Figure title.
    """
    plt.figure(figsize=(10, 6))
    # Flatten the table first: a 2-D input makes plt.hist treat every column
    # as a separate dataset instead of drawing one distribution of values.
    plt.hist(np.asarray(vsa).ravel(), bins=62)
    plt.title(title)
    plt.xlabel('Feature Value')
    plt.ylabel('Frequency')
    plt.grid(True)
    plt.show()


_plot_mfcc(voice_mfcc, voice_file, 'Voice MFCC')
_plot_vsa_histogram(voice_vsa, 'Voice VSA Histogram')
_plot_mfcc(victim_mfcc, victim_file, 'Victim MFCC')
_plot_vsa_histogram(victim_vsa, 'Victim VSA Histogram')

In [None]:
# Print the name and value of every VSA feature.
# Iterating over a DataFrame directly yields only its column names, so walk
# the first (and, at the Functionals level, only expected) row as name/value
# pairs instead.
for i, (feature_name, feature_value) in enumerate(voice_vsa.iloc[0].items()):
    print(f"Voice VSA Feature {i+1}: {feature_name} = {feature_value}")

for i, (feature_name, feature_value) in enumerate(victim_vsa.iloc[0].items()):
    print(f"Victim VSA Feature {i+1}: {feature_name} = {feature_value}")

VSA Feature 1: F0semitoneFrom27.5Hz_sma3nz_amean
VSA Feature 2: F0semitoneFrom27.5Hz_sma3nz_stddevNorm
VSA Feature 3: F0semitoneFrom27.5Hz_sma3nz_percentile20.0
VSA Feature 4: F0semitoneFrom27.5Hz_sma3nz_percentile50.0
VSA Feature 5: F0semitoneFrom27.5Hz_sma3nz_percentile80.0
VSA Feature 6: F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2
VSA Feature 7: F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope
VSA Feature 8: F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope
VSA Feature 9: F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope
VSA Feature 10: F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope
VSA Feature 11: loudness_sma3_amean
VSA Feature 12: loudness_sma3_stddevNorm
VSA Feature 13: loudness_sma3_percentile20.0
VSA Feature 14: loudness_sma3_percentile50.0
VSA Feature 15: loudness_sma3_percentile80.0
VSA Feature 16: loudness_sma3_pctlrange0-2
VSA Feature 17: loudness_sma3_meanRisingSlope
VSA Feature 18: loudness_sma3_stddevRisingSlope
VSA Feature 19: loudness_sma3_meanFallingSlope
VSA Feature 20: loudness_sma