In [69]:
import pandas as pd
import numpy as np
from pathlib import Path
import wave
from scipy.fft import rfft, irfft
import sys
import soundfile as sf
import pyloudnorm as pyln
from pydub import AudioSegment
from scipy.io import wavfile
from dataclasses import (
    dataclass,
)
from typing import (
    Optional,
)

sys.path.append('..')
from audio import Audio
from volume.human_speech import (
    HUMAN_SPEECH_FREQ_BOTTOM,
    HUMAN_SPEECH_FREQ_TOP,
    HIGH_FREQUENCY_SPEECH_THRESHOLD,
)
from configs.base import (
    RB_FILE_READING_MODE,
)

In [23]:
# Remove pandas' display limits so DataFrames are shown with every column and row.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Load IPython's autoreload extension and switch it to its "all" mode so that
# edits to the imported project modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload all

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# volume

In [5]:
# Sample WAV file used by the cells below.
# NOTE(review): absolute, machine-specific path — the notebook only runs where this file exists.
EXAMPLE_AUDIO_PATH:Path = Path('/data/vkiselev/data/other/univer/deploma/dusha/crowd/crowd_train/wavs/00000d522439136554c888f4cfd92131.wav')

In [31]:
# Load the sample file into the project's Audio container; the bare name on the
# last line displays its repr as the cell output.
audio_example:Audio = Audio.wav_file_path_init(path=EXAMPLE_AUDIO_PATH)
audio_example

Audio(sample_width=2, sr=16000, n_frames=85120, data=array([  0,   0,   0, ..., -10,   1,  -1], dtype=int16), n_channels=1)

In [37]:
def speech_filter(
    audio:Audio,
    low_freq=HUMAN_SPEECH_FREQ_BOTTOM,
    high_freq=HUMAN_SPEECH_FREQ_TOP,
    )->Audio:
    """Band-pass an Audio object by zeroing out-of-band FFT bins.

    The samples are transformed with a real FFT, every bin whose frequency is
    below ``low_freq`` or above ``high_freq`` is set to zero, and the result is
    transformed back and cast to the audio's own sample dtype.

    Args:
        audio: source audio; ``audio.data`` holds the samples and ``audio.sr``
            the sampling rate in Hz.
        low_freq: lowest frequency (Hz) that is kept.
        high_freq: highest frequency (Hz) that is kept.

    Returns:
        A copy of ``audio`` (via ``audio.new_data_copy``) carrying the filtered
        samples, with the same number of samples as the input.
    """
    samples:np.ndarray = np.asarray(audio.data)
    # rfft/irfft work along the last axis, so take the length from that axis.
    n_samples:int = samples.shape[-1]

    spectrum:np.ndarray = rfft(samples)
    # rfftfreq yields exactly one non-negative frequency per rfft bin.
    bin_freqs:np.ndarray = np.fft.rfftfreq(n_samples, d=1.0/audio.sr)
    out_of_band:np.ndarray = (bin_freqs < low_freq) | (bin_freqs > high_freq)
    spectrum[..., out_of_band] = 0

    # Passing n keeps the original length; without it an odd-length signal
    # comes back one sample shorter.
    filtered_signal:np.ndarray = irfft(spectrum, n=n_samples)

    sample_dtype:type = audio.sample_dtype()
    if np.issubdtype(sample_dtype, np.integer):
        # Filtering can overshoot the integer range; clip so the cast below
        # saturates instead of wrapping around. In-range samples are cast
        # exactly as before (truncation toward zero).
        limits = np.iinfo(sample_dtype)
        filtered_signal = np.clip(filtered_signal, limits.min, limits.max)

    return audio.new_data_copy(data=filtered_signal.astype(sample_dtype))

# Filter once and keep the result; ending the cell with the variable displays
# it without running the FFT a second time.
audio_filtered = speech_filter(audio=audio_example)
audio_filtered


Audio(sample_width=2, sr=16000, n_frames=85120, data=array([ 1,  1,  1, ..., -4,  2,  0], dtype=int16), n_channels=1)

In [66]:
def audio_volume(
    audio_path:Path,
    )->Optional[np.float64]:
    """Measure the integrated loudness of an audio file.

    The file is read with ``soundfile`` and measured with a ``pyloudnorm``
    meter created for the file's sampling rate (pyloudnorm documents the
    result as LUFS).

    This is a best-effort helper: any exception raised while reading or
    measuring is printed and ``None`` is returned instead of propagating, so
    callers must handle a ``None`` result.

    Args:
        audio_path: path of the audio file to measure.

    Returns:
        The integrated loudness, or ``None`` if reading or measuring failed.
    """
    try:
        data, rate = sf.read(audio_path)
        meter:pyln.meter.Meter = pyln.Meter(rate)
        return meter.integrated_loudness(data)

    except Exception as e:
        # Deliberately broad: one unreadable or too-short file must not abort the caller.
        print(f"Error: {e}")
        return None

# Integrated loudness of the sample file (None if the measurement failed).
loudness = audio_volume(EXAMPLE_AUDIO_PATH)

In [67]:
# Display the value computed in the previous cell.
loudness

np.float64(-45.12517989530321)

In [49]:
# Inspect what soundfile returns for the sample file: the sample array and the sampling rate.
data, rate = sf.read(EXAMPLE_AUDIO_PATH)
type(data), type(rate)

(numpy.ndarray, int)

In [64]:
def wav_path_2_HF_power_ratio(
    file_path:Path,
    HF_threshold:int = HIGH_FREQUENCY_SPEECH_THRESHOLD,
    )->np.float64:
    """Fraction of a WAV file's spectral energy that lies above ``HF_threshold``.

    The signal is peak-normalised, multiplied by a Hann window and transformed
    with a real FFT; the ratio is the summed power of the bins above the
    threshold divided by the summed power of all analysed bins.

    Args:
        file_path: path of the WAV file to analyse.
        HF_threshold: frequency in Hz; bins strictly above it count as
            high-frequency.

    Returns:
        The high-frequency energy ratio in [0, 1]. ``0.0`` is returned for an
        empty or completely silent file, where the ratio is otherwise undefined.
    """
    sampling_rate, raw_signal = wavfile.read(file_path)

    # Work in float64: np.abs on int16 overflows for -32768, which would give
    # a wrong peak for full-scale recordings.
    signal:np.ndarray = np.asarray(raw_signal, dtype=np.float64)
    if signal.ndim > 1:
        # wavfile.read returns (n_samples, n_channels) for multi-channel files;
        # average the channels so the 1-D window below applies.
        signal = signal.mean(axis=1)

    n:int = len(signal)
    peak:np.float64 = np.max(np.abs(signal)) if n else np.float64(0.0)
    if peak == 0:
        # Empty or all-zero signal: nothing to normalise and no energy at all.
        return np.float64(0.0)

    # Normalize to [-1, 1], then apply a Hann window.
    signal_windowed:np.ndarray = (signal / peak) * np.hanning(n)

    # rfft computes only the non-negative half of the spectrum. The [:n//2]
    # slice keeps the bin selection the same as slicing a full FFT at n//2.
    positive_magnitudes:np.ndarray = np.abs(np.fft.rfft(signal_windowed))[:n // 2]
    positive_freqs:np.ndarray = np.fft.rfftfreq(n, d=1/sampling_rate)[:n // 2]

    # Convert magnitudes to power (energy).
    power_spectrum:np.ndarray = positive_magnitudes ** 2

    total_energy:np.float64 = np.sum(power_spectrum)
    if total_energy == 0:
        # Too few samples for the windowed signal to carry any energy.
        return np.float64(0.0)

    high_freq_mask:np.ndarray = positive_freqs > HF_threshold
    high_freq_energy:np.float64 = np.sum(power_spectrum[high_freq_mask])

    return np.float64(high_freq_energy / total_energy)

# High-frequency energy ratio of the sample file (threshold left at its default).
wav_path_2_HF_power_ratio(EXAMPLE_AUDIO_PATH)

np.float64(0.024670111756333636)

In [68]:
@dataclass
class HighLevelSpeechFeatures:
    """File-level speech features computed from a single WAV recording."""

    # Integrated loudness from `audio_volume`; None when that measurement failed.
    loudness: Optional[np.float64]
    # Share of spectral energy above the high-frequency threshold, from
    # `wav_path_2_HF_power_ratio`.
    HF_power_ratio:np.float64

    @classmethod
    def wav_path_init(
        cls,
        path:Path,
        transcription:Optional[str] = None,
        )->"HighLevelSpeechFeatures":
        """Build the features for the WAV file at ``path``.

        Args:
            path: path of the WAV file to analyse.
            transcription: optional transcript of the recording. Accepted so
                callers can already pass it, but no current feature uses it.

        Returns:
            A new instance holding the loudness and high-frequency power ratio
            of the file.
        """
        return cls(
            loudness=audio_volume(path),
            HF_power_ratio=wav_path_2_HF_power_ratio(path),
        )


In [70]:
# NOTE(review): the recorded output of this cell is ModuleNotFoundError — the
# `bdw` package must be installed in the kernel's environment before it can run.
from bdw.check import Check

# Profanity checker restricted to Russian.
filter_ru = Check(languages=['ru'])

# Sample sentence to run through the checker.
text:str = 'Нормально сегодня отдохнул!'

# Report whether the checker flagged the sentence.
print(
    "Текст содержит ненормативную лексику."
    if filter_ru.filter_profanity(text, language='ru')
    else "Текст не содержит ненормативной лексики."
)


ModuleNotFoundError: No module named 'bdw'

In [72]:
!pip install git+https://github.com/FlacSy/bdw.git

Collecting git+https://github.com/FlacSy/bdw.git
  Cloning https://github.com/FlacSy/bdw.git to /tmp/pip-req-build-1pgiko62
  Running command git clone --filter=blob:none --quiet https://github.com/FlacSy/bdw.git /tmp/pip-req-build-1pgiko62
  remote: Repository not found.
  fatal: Authentication failed for 'https://github.com/FlacSy/bdw.git/'
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mgit clone --[0m[32mfilter[0m[32m=[0m[32mblob[0m[32m:none --quiet [0m[4;32mhttps://github.com/FlacSy/bdw.git[0m[32m [0m[32m/tmp/[0m[32mpip-req-build-1pgiko62[0m did not run successfully.
  [31m│[0m exit code: [1;36m128[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
[1;31merror[0m: [1msubprocess-exited-with-error[0m

[31m×[0m [32mgit clone --[0m[32mfilter[0m[32m=[0m[32mblob[0m[32m:none --quiet [0m[4;32mhttps://github.com/FlacSy/bdw.git[0m[32