In [43]:
import tempfile
import warnings

import numpy as np
import soundfile as sf
from pydub import AudioSegment


def cutoff_as_mp3(input_file, output_file, window_size=0.005, threshold=0.005):
    """Trim trailing silence from an audio file and export the result as MP3.

    The file is read with soundfile, the absolute sample values (averaged
    across channels when there is more than one) are smoothed with a moving
    average, and everything after the last averaging window whose level
    exceeds ``threshold`` is discarded. The trimmed audio is written to a
    temporary WAV file so that pydub can load and encode it.

    Args:
        input_file: Source audio, passed straight to ``sf.read``.
        output_file: Destination, passed straight to ``AudioSegment.export``.
        window_size: Length of the moving-average window, in seconds.
        threshold: Value the moving average of absolute sample values must
            exceed for a window to count as non-silent.

    Returns:
        None. When soundfile raises ``LibsndfileError`` the file is skipped
        and a ``RuntimeWarning`` is emitted instead of propagating the error.
    """
    try:
        signal, sample_rate = sf.read(input_file)

        magnitude = np.abs(signal)
        # soundfile hands back a 1-D array for mono files and a
        # (frames, channels) array otherwise, so only collapse the channel
        # axis when it actually exists.
        if magnitude.ndim > 1:
            magnitude = magnitude.mean(axis=1)

        # Keep the window between 1 sample and the signal length: a window
        # shorter than one sample would give np.convolve an empty kernel, and
        # a kernel longer than the signal makes "valid" mode swap its operands.
        window_length = max(1, min(int(window_size * sample_rate), len(magnitude)))
        ma = np.convolve(
            magnitude, np.ones(window_length) / window_length, mode="valid"
        )

        # ma[i] averages samples i .. i + window_length - 1, so the k-th window
        # counted from the end finishes k samples before the end of the signal.
        # If no window exceeds the threshold, argmax is 0 and nothing is cut.
        cutoff_idx = len(signal) - int(np.argmax(ma[::-1] > threshold))

        signal = signal[:cutoff_idx]

        # Round-trip through a temporary WAV so pydub can read the samples.
        with tempfile.TemporaryFile() as temp:
            sf.write(temp, signal, sample_rate, format="WAV")

            # Rewind so pydub reads from the start of the WAV data.
            temp.seek(0)

            audio_segment = AudioSegment.from_wav(temp)

        audio_segment.export(output_file, format="mp3")
    except sf.LibsndfileError as exc:
        # Best-effort batch behaviour is kept (skip the file), but the skip is
        # now visible instead of silent.
        warnings.warn(
            f"Skipping {input_file!r}: {exc}", RuntimeWarning, stacklevel=2
        )
        return

In [30]:
import pandas as pd

In [40]:
# Load the hymn index, then build `hym` without the rows whose id is 649.
# NOTE(review): why id 649 is excluded is not recorded in this notebook — confirm.
hymns_raw = pd.read_csv('hymnary.csv')
excluded_rows = hymns_raw.loc[hymns_raw['id'] == 649].index
hym = hymns_raw.drop(index=excluded_rows)

In [41]:
# Convert the "pianoteq_mp" WAV of every distinct hymn id into a trimmed MP3
# under output/. The Series displayed below only holds each call's return value.
def _export_mp_take(x):
    """Run cutoff_as_mp3 for one hymn row, reading from pianoteq_mp/."""
    source_wav = f"pianoteq_mp/hymnary{x['id']:03}.wav"
    target_mp3 = f"output/{x['id']:03} {x['name_ch']}（AI）.mp3"
    return cutoff_as_mp3(source_wav, target_mp3)


hym.drop_duplicates(subset="id").apply(_export_mp_take, axis="columns")

0      None
1      None
2      None
3      None
4      None
       ... 
716    None
717    None
718    None
719    None
720    None
Length: 715, dtype: object

In [45]:
# Convert the "pianoteq_hn" WAV of every distinct hymn id into a trimmed MP3
# under output/. The Series displayed below only holds each call's return value.
def _export_hn_take(x):
    """Run cutoff_as_mp3 for one hymn row, reading from pianoteq_hn/."""
    source_wav = f"pianoteq_hn/hymnary{x['id']:03}.wav"
    target_mp3 = f"output/{x['id']:03} {x['name_ch']}（司琴）.mp3"
    return cutoff_as_mp3(source_wav, target_mp3)


hym.drop_duplicates(subset="id").apply(_export_hn_take, axis="columns")

0      None
1      None
2      None
3      None
4      None
       ... 
716    None
717    None
718    None
719    None
720    None
Length: 715, dtype: object