<a href="https://colab.research.google.com/github/PrzemyslawCh/TTS/blob/main/Splitting_Audio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install ffmpeg-python
import ffmpeg

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import re
import subprocess
import os
import ffmpeg

def get_silences(filename, silence_len=1.0, silence_db=-30):
    """Detect silent intervals in a media file with ffmpeg's ``silencedetect``.

    Args:
        filename: Path of the file to analyse.
        silence_len: Minimum length, in seconds, a quiet stretch must have to
            be reported (forwarded as the filter's ``d`` option).
        silence_db: Noise threshold in dB (forwarded as the filter's ``n``
            option in the form ``"<value>dB"``).

    Returns:
        A list of ``(silence_start, silence_end)`` tuples in seconds, in the
        order ffmpeg reported them. The list is empty when no silence was
        detected.

    Raises:
        ffmpeg.Error: If the ffmpeg process exits with a non-zero status.
    """
    # Attach silencedetect as a real filter node so it is part of the filter
    # graph in front of the output. Options handed to global_args() are
    # emitted after the output target, where ffmpeg treats per-file options
    # such as -af as trailing options and does not apply them.
    _, stderr = (
        ffmpeg.input(filename)
        .filter('silencedetect', n=f'{silence_db}dB', d=silence_len)
        .output('pipe:', format='null', hide_banner=None)
        .run(capture_stdout=True, capture_stderr=True)
    )
    # The filter reports through ffmpeg's log, i.e. on stderr. Undecodable
    # bytes (e.g. odd metadata) must not abort the parse.
    log = stderr.decode(errors='replace')

    starts = [
        float(value)
        for value in re.findall(r'\[silencedetect @ \S+\] silence_start: (\S+)', log)
    ]
    ends = [
        float(value)
        for value in re.findall(r'\[silencedetect @ \S+\] silence_end: (\S+)', log)
    ]

    # A silence that is still open when the log ends has no silence_end line;
    # close it at the end of the file.
    if len(starts) > len(ends):
        ends.append(float(ffmpeg.probe(filename)['format']['duration']))

    # ffmpeg can report a slightly negative start for silence at the very
    # beginning of the file; clamp it so callers get a valid timestamp.
    return [(max(0.0, start), end) for start, end in zip(starts, ends)]

def split_on_silences(filename, output_dir, silence_len=1.0, silence_db=-30):
    """Cut a media file into its audible spans and write each one to disk.

    The silences reported by ``get_silences`` are used as cut points. The
    exported spans are: start of file up to the first silence, every gap
    between two consecutive silences, and the last silence up to the end of
    the file. Spans of zero or negative length are skipped.

    Args:
        filename: Path of the source file.
        output_dir: Directory that receives the pieces; created if missing.
        silence_len: Minimum silence length in seconds, passed to
            ``get_silences``.
        silence_db: Silence threshold in dB, passed to ``get_silences``.

    Output files are named ``<source name>_<n><source extension>`` with ``n``
    counting the written pieces from 1. Existing files of the same name are
    overwritten.
    """
    os.makedirs(output_dir, exist_ok=True)

    silences = get_silences(filename, silence_len, silence_db)
    duration = float(ffmpeg.probe(filename)['format']['duration'])

    # Each audible span starts where a silence ends (or at 0) and stops where
    # the next silence begins (or at the end of the file).
    span_starts = [0.0] + [silence_end for _, silence_end in silences]
    span_ends = [silence_start for silence_start, _ in silences] + [duration]

    name, ext = os.path.splitext(os.path.basename(filename))

    index = 0
    for start_time, end_time in zip(span_starts, span_ends):
        if end_time <= start_time:
            # e.g. the file begins or ends with silence
            continue
        index += 1
        output_filename = os.path.join(output_dir, f"{name}_{index}{ext}")
        # overwrite_output avoids ffmpeg's interactive "overwrite?" prompt
        # when the cell is re-run.
        (
            ffmpeg.input(filename)
            .output(output_filename, ss=start_time, t=end_time - start_time)
            .run(overwrite_output=True)
        )

# Source recording and destination folder — replace with your own paths.
# Both live under /content/drive, so Google Drive has to be mounted
# (drive.mount('/content/drive')) before this cell is run.
src_filename = "/content/drive/MyDrive/s-01.wav"
output_dir = "/content/drive/MyDrive/Saker"

# Run with the default silence_len / silence_db settings.
split_on_silences(src_filename, output_dir)


In [None]:
!pip install pydub


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
import os
from pydub import AudioSegment
from pydub.silence import split_on_silence

def split_audio_on_silence(input_file, output_dir, min_silence_duration=1000, silence_threshold=-40):
    """Split a WAV file at its silent stretches and save the pieces as WAV.

    Args:
        input_file: Path of the WAV file to read.
        output_dir: Directory for the pieces; created when it does not exist.
        min_silence_duration: Forwarded to pydub's ``split_on_silence`` as
            ``min_silence_len``.
        silence_threshold: Forwarded to pydub's ``split_on_silence`` as
            ``silence_thresh``.

    The pieces are written to ``output_dir`` as ``1.wav``, ``2.wav``, ... in
    the order pydub returns them.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    segments = split_on_silence(
        AudioSegment.from_wav(input_file),
        min_silence_len=min_silence_duration,
        silence_thresh=silence_threshold,
    )

    # File names are 1-based.
    for number, segment in enumerate(segments, start=1):
        target_path = os.path.join(output_dir, f"{number}.wav")
        segment.export(target_path, format="wav")

# Input file and output directory — replace with your own paths.
# Both live under /content/drive, so Google Drive has to be mounted
# (drive.mount('/content/drive')) before this cell is run.
# NOTE: the pieces are written as 1.wav, 2.wav, ...; the
# split_audio_with_silence cell uses the same folder and the same file names,
# so running both overwrites the earlier results.
input_file = "/content/drive/MyDrive/s-01.wav"
output_dir = "/content/drive/MyDrive/Saker"

# Run with the default min_silence_duration / silence_threshold.
split_audio_on_silence(input_file, output_dir)


In [None]:
import os
from pydub import AudioSegment
from pydub.silence import detect_nonsilent

def split_audio_with_silence(input_file, output_dir, target_duration=30, min_silence_duration=1000, silence_threshold=-40):
    """Export a WAV file as a fixed number of mono parts.

    The number of parts is ``ceil(audio length / target_duration)``. Part
    ``i`` is the ``i``-th non-silent range found by pydub's
    ``detect_nonsilent``; once those ranges run out, the remaining parts are
    plain ``target_duration``-second windows at position ``i``.

    Args:
        input_file: Path of the WAV file to read.
        output_dir: Directory for the parts; created when it does not exist.
        target_duration: Window length in seconds; also determines the number
            of parts.
        min_silence_duration: Passed to ``detect_nonsilent`` as its second
            positional argument.
        silence_threshold: Passed to ``detect_nonsilent`` as its third
            positional argument.

    The parts are written to ``output_dir`` as ``1.wav``, ``2.wav``, ...

    NOTE(review): because the part count comes from the duration and not from
    the detected ranges, non-silent ranges beyond that count are not exported,
    and fallback windows can overlap ranges that were already exported.
    Confirm this is the intended behaviour.
    """
    os.makedirs(output_dir, exist_ok=True)

    audio = AudioSegment.from_wav(input_file)
    duration = len(audio) / 1000  # Convert duration to seconds

    # Round the part count up so a trailing partial window is included.
    num_parts = int(duration / target_duration)
    if duration % target_duration > 0:
        num_parts += 1

    nonsilent_parts = detect_nonsilent(audio, min_silence_duration, silence_threshold)

    for i in range(num_parts):
        if i < len(nonsilent_parts):
            # Each detected range is a [start, end] pair; slice in that order,
            # otherwise the slice is empty.
            start_time, end_time = nonsilent_parts[i]
        else:
            # No detected range left for this index: fall back to the i-th
            # fixed-length window (milliseconds).
            start_time = i * target_duration * 1000
            end_time = (i + 1) * target_duration * 1000

        part = audio[start_time:end_time].set_channels(1)  # Convert to mono channel
        output_file = os.path.join(output_dir, f"{i+1}.wav")
        part.export(output_file, format="wav")

# Input file and output directory — replace with your own paths.
# Both live under /content/drive, so Google Drive has to be mounted
# (drive.mount('/content/drive')) before this cell is run.
# NOTE: the parts are written as 1.wav, 2.wav, ...; the
# split_audio_on_silence cell uses the same folder and the same file names,
# so running both overwrites the earlier results.
input_file = "/content/drive/MyDrive/s-01.wav"
output_dir = "/content/drive/MyDrive/Saker"

# Run with the default target_duration / min_silence_duration / silence_threshold.
split_audio_with_silence(input_file, output_dir)



In [None]:
# Mount Google Drive at /content/drive. The splitting cells read from and
# write to /content/drive/MyDrive, so this cell must be executed before any
# of them even though it appears last in the notebook.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive
