In [1]:
import os
from pydub import AudioSegment
import random
import librosa
import librosa.display
import soundfile as sf
import shutil

# RELATED TOOLS:

## Delete Audio Files Outside a Duration Range

In [None]:
import os
from pydub import AudioSegment
from concurrent.futures import ThreadPoolExecutor

def get_audio_length(file_path):
    """Return the duration of an audio file in seconds.

    The file is decoded with ``AudioSegment.from_file``. Any exception raised
    while loading or measuring it is printed, and ``None`` is returned instead
    of a duration.
    """
    try:
        duration_ms = len(AudioSegment.from_file(file_path))
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None
    # The segment length is in milliseconds; report seconds.
    return duration_ms / 1000.0

def find_files_to_delete(folder_path, min_duration, max_duration, num_threads=4):
    """Collect the audio files whose duration is outside the allowed range.

    Args:
        folder_path: Directory to scan (not recursive). Only names ending in
            ``.wav`` or ``.flac`` are considered.
        min_duration: Shortest duration, in seconds, that is kept.
        max_duration: Longest duration, in seconds, that is kept.
        num_threads: Number of worker threads used to measure durations.

    Returns:
        A list of paths, sorted by path, whose duration is below
        ``min_duration`` or above ``max_duration``. Files whose duration could
        not be read (``get_audio_length`` returned ``None``) are never listed.
        Note that if ``min_duration > max_duration`` every readable file
        matches.
    """
    audio_files = sorted(
        os.path.join(folder_path, f)
        for f in os.listdir(folder_path)
        if f.endswith(('.wav', '.flac'))
    )

    # Measure durations in parallel. executor.map yields results in input
    # order, so the workers never touch shared state and the result is
    # deterministic; consuming the iterator also surfaces any worker error.
    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        durations = list(executor.map(get_audio_length, audio_files))

    return [
        file_path
        for file_path, duration in zip(audio_files, durations)
        if duration is not None and (duration < min_duration or duration > max_duration)
    ]

def delete_files(files_to_delete):
    """Delete every path in ``files_to_delete``, reporting each outcome.

    A file that cannot be removed (already gone, locked, permission denied)
    is reported and skipped so that one failure does not abort the rest of
    the batch.

    Args:
        files_to_delete: Iterable of file paths to remove.
    """
    for file_path in files_to_delete:
        try:
            os.remove(file_path)
        except OSError as e:
            print(f"Could not delete {file_path}: {e}")
        else:
            print(f"Deleted {file_path}")

def delete_in_range_audio_files(folder_path, min_duration=5, max_duration=7, num_threads=4):
    """Delete the audio files selected by ``find_files_to_delete``.

    Despite the name, the selection is made by ``find_files_to_delete``, which
    picks files whose duration falls OUTSIDE ``[min_duration, max_duration]``;
    files inside the range are kept. Prints a summary before deleting, or a
    notice when nothing was selected.
    """
    doomed = find_files_to_delete(folder_path, min_duration, max_duration, num_threads)

    # Nothing selected: report and stop.
    if not doomed:
        print("No files to delete.")
        return

    print(f"Found {len(doomed)} files to delete. Deleting now...")
    delete_files(doomed)


In [None]:
folder_path = 'D:/DEEPFAKE_DETECTION/DATASETS/ENGLISH/DATA/ASVspoof2021_DF_eval_part00/flac'
# Duration bounds are compared against durations in seconds, despite the
# "minute_" prefix in the variable names.
minute_min = 3.83
minute_max = 6.8
num_threads = 8  # Number of worker threads for the ThreadPoolExecutor
# Pass num_threads explicitly; otherwise the function falls back to its default of 4.
delete_in_range_audio_files(folder_path, minute_min, minute_max, num_threads)

In [3]:
import os
from pydub import AudioSegment

def delete_short_audio_files(folder_path, medium_duration_min=5, medium_duration_max=7):
    """Delete .wav/.flac files whose duration is outside the given bounds.

    Every entry of ``folder_path`` ending in ``.wav`` or ``.flac`` is loaded
    with ``AudioSegment.from_file``. A file shorter than
    ``medium_duration_min`` or longer than ``medium_duration_max`` (both in
    seconds) is removed and reported. Any error while loading or removing a
    file is printed and the scan continues with the next entry.
    """
    audio_suffixes = ('.wav', '.flac')

    for entry in os.listdir(folder_path):
        if not entry.endswith(audio_suffixes):
            continue

        target = os.path.join(folder_path, entry)
        try:
            # Segment length is in milliseconds; convert to seconds.
            seconds = len(AudioSegment.from_file(target)) / 1000.0
            out_of_range = seconds < medium_duration_min or seconds > medium_duration_max
            if out_of_range:
                os.remove(target)
                print(f"Deleted {entry} (duration: {seconds:.2f} seconds)")
        except Exception as e:
            print(f"Could not process {entry}: {e}")


In [None]:
# Run the duration-based cleanup on the part03 folder.
folder_path = 'D:/DEEPFAKE_DETECTION/DATASETS/ENGLISH/DATA/ASVspoof2021_DF_eval_part03/flac'
# These bounds are compared against durations in seconds, despite the
# "minute_" prefix in the variable names.
minute_min = 3.83
minute_max = 6.8
delete_short_audio_files(folder_path, minute_min, minute_max)

## MP3 to WAV

In [None]:
# Convert a single MP3 to WAV with pydub.
# NOTE(review): both paths below are the same strings that the batch-conversion
# cell uses as its input and output directories; this cell presumably needs a
# concrete .mp3 file path and a .wav output file path instead. Confirm before running.
mp3_audio = AudioSegment.from_mp3("../DATASETS/temp")
mp3_audio.export("../DATASETS/cv-corpus-19.0-2024-09-13/vi/final", format="wav")

In [3]:
import os
from pydub import AudioSegment

# Batch-convert every regular .mp3 file in input_dir to a .wav file in output_dir.
input_dir = "../DATASETS/temp"
output_dir = "../DATASETS/cv-corpus-19.0-2024-09-13/vi/final"
os.makedirs(output_dir, exist_ok=True)

for file_name in os.listdir(input_dir):
    file_path = os.path.join(input_dir, file_name)

    # Skip anything that is not a regular file with an .mp3 suffix.
    if not (file_name.endswith(".mp3") and os.path.isfile(file_path)):
        continue

    try:
        mp3_audio = AudioSegment.from_mp3(file_path)
        # Same base name, .wav extension, written to the output folder.
        output_path = os.path.join(output_dir, os.path.splitext(file_name)[0] + ".wav")
        mp3_audio.export(output_path, format="wav")
        print(f"Converted: {file_name} -> {output_path}")
    except Exception as e:
        # One broken file is reported and does not stop the batch.
        print(f"Failed to process {file_name}: {e}")

Converted: common_voice_vi_21824030.mp3 -> ../DATASETS/cv-corpus-19.0-2024-09-13/vi/final\common_voice_vi_21824030.wav
Converted: common_voice_vi_21824032.mp3 -> ../DATASETS/cv-corpus-19.0-2024-09-13/vi/final\common_voice_vi_21824032.wav
Converted: common_voice_vi_21824033.mp3 -> ../DATASETS/cv-corpus-19.0-2024-09-13/vi/final\common_voice_vi_21824033.wav
Converted: common_voice_vi_21824034.mp3 -> ../DATASETS/cv-corpus-19.0-2024-09-13/vi/final\common_voice_vi_21824034.wav
Converted: common_voice_vi_21824045.mp3 -> ../DATASETS/cv-corpus-19.0-2024-09-13/vi/final\common_voice_vi_21824045.wav
Converted: common_voice_vi_21824047.mp3 -> ../DATASETS/cv-corpus-19.0-2024-09-13/vi/final\common_voice_vi_21824047.wav
Converted: common_voice_vi_21824049.mp3 -> ../DATASETS/cv-corpus-19.0-2024-09-13/vi/final\common_voice_vi_21824049.wav
Converted: common_voice_vi_21833212.mp3 -> ../DATASETS/cv-corpus-19.0-2024-09-13/vi/final\common_voice_vi_21833212.wav
Converted: common_voice_vi_21833214.mp3 -> ../DA

## Split Audio

In [None]:
def split_audio(input_file, output_folder, segment_duration, prefix="nu8", keep_remainder=False):
    """Split an audio file into consecutive WAV segments of fixed length.

    Segments are written to ``output_folder`` as ``<prefix>_<index>.wav``,
    where ``index`` starts at 1 and is zero-padded to the width of the
    segment count.

    Args:
        input_file: Path of the audio file to split.
        output_folder: Directory that receives the segments; created if missing.
        segment_duration: Segment length in milliseconds; must be positive.
        prefix: File-name prefix of every segment. Defaults to ``"nu8"``, the
            prefix this function previously hard-coded.
        keep_remainder: When True, a trailing piece shorter than
            ``segment_duration`` is exported as one extra segment. The default
            (False) drops it, so every segment has the full length.

    Raises:
        ValueError: If ``segment_duration`` is not positive.
    """
    # Validate before touching the file system or decoding the audio.
    if segment_duration <= 0:
        raise ValueError(f"segment_duration must be positive, got {segment_duration!r}")

    os.makedirs(output_folder, exist_ok=True)
    audio = AudioSegment.from_file(input_file)
    total_duration = len(audio)  # milliseconds

    num_segments = total_duration // segment_duration
    if keep_remainder and total_duration % segment_duration:
        num_segments += 1

    index_width = len(str(num_segments))
    for i in range(num_segments):
        start_time = i * segment_duration
        # The clamp only matters for the trailing partial segment.
        end_time = min(start_time + segment_duration, total_duration)
        segment = audio[start_time:end_time]
        output_file = os.path.join(output_folder, f"{prefix}_{str(i + 1).zfill(index_width)}.wav")
        segment.export(output_file, format="wav")

    print(f"Audio file split into {num_segments} segments.")

In [None]:
# Split one source recording into fixed-length WAV segments.
input_file = "../DATASETS/TTS/GiongGIA_TTSV2/nu/SOURCE/nu8.wav"
output_folder = "../DATASETS/TTS/GiongGIA_TTSV2/nu/SOURCE/nu8"
Audio_Segment = 5000 # Segment length in milliseconds, so 5000 ms = 5 s
split_audio(input_file, output_folder, Audio_Segment)

Audio file split into 24 segments.


## Delete the first n characters of file names

In [None]:
folder_path = '../DemoProject/Run/demo'

# Number of leading characters to remove from every file name; change as needed.
num_chars_to_strip = 5

for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    if not os.path.isfile(file_path):
        continue

    new_filename = filename[num_chars_to_strip:]
    new_file_path = os.path.join(folder_path, new_filename)

    # A name no longer than the stripped prefix would become empty, which
    # would make the rename target the folder itself.
    if not new_filename:
        print(f'Skipped: {filename} (name too short)')
        continue

    # Do not clobber another file: os.rename may silently replace an existing
    # target on POSIX and raises on Windows.
    if os.path.exists(new_file_path):
        print(f'Skipped: {filename} ({new_filename} already exists)')
        continue

    os.rename(file_path, new_file_path)
    print(f'Renamed: {filename} -> {new_filename}')

print("Renaming completed.")


Renamed: 1000_Run2087_s.flac -> Run2087_s.flac
Renamed: 1002_Run0273_b.flac -> Run0273_b.flac
Renamed: 1002_Run0431_b.flac -> Run0431_b.flac
Renamed: 1003_Run0141_s.flac -> Run0141_s.flac
Renamed: 1003_Run0693_b.flac -> Run0693_b.flac
Renamed: 1003_Run1883_s.flac -> Run1883_s.flac
Renamed: 1003_Run4683_s.flac -> Run4683_s.flac
Renamed: 1003_Run5353_s.flac -> Run5353_s.flac
Renaming completed.


## Remove the leading prefix from file names

In [21]:
import os

folder_path = '../DemoProject/Vali/ValiAudi'

for filename in os.listdir(folder_path):
    # Only names containing the "_vali" separator have a prefix to remove.
    # Checking for plain "vali" would also match already-renamed files such as
    # "vali_0052_s.wav", for which the split below has no second element.
    if "_vali" not in filename:
        continue

    # Keep everything after the first "_vali" and restore the "vali" stem.
    new_name = filename.split("_vali", 1)[1]
    new_name = f"vali{new_name}"

    old_file = os.path.join(folder_path, filename)
    new_file = os.path.join(folder_path, new_name)

    # Two prefixed files can map to the same short name; never overwrite.
    if os.path.exists(new_file):
        print(f"Skipped: {filename} ({new_name} already exists)")
        continue

    os.rename(old_file, new_file)
    print(f"Renamed: {filename} -> {new_name}")

print("Renaming completed!")


Renamed: 1025_40847_vali_0052_s.wav -> vali_0052_s.wav
Renamed: 1047_93866_vali_0057_b.wav -> vali_0057_b.wav
Renamed: 1137_34343_vali_0039_b.wav -> vali_0039_b.wav
Renamed: 1152_19278_vali_0017_b.wav -> vali_0017_b.wav
Renamed: 119_45182_vali_0024_s.wav -> vali_0024_s.wav
Renamed: 1232_17657_vali_0059_s.wav -> vali_0059_s.wav
Renamed: 1256_413_vali_0028_b.wav -> vali_0028_b.wav
Renamed: 1275_42221_vali_0023_b.wav -> vali_0023_b.wav
Renamed: 131_38255_vali_0051_s.wav -> vali_0051_s.wav
Renamed: 1336_46821_vali_0074_s.wav -> vali_0074_s.wav
Renamed: 1342_34570_vali_0025_b.wav -> vali_0025_b.wav
Renamed: 1403_90936_vali_0018_s.wav -> vali_0018_s.wav
Renamed: 1422_49528_vali_0064_b.wav -> vali_0064_b.wav
Renamed: 145_18818_vali_0055_b.wav -> vali_0055_b.wav
Renamed: 1696_79127_vali_0005_s.wav -> vali_0005_s.wav
Renamed: 1858_47465_vali_0031_s.wav -> vali_0031_s.wav
Renamed: 1897_62314_vali_0066_s.wav -> vali_0066_s.wav
Renamed: 1982_84373_vali_0032_s.wav -> vali_0032_s.wav
Renamed: 2013_5

# Audio Namer


In [5]:
# Folder containing the .flac files to rename.
directory = "../PROJECT/1.English_EN/run/RealRun"
# Sort the listing so the numbering is reproducible; os.listdir returns
# entries in arbitrary order.
file_list = sorted(os.listdir(directory))
counter = 1
num_digits = 4

for filename in file_list:
    if filename.endswith(".flac"):
        new_name = f"run_{counter:0{num_digits}}_b.flac"

        old_path = os.path.join(directory, filename)
        new_path = os.path.join(directory, new_name)
        print(f"Renaming: {old_path} -> {new_path}")

        try:
            # Refuse to replace a different existing file; os.rename may
            # silently overwrite it on POSIX.
            if filename != new_name and os.path.exists(new_path):
                raise FileExistsError(f"{new_path} already exists")
            os.rename(old_path, new_path)
            print(f"Renamed {filename} to {new_name}")
        except OSError as e:
            # OSError covers FileNotFoundError, FileExistsError and PermissionError.
            print(f"Error renaming {filename}: {e}")
        counter += 1
print("File renaming completed.")

Renaming: ../PROJECT/1.English_EN/run/RealRun\DF_E_2039749.flac -> ../PROJECT/1.English_EN/run/RealRun\run_0001_b.flac
Renamed DF_E_2039749.flac to run_0001_b.flac
Renaming: ../PROJECT/1.English_EN/run/RealRun\DF_E_2040395.flac -> ../PROJECT/1.English_EN/run/RealRun\run_0002_b.flac
Renamed DF_E_2040395.flac to run_0002_b.flac
Renaming: ../PROJECT/1.English_EN/run/RealRun\DF_E_2040522.flac -> ../PROJECT/1.English_EN/run/RealRun\run_0003_b.flac
Renamed DF_E_2040522.flac to run_0003_b.flac
Renaming: ../PROJECT/1.English_EN/run/RealRun\DF_E_2040833.flac -> ../PROJECT/1.English_EN/run/RealRun\run_0004_b.flac
Renamed DF_E_2040833.flac to run_0004_b.flac
Renaming: ../PROJECT/1.English_EN/run/RealRun\DF_E_2041021.flac -> ../PROJECT/1.English_EN/run/RealRun\run_0005_b.flac
Renamed DF_E_2041021.flac to run_0005_b.flac
Renaming: ../PROJECT/1.English_EN/run/RealRun\DF_E_2041322.flac -> ../PROJECT/1.English_EN/run/RealRun\run_0006_b.flac
Renamed DF_E_2041322.flac to run_0006_b.flac
Renaming: ../PRO

# Shuffle Folder

In [2]:
def shuffle_folder(folder_path):
    """Prefix every entry of ``folder_path`` with a random 5-digit number.

    Each entry returned by ``os.listdir`` is renamed in place to
    ``<random>_<original name>``, where ``<random>`` is drawn from
    10000..99999. The listing is shuffled first, so entries are processed
    in random order.
    """
    entries = os.listdir(folder_path)
    random.shuffle(entries)

    for original_name in entries:
        prefixed_name = f"{random.randint(10000, 99999)}_{original_name}"
        os.rename(
            os.path.join(folder_path, original_name),
            os.path.join(folder_path, prefixed_name),
        )


In [None]:
# Add a random numeric prefix to every entry of the Runner folder.
folder_path = r"../PROJECT/1.English_EN/run/Runner"
shuffle_folder(folder_path)


# Convert WAV to FLAC

In [57]:
import os
from pydub import AudioSegment
# Source folder holding the .wav files and destination folder for the .flac output.
wav_folder_path = './Run/RunAudi'
flac_folder_path = './Run/RunFlac' 

In [58]:

# List every entry of the source folder; the conversion loop keeps only names ending in .wav.
file_names = os.listdir(wav_folder_path)

In [59]:
# Make sure the destination exists; the export below fails if the folder is missing.
os.makedirs(flac_folder_path, exist_ok=True)

for file_name in file_names:
    if file_name.endswith('.wav'):
        wav_file_path = os.path.join(wav_folder_path, file_name)
        audio = AudioSegment.from_wav(wav_file_path)

        # Same base name, .flac extension, written to the destination folder.
        flac_file_name = os.path.splitext(file_name)[0] + '.flac'
        flac_file_path = os.path.join(flac_folder_path, flac_file_name)
        audio.export(flac_file_path, format='flac')

# Label maker

In [4]:
# Write one label line per file: names ending in "_b" are bonafide, names
# ending in "_s" are spoof, and anything else is left out of the list.
folder_path = '../PROJECT/1.English_EN/run/Runner'
file_names = sorted(os.listdir(folder_path))

with open('file_label_run.txt', 'w') as f:
    for file_name in file_names:
        file_name_no_ext = os.path.splitext(file_name)[0]

        # Pick the label from the suffix of the extension-less name.
        if file_name_no_ext.endswith('_b'):
            label = "bonafide"
        elif file_name_no_ext.endswith('_s'):
            label = "spoof"
        else:
            continue

        f.write(f"GIONG {file_name_no_ext} - - {label}\n")

print("File list created successfully.")


File list created successfully.


# File Copy & Paste

In [4]:
def copy_files(src_folder, dest_folder):
    """Copy every regular file directly inside ``src_folder`` to ``dest_folder``.

    If the source does not exist, a message is printed and nothing is copied.
    The destination is created when missing. Sub-directories of the source
    are skipped. Each copied file name is printed, followed by a final
    completion message.
    """
    if not os.path.exists(src_folder):
        print(f"Source folder '{src_folder}' does not exist.")
        return

    if not os.path.exists(dest_folder):
        os.makedirs(dest_folder)
        print(f"Destination folder '{dest_folder}' created.")

    for entry in os.listdir(src_folder):
        candidate = os.path.join(src_folder, entry)
        # Only plain files are copied; directories are ignored.
        if not os.path.isfile(candidate):
            continue
        shutil.copy(candidate, dest_folder)
        print(f"Copied: {entry}")

    print("All files copied successfully.")

In [23]:
# Copy the files from the ValiFake folder into the ValiAudi folder.
src_folder = '../DemoProject/Vali/ValiFake'  
dest_folder = '../DemoProject/Vali/ValiAudi' 

copy_files(src_folder, dest_folder)

Copied: vali_0001_s.wav
Copied: vali_0002_s.wav
Copied: vali_0003_s.wav
Copied: vali_0004_s.wav
Copied: vali_0005_s.wav
Copied: vali_0006_s.wav
Copied: vali_0007_s.wav
Copied: vali_0008_s.wav
Copied: vali_0009_s.wav
Copied: vali_0010_s.wav
Copied: vali_0011_s.wav
Copied: vali_0012_s.wav
Copied: vali_0013_s.wav
Copied: vali_0014_s.wav
Copied: vali_0015_s.wav
Copied: vali_0016_s.wav
Copied: vali_0017_s.wav
Copied: vali_0018_s.wav
Copied: vali_0019_s.wav
Copied: vali_0020_s.wav
Copied: vali_0021_s.wav
Copied: vali_0022_s.wav
Copied: vali_0023_s.wav
Copied: vali_0024_s.wav
Copied: vali_0025_s.wav
Copied: vali_0026_s.wav
Copied: vali_0027_s.wav
Copied: vali_0028_s.wav
Copied: vali_0029_s.wav
Copied: vali_0030_s.wav
Copied: vali_0031_s.wav
Copied: vali_0032_s.wav
Copied: vali_0033_s.wav
Copied: vali_0034_s.wav
Copied: vali_0035_s.wav
Copied: vali_0036_s.wav
Copied: vali_0037_s.wav
Copied: vali_0038_s.wav
Copied: vali_0039_s.wav
Copied: vali_0040_s.wav
Copied: vali_0041_s.wav
Copied: vali_004