In [19]:
# Load the project root path from config.yaml (resolved against the current working directory).
import pathlib
import yaml

# Open in binary mode so PyYAML decodes the stream itself (UTF-8 / UTF-16)
# instead of Python decoding it with the platform's locale encoding, which can
# mis-decode non-ASCII characters in the configured path (e.g. on Windows).
with open("config.yaml", "rb") as f:
    config = yaml.safe_load(f)

# The root path is read from the `project.root_path` key of the config.
project_root = pathlib.Path(config["project"]["root_path"])

print("Current project path: ", project_root)

Current project path:  C:\Users\Konstantinos\Desktop\Coughvid Data


In [20]:
#Dataset currently has 3 file formats
    #-webm
    #-ogg
    #-wav

#WAV is uncompressed and has better compatibility with librosa and pyAudioAnalysis. ogg and webm need ffmpeg installed locally and different libraries, which would result in
    #a lot of unnecessary trouble during processing. Here we'll convert all the .ogg and .webm files to .wav to make signal analysis and subsequent feature extraction easier.

# Existing source folders under the project root, one per audio format
webm_dir, ogg_dir, wav_dir = (
    project_root / "audio_webm",
    project_root / "audio_ogg",
    project_root / "audio_wav",
)

In [21]:
# Destination folder that collects every .wav file, both pre-existing and newly converted
import pathlib

wav_fulldset_dir = project_root / "audio_wav_fulldset"

# exist_ok=True keeps the cell safe to re-run; parents=True also creates missing parent folders
wav_fulldset_dir.mkdir(exist_ok=True, parents=True)

In [22]:
import os
import subprocess
from tqdm import tqdm
import imageio_ffmpeg as ffbin

# Standardize every .wav in wav_dir to 8 kHz / mono / 16-bit PCM and write the
# result, under the same filename, into wav_fulldset_dir.

# Path to the ffmpeg executable as reported by imageio-ffmpeg
ffmpeg_exe = ffbin.get_ffmpeg_exe()

# Ensure output directory exists
os.makedirs(wav_fulldset_dir, exist_ok=True)

# List all wav files in the source directory (extension match is case-insensitive)
files = [f for f in os.listdir(wav_dir) if f.lower().endswith('.wav')]

# Counter for files processed, plus the names of the files ffmpeg could not convert
processed_count = 0
failed_files = []

for filename in tqdm(files, desc="Standardizing WAVs to 8kHz"):
    src_file = os.path.join(wav_dir, filename)
    dst_file = os.path.join(wav_fulldset_dir, filename)

    try:
        subprocess.run([
            ffmpeg_exe,
            # Keep ffmpeg's stderr limited to real errors so it can be shown on failure
            "-hide_banner", "-loglevel", "error",
            "-y",  # overwrite an existing output file without prompting
            "-i", src_file,
            "-ar", "8000",  # 8KHz regardless to avoid headaches with sampling rates
            "-ac", "1",  # explicitly to mono
            "-acodec", "pcm_s16le",  # standard 16-bit PCM WAV format
            dst_file
        ], stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, check=True)

        processed_count += 1

    except subprocess.CalledProcessError as e:
        failed_files.append(filename)
        # Prefer ffmpeg's own last error line; fall back to the generic exit-status message
        stderr_lines = (e.stderr or b"").decode("utf-8", errors="replace").strip().splitlines()
        detail = stderr_lines[-1] if stderr_lines else str(e)
        print(f"Error processing {filename}: {detail}")

print("-" * 30)
print("Processing Complete.")
print(f"Total files standardized and moved to full dataset: {processed_count}")
if failed_files:
    print(f"Files that failed to convert: {len(failed_files)}")

Standardizing WAVs to 8kHz: 100%|██████████| 3309/3309 [02:35<00:00, 21.32it/s]

------------------------------
Processing Complete.
Total files standardized and moved to full dataset: 3309





In [None]:
import os
import subprocess
from tqdm import tqdm
import imageio_ffmpeg as ffbin

# Convert every .webm in webm_dir to an 8 kHz / mono / 16-bit PCM .wav in
# wav_fulldset_dir, keeping the original file stem.

# Path to the ffmpeg executable as reported by imageio-ffmpeg
ffmpeg_exe = ffbin.get_ffmpeg_exe()

# Create destination directory if it doesn't exist
os.makedirs(wav_fulldset_dir, exist_ok=True)

# List WebM files (extension match is case-insensitive)
webm_files = [f for f in os.listdir(webm_dir) if f.lower().endswith('.webm')]

# Track outcomes so failed conversions are reported instead of silently ignored
webm_converted = 0
webm_failed = []

# Loop through files and convert
for filename in tqdm(webm_files, desc="Converting WebM to WAV"):
    input_path = os.path.join(webm_dir, filename)
    # Change extension to .wav for the output filename
    output_filename = os.path.splitext(filename)[0] + ".wav"
    output_path = os.path.join(wav_fulldset_dir, output_filename)

    try:
        subprocess.run([
            ffmpeg_exe,
            # Keep ffmpeg's stderr limited to real errors so it can be shown on failure
            "-hide_banner", "-loglevel", "error",
            # -y overwrites an existing output of the same name.
            # NOTE(review): a .webm sharing its stem with an already-written .wav
            # would replace it -- confirm stems are unique across formats.
            "-y", "-i", input_path,
            "-vn",  # disable video (safety for webm)
            "-acodec", "pcm_s16le",
            # EXPLICITLY subsampling the entire dset to 8khz to save memory; the input data
            # is phone recordings which are already low quality, so the extra bandwidth won't help much
            "-ar", "8000",
            "-ac", "1",
            output_path
        ], stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, check=True)

        webm_converted += 1

    except subprocess.CalledProcessError as e:
        webm_failed.append(filename)
        # Prefer ffmpeg's own last error line; fall back to the generic exit-status message
        stderr_lines = (e.stderr or b"").decode("utf-8", errors="replace").strip().splitlines()
        detail = stderr_lines[-1] if stderr_lines else str(e)
        print(f"Error processing {filename}: {detail}")

print(f"WebM conversion complete. Files saved to: {wav_fulldset_dir}")
print(f"Converted {webm_converted} of {len(webm_files)} WebM files.")
if webm_failed:
    print(f"Files that failed to convert: {len(webm_failed)}")

Converting WebM to WAV: 100%|██████████| 29348/29348 [22:43<00:00, 21.52it/s]

WebM conversion complete. Files saved to: C:\Users\Konstantinos\Desktop\Coughvid Data\audio_wav_fulldset





In [None]:
import os
import subprocess
from tqdm import tqdm
import imageio_ffmpeg as ffbin

# Convert every .ogg in ogg_dir to an 8 kHz / mono / 16-bit PCM .wav in
# wav_fulldset_dir, keeping the original file stem.

# Path to the ffmpeg executable as reported by imageio-ffmpeg
ffmpeg_exe = ffbin.get_ffmpeg_exe()

# Create destination directory if it doesn't exist
os.makedirs(wav_fulldset_dir, exist_ok=True)

# List OGG files (extension match is case-insensitive)
ogg_files = [f for f in os.listdir(ogg_dir) if f.lower().endswith('.ogg')]

# Track outcomes so failed conversions are reported instead of silently ignored
ogg_converted = 0
ogg_failed = []

for filename in tqdm(ogg_files, desc="Converting OGG to WAV"):
    input_path = os.path.join(ogg_dir, filename)
    # Change extension to .wav for the output filename
    output_filename = os.path.splitext(filename)[0] + ".wav"
    output_path = os.path.join(wav_fulldset_dir, output_filename)

    try:
        subprocess.run([
            ffmpeg_exe,
            # Keep ffmpeg's stderr limited to real errors so it can be shown on failure
            "-hide_banner", "-loglevel", "error",
            # -y overwrites an existing output of the same name.
            # NOTE(review): a .ogg sharing its stem with an already-written .wav
            # would replace it -- confirm stems are unique across formats.
            "-y", "-i", input_path,
            "-acodec", "pcm_s16le",  # 16-bit PCM WAV
            "-ar", "8000",  # resample to 8 kHz
            "-ac", "1",  # downmix to mono
            output_path
        ], stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, check=True)

        ogg_converted += 1

    except subprocess.CalledProcessError as e:
        ogg_failed.append(filename)
        # Prefer ffmpeg's own last error line; fall back to the generic exit-status message
        stderr_lines = (e.stderr or b"").decode("utf-8", errors="replace").strip().splitlines()
        detail = stderr_lines[-1] if stderr_lines else str(e)
        print(f"Error processing {filename}: {detail}")

print(f"OGG conversion complete. Files saved to: {wav_fulldset_dir}")
print(f"Converted {ogg_converted} of {len(ogg_files)} OGG files.")
if ogg_failed:
    print(f"Files that failed to convert: {len(ogg_failed)}")

Converting OGG to WAV: 100%|██████████| 1777/1777 [01:28<00:00, 20.12it/s]

OGG conversion complete. Files saved to: C:\Users\Konstantinos\Desktop\Coughvid Data\audio_wav_fulldset



