# Audio Integrity and Repair Workflow

This notebook demonstrates how to check and repair audio file integrity using Python libraries and command-line tools. It covers detection, analysis, and automated repair of audio files.

In [None]:
# Section 1: Import Required Libraries
import os
import shutil
from pydub import AudioSegment
import librosa
import soundfile as sf
import subprocess

In [None]:
# Section 2: Define Supported Audio Formats
# Filename suffixes that is_audio_file() accepts. Entries are lower-case
# because candidate names are lower-cased before the suffix comparison.
AUDIO_EXTS = [
    '.wav',
    '.mp3',
    '.flac',
    '.aiff',
    '.ogg',
    '.m4a',
    '.aac',
    '.wma',
    '.mp4',
]

In [None]:
# Section 3: Implement Audio File Detection
def is_audio_file(filename):
    """Return True when *filename* ends with one of the AUDIO_EXTS suffixes.

    The name is lower-cased first, so upper- or mixed-case extensions match
    the lower-case entries of AUDIO_EXTS.
    """
    # str.endswith accepts a tuple of candidate suffixes and tests them all.
    known_suffixes = tuple(AUDIO_EXTS)
    return filename.lower().endswith(known_suffixes)

In [None]:
# Section 4: Implement Audio Integrity Check
def check_integrity(filepath):
    """Decode *filepath* with pydub and librosa and run basic sanity checks.

    Args:
        filepath: Path of the audio file to inspect.

    Returns:
        An ``(ok, reason)`` tuple. ``ok`` is True only when every check
        passes, in which case ``reason`` is ``'OK'``. Otherwise ``reason``
        names the first check that failed, or is ``'Error: <message>'`` when
        decoding or analysis raised an exception.
    """
    try:
        audio = AudioSegment.from_file(filepath)
        # pydub reports segment length in milliseconds, so 1000 == one second.
        if len(audio) < 1000:
            return False, 'Too short'
        if audio.dBFS < -50:
            return False, 'Mostly silence'
        # Second, independent decode; sr=None asks librosa not to resample.
        y, sr = librosa.load(filepath, sr=None)
        if librosa.get_duration(y=y, sr=sr) < 1:
            return False, 'Librosa: too short'
        # Vectorised peak amplitude. The built-in max() would walk the sample
        # array element by element in Python, which is very slow on long files.
        if abs(y).max() < 0.01:
            return False, 'Librosa: too quiet'
        return True, 'OK'
    except Exception as e:
        # Any decode failure is itself the integrity verdict, so report it
        # instead of propagating.
        return False, f'Error: {e}'

In [None]:
# Section 5: Implement Audio Repair Function
def attempt_repair(filepath, ready_folder):
    """Try to re-encode *filepath* into *ready_folder* as a WAV file.

    Three strategies are tried in order, stopping at the first one that
    succeeds: pydub, then the ``ffmpeg`` command-line tool, then the ``sox``
    command-line tool.

    Args:
        filepath: Path of the audio file to repair.
        ready_folder: Directory that receives the re-encoded file.

    Returns:
        ``(output_path, message)`` naming the strategy that worked, or
        ``(None, message)`` carrying the error text of all three strategies
        when none succeeded.
    """
    # Every strategy writes WAV data, so the output name always gets a '.wav'
    # suffix. Reusing the input's extension would leave e.g. 'song.mp3'
    # holding WAV data, and would make the ffmpeg/sox targets collide with it.
    stem = os.path.splitext(os.path.basename(filepath))[0]
    output_base = os.path.join(ready_folder, stem)
    repaired_path = output_base + '.wav'
    try:
        audio = AudioSegment.from_file(filepath)
        audio.export(repaired_path, format='wav')
        return repaired_path, 'Repaired with pydub'
    except Exception as e:
        pydub_error = str(e)
    try:
        ffmpeg_path = output_base + '_ffmpeg.wav'
        cmd = [
            'ffmpeg', '-y', '-i', filepath,
            '-c:a', 'pcm_s16le', ffmpeg_path
        ]
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(cmd, check=True)
        return ffmpeg_path, 'Repaired with ffmpeg'
    except Exception as e:
        ffmpeg_error = str(e)
    try:
        sox_path = output_base + '_sox.wav'
        # NOTE(review): sox's 'noisered' effect appears to take a noise-profile
        # file as its first argument; '0.21' looks like it was meant as the
        # reduction amount. Confirm against the sox manual before relying on
        # this fallback.
        cmd = [
            'sox', filepath, sox_path, 'noisered', '0.21', 'silence', '1', '0.1', '1%']
        subprocess.run(cmd, check=True)
        return sox_path, 'Repaired with sox'
    except Exception as e:
        sox_error = str(e)
    return None, f'Repair failed: pydub({pydub_error}), ffmpeg({ffmpeg_error}), sox({sox_error})'

In [None]:
# Section 6: Process Audio Files in Directory
# Directory whose files process_audio_files() inspects. Hard-coded to one
# specific mounted volume; change it before running on another machine.
AUDIO_ROOT = '/Volumes/JOE/DUPES FROM 12TB'
# Files that fail the integrity check are moved here.
REPAIR_FOLDER = os.path.join(AUDIO_ROOT, 'needs_repair')
# Files that pass the check are copied here; repaired output is written here too.
READY_FOLDER = os.path.join(AUDIO_ROOT, 'ready_to_go')
# Both folders live inside AUDIO_ROOT and are created when this cell runs;
# exist_ok=True makes re-running the cell harmless.
os.makedirs(REPAIR_FOLDER, exist_ok=True)
os.makedirs(READY_FOLDER, exist_ok=True)

def process_audio_files():
    """Sort the audio files directly inside AUDIO_ROOT by integrity.

    Files that pass check_integrity() are copied to READY_FOLDER. Files that
    fail are moved to REPAIR_FOLDER and handed to attempt_repair(), which is
    asked to write its output into READY_FOLDER. One status line is printed
    per step. Subdirectories are not descended into.

    A filesystem error while copying or moving one file is reported and that
    file is skipped, so a single failure no longer aborts the whole batch.
    """
    for fname in os.listdir(AUDIO_ROOT):
        fpath = os.path.join(AUDIO_ROOT, fname)
        if not (os.path.isfile(fpath) and is_audio_file(fname)):
            continue
        ok, reason = check_integrity(fpath)
        try:
            if ok:
                shutil.copy2(fpath, READY_FOLDER)
                print(f'{fname}: OK')
                continue
            print(f'{fname}: Damaged ({reason})')
            # shutil.move returns the file's new path inside REPAIR_FOLDER.
            quarantined = shutil.move(fpath, REPAIR_FOLDER)
        except OSError as e:
            # shutil.Error subclasses OSError; it is raised, for example, when
            # a file with the same name already exists in the destination.
            print(f'{fname}: Skipped ({e})')
            continue
        repaired, rep_reason = attempt_repair(quarantined, READY_FOLDER)
        if repaired:
            print(f'{fname}: {rep_reason}')
        else:
            print(f'{fname}: Could not repair ({rep_reason})')

In [None]:
# Section 7: Run Audio Integrity Workflow
# Run the whole workflow over AUDIO_ROOT; per-file status lines are printed
# by process_audio_files() itself.
process_audio_files()
print('Audio integrity check complete.')