In [1]:
import os

destination_folder = "/teamspace/studios/this_studio/audio_detect/dataset"

# Get all subdirectories only
dirs = [d for d in os.listdir(destination_folder) if os.path.isdir(os.path.join(destination_folder, d))]

print("Directories inside dataset folder:")
for d in dirs:
    print(d)


Directories inside dataset folder:
for-rerec
eval
combined_folder
DEMONSTRATION
KAGGLE
for-norm
for-original
train
for-2sec
dev
generated_audio


In [2]:
import os

destination_folder = "/teamspace/studios/this_studio/audio_detect/dataset"

print("Directories inside dataset folder and their subdirectories:\n")

for root, dirs, files in os.walk(destination_folder):
    # Print the current directory (relative to the destination folder)
    relative_root = os.path.relpath(root, destination_folder)
    print(f"📁 {relative_root}" if relative_root != "." else f"📁 {destination_folder}")

    # Print subdirectories
    for d in dirs:
        print(f"  └── {d}")


Directories inside dataset folder and their subdirectories:

📁 /teamspace/studios/this_studio/audio_detect/dataset
  └── for-rerec
  └── eval
  └── combined_folder
  └── DEMONSTRATION
  └── KAGGLE
  └── for-norm
  └── for-original
  └── train
  └── for-2sec
  └── dev
  └── generated_audio
📁 for-rerec
  └── for-rerecorded
📁 for-rerec/for-rerecorded
  └── validation
  └── training
  └── testing
📁 for-rerec/for-rerecorded/validation
  └── real
  └── fake
📁 for-rerec/for-rerecorded/validation/real
📁 for-rerec/for-rerecorded/validation/fake
📁 for-rerec/for-rerecorded/training
  └── real
  └── fake
📁 for-rerec/for-rerecorded/training/real
📁 for-rerec/for-rerecorded/training/fake
📁 for-rerec/for-rerecorded/testing
  └── real
  └── fake
📁 for-rerec/for-rerecorded/testing/real
📁 for-rerec/for-rerecorded/testing/fake
📁 eval
  └── real
  └── fake
📁 eval/real
📁 eval/fake
📁 combined_folder
  └── real
  └── fake
📁 combined_folder/real
📁 combined_folder/fake
📁 DEMONSTRATION
  └── DEMONSTRATION
📁 DEMO

In [3]:
import os
import shutil
from datetime import datetime
import random

# Configuration
DATASET_ROOT = "/teamspace/studios/this_studio/audio_detect/dataset"
MERGED_ROOT = os.path.join(DATASET_ROOT, "merged_data")
REAL_TARGET = os.path.join(MERGED_ROOT, "real")
FAKE_TARGET = os.path.join(MERGED_ROOT, "fake")
AUDIO_EXTENSIONS = {'.wav', '.mp3', '.flac', '.ogg', '.m4a'}  # Expand as needed

def unique_filename(prefix: str, filename: str) -> str:
    """Generate a collision-resistant filename with timestamp and random suffix."""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    rand_suffix = random.randint(1000, 9999)
    return f"{prefix}_{timestamp}_{rand_suffix}_{filename}"

def is_audio_file(filename: str) -> bool:
    """Check if file has a valid audio extension (case-insensitive)."""
    return os.path.splitext(filename)[1].lower() in AUDIO_EXTENSIONS

def copy_audio_files(source_dir: str, target_dir: str, prefix: str):
    """Copy all audio files from source_dir to target_dir with prefixed names."""
    for root, _, files in os.walk(source_dir):
        for file in files:
            if is_audio_file(file):
                src_path = os.path.join(root, file)
                dst_path = os.path.join(target_dir, unique_filename(prefix, file))
                shutil.copy2(src_path, dst_path)

def main():
    # Initialize directories
    os.makedirs(REAL_TARGET, exist_ok=True)
    os.makedirs(FAKE_TARGET, exist_ok=True)

    # Process real/fake directories
    for root, dirs, _ in os.walk(DATASET_ROOT):
        if os.path.normpath(MERGED_ROOT) in os.path.normpath(root):
            continue  # Skip merged data directory

        basename = os.path.basename(root)
        if basename == "real":
            copy_audio_files(root, REAL_TARGET, "real")
        elif basename == "fake":
            copy_audio_files(root, FAKE_TARGET, "fake")

    # Process generated_audio (handles both root-level and nested 'generated' folders)
    generated_audio_dir = os.path.join(DATASET_ROOT, "generated_audio")
    if os.path.exists(generated_audio_dir):
        copy_audio_files(generated_audio_dir, FAKE_TARGET, "generated")

    print("Audio files merged successfully. Stats:")
    print(f"  Real: {len(os.listdir(REAL_TARGET))} files")
    print(f"  Fake: {len(os.listdir(FAKE_TARGET))} files")

if __name__ == "__main__":
    main()

Audio files merged successfully. Stats:
  Real: 156633 files
  Fake: 316629 files


In [7]:
# %pip install pydub
# %pip install ffmpeg-python

In [1]:
import os
import random
from pydub import AudioSegment
from concurrent.futures import ProcessPoolExecutor
from tqdm import tqdm

# Configuration
SOURCE_BASE = "/teamspace/studios/this_studio/audio_detect/dataset/merged_data"
OUTPUT_BASE = "/teamspace/studios/this_studio/audio_detect/dataset/processed_data_2s"
AUDIO_EXTENSIONS = {'.wav', '.mp3', '.flac', '.ogg', '.m4a'}
TARGET_DURATION_MS = 2000
CLIPS_PER_CLASS = 20000
MAX_WORKERS = os.cpu_count() or 4

def is_audio_file(filename):
    return os.path.splitext(filename)[1].lower() in AUDIO_EXTENSIONS

def load_and_trim_save(args):
    file_path, output_dir, idx = args
    try:
        audio = AudioSegment.from_file(file_path)
        if len(audio) < TARGET_DURATION_MS:
            return None  # Skip
        audio = audio[:TARGET_DURATION_MS]
        out_path = os.path.join(output_dir, f"{idx:05d}.wav")
        audio.export(out_path, format="wav")
        return True
    except Exception:
        return None

def process_class_parallel(class_label):
    input_dir = os.path.join(SOURCE_BASE, class_label)
    output_dir = os.path.join(OUTPUT_BASE, class_label)
    os.makedirs(output_dir, exist_ok=True)

    all_files = [os.path.join(dp, f) for dp, _, files in os.walk(input_dir)
                 for f in files if is_audio_file(f)]
    random.shuffle(all_files)

    args_list = [(file, output_dir, i) for i, file in enumerate(all_files[:CLIPS_PER_CLASS * 2])]
    
    print(f"🔄 Processing {class_label} using {MAX_WORKERS} workers...")
    count = 0
    with ProcessPoolExecutor(max_workers=MAX_WORKERS) as executor:
        for result in tqdm(executor.map(load_and_trim_save, args_list), total=len(args_list)):
            if result:
                count += 1
            if count >= CLIPS_PER_CLASS:
                break

    print(f"✅ {class_label.capitalize()} done: Saved {count} clips.")

def main():
    for label in ["real", "fake"]:
        process_class_parallel(label)

if __name__ == "__main__":
    main()


🔄 Processing real using 4 workers...


 64%|██████▍   | 25768/40000 [12:11<06:43, 35.23it/s]


✅ Real done: Saved 20000 clips.
🔄 Processing fake using 4 workers...


 64%|██████▍   | 25698/40000 [07:54<04:24, 54.12it/s] 


✅ Fake done: Saved 20000 clips.


In [4]:
import os
import random
from IPython.display import Audio

# Directory containing the audio files
real_dir = "/teamspace/studios/this_studio/audio_detect/dataset/processed_data_2s/real"
fake_dir = "/teamspace/studios/this_studio/audio_detect/dataset/processed_data_2s/fake"

# Get random real and fake audio files
real_audio_file = random.choice(os.listdir(real_dir))
fake_audio_file = random.choice(os.listdir(fake_dir))

# Generate full paths to the selected files
real_audio_path = os.path.join(real_dir, real_audio_file)
fake_audio_path = os.path.join(fake_dir, fake_audio_file)

# Display the real audio
print(f"Playing real clip: {real_audio_file}")
Audio(real_audio_path)

# Display the fake audio
print(f"Playing fake clip: {fake_audio_file}")
Audio(fake_audio_path)


Playing real clip: 13559.wav
Playing fake clip: 25101.wav


In [5]:
import os

# Directories for real and fake audio files
real_dir = "/teamspace/studios/this_studio/audio_detect/dataset/processed_data_2s/real"
fake_dir = "/teamspace/studios/this_studio/audio_detect/dataset/processed_data_2s/fake"

# Function to count files in a directory
def count_files_in_directory(directory):
    return len([f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))])

# Count files in both real and fake directories
real_count = count_files_in_directory(real_dir)
fake_count = count_files_in_directory(fake_dir)

print(f"Total number of real audio files: {real_count}")
print(f"Total number of fake audio files: {fake_count}")


Total number of real audio files: 21549
Total number of fake audio files: 20010


In [6]:
import os
import random
from pydub.utils import mediainfo

# Directories for real and fake audio files
real_dir = "/teamspace/studios/this_studio/audio_detect/dataset/processed_data_2s/real"
fake_dir = "/teamspace/studios/this_studio/audio_detect/dataset/processed_data_2s/fake"

# Function to get audio duration
def get_audio_duration(file_path):
    info = mediainfo(file_path)
    return float(info['duration'])

# Function to get random clips and print their lengths
def check_random_clips_length(directory, num_samples=5):
    files = [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
    random_files = random.sample(files, num_samples)
    
    print(f"\nChecking lengths of {num_samples} random clips from {directory}:")
    for file in random_files:
        file_path = os.path.join(directory, file)
        duration = get_audio_duration(file_path)
        print(f"Clip: {file}, Length: {duration:.2f} seconds")

# Check 5 random clips from real and fake directories
check_random_clips_length(real_dir)
check_random_clips_length(fake_dir)



Checking lengths of 5 random clips from /teamspace/studios/this_studio/audio_detect/dataset/processed_data_2s/real:
Clip: 22841.wav, Length: 2.00 seconds
Clip: 12501.wav, Length: 2.00 seconds
Clip: 16901.wav, Length: 2.00 seconds
Clip: 18269.wav, Length: 2.00 seconds
Clip: 14459.wav, Length: 2.00 seconds

Checking lengths of 5 random clips from /teamspace/studios/this_studio/audio_detect/dataset/processed_data_2s/fake:
Clip: 25324.wav, Length: 2.00 seconds
Clip: 09263.wav, Length: 2.00 seconds
Clip: 03600.wav, Length: 2.00 seconds
Clip: 19014.wav, Length: 2.00 seconds
Clip: 10250.wav, Length: 2.00 seconds


In [7]:
import os
import random

# Directory for real and fake audio files
real_dir = "/teamspace/studios/this_studio/audio_detect/dataset/processed_data_2s/real"
fake_dir = "/teamspace/studios/this_studio/audio_detect/dataset/processed_data_2s/fake"

# Get the list of all files in the real directory
real_files = [f for f in os.listdir(real_dir) if os.path.isfile(os.path.join(real_dir, f))]

# Number of fake files (since we want to keep the same number in the real directory)
num_fake_files = len([f for f in os.listdir(fake_dir) if os.path.isfile(os.path.join(fake_dir, f))])

# If there are more real files than fake files, remove the extra ones
if len(real_files) > num_fake_files:
    extra_files = random.sample(real_files, len(real_files) - num_fake_files)  # Select extra files to remove
    for file in extra_files:
        file_path = os.path.join(real_dir, file)
        os.remove(file_path)  # Remove the extra files
    print(f"Removed {len(extra_files)} extra files from the real directory.")
else:
    print("No extra files to remove. The number of real files is already equal to or less than the number of fake files.")


Removed 1539 extra files from the real directory.


In [8]:
import os

# Directories for real and fake audio files
real_dir = "/teamspace/studios/this_studio/audio_detect/dataset/processed_data_2s/real"
fake_dir = "/teamspace/studios/this_studio/audio_detect/dataset/processed_data_2s/fake"

# Function to count files in a directory
def count_files_in_directory(directory):
    return len([f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))])

# Count files in both real and fake directories
real_count = count_files_in_directory(real_dir)
fake_count = count_files_in_directory(fake_dir)

print(f"Total number of real audio files: {real_count}")
print(f"Total number of fake audio files: {fake_count}")


Total number of real audio files: 20010
Total number of fake audio files: 20010


In [None]:
# Plotting train/validation accuracy and loss
plt.figure(figsize=(12, 5))

# Plot accuracy
plt.subplot(1, 2, 1)
plt.plot(range(1, len(train_accuracies) + 1), train_accuracies, label="Train Accuracy")
plt.plot(range(1, len(val_accuracies) + 1), val_accuracies, label="Val Accuracy")
plt.axhline(y=test_acc, color='g', linestyle='--', label="Test Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Train vs Validation vs Test Accuracy")
plt.legend()
plt.grid(True)

# Plot loss
plt.subplot(1, 2, 2)
plt.plot(range(1, len(train_losses) + 1), train_losses, label="Train Loss")
plt.plot(range(1, len(val_losses) + 1), val_losses, label="Val Loss")
plt.axhline(y=test_loss, color='g', linestyle='--', label="Test Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Train vs Validation vs Test Loss")
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.savefig("training_plot.png")
print("Training plot saved as 'training_plot.png'")

# Inference function for new audio clips
def classify_audio_clip(audio_path, model, processor, device):
    """
    Classify a new audio clip as real or fake.
    
    Args:
        audio_path (str): Path to the audio file.
        model (Wav2Vec2Classifier): Trained model.
        processor (Wav2Vec2Processor): Wav2Vec2 processor.
        device (torch.device): Device to run inference on.
    
    Returns:
        tuple: (prediction, probability)
    """
    try:
        # Pre-check file validity
        sf.info(audio_path)
        # Load and preprocess audio
        waveform, sr = torchaudio.load(audio_path, backend="soundfile")
        if sr != 16000:
            resampler = torchaudio.transforms.Resample(sr, 16000)
            waveform = resampler(waveform)
        if waveform.shape[0] > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)
        waveform = waveform.squeeze(0)
        
        # Process with Wav2Vec2Processor
        inputs = processor(waveform, sampling_rate=16000, return_tensors="pt", padding=True)
        input_values = inputs.input_values.to(device)
        attention_mask = inputs.attention_mask.to(device)
        
        # Inference
        model.eval()
        with torch.no_grad():
            logits = model(input_values, attention_mask)
            probs = torch.softmax(logits, dim=-1)
            pred = torch.argmax(probs, dim=-1).item()
            prob = probs[0, pred].item()
        
        return pred, prob
    except Exception as e:
        print(f"Error processing {audio_path}: {e}")
        with open("error_log.txt", "a") as f:
            f.write(f"Error processing {audio_path}: {e}\n")
        return None, None

# Example inference
if os.path.exists(best_model_path):
    model.load_state_dict(torch.load(best_model_path, map_location=device))
    print("Loaded best model for inference")
    
    # Example: Replace with your audio file path
    test_audio = "/teamspace/studios/this_studio/audio_detect/test_audio.wav"
    if os.path.exists(test_audio):
        pred, prob = classify_audio_clip(test_audio, model, processor, device)
        if pred is not None:
            print(f"Audio: {test_audio}")
            print(f"Prediction: {'Fake' if pred == 1 else 'Real'}, Probability: {prob:.4f}")
else:
    print("No trained model found. Please train the model first.")