In [1]:
import os
# Switch the working directory to the data folder on drive D, then echo it
# so the cell output confirms where relative paths will resolve.
data_root = "D:/data"
os.chdir(data_root)
print(os.getcwd())

D:\data


In [5]:
import os
import pandas as pd

# Build one unified label table from the per-location train/val metadata CSVs.
base_dir = "D:/DATA"
loc_folders = [f"loc{i}" for i in range(1, 7)]
splits = ["train", "val"]


def classify(row):
    """Map one metadata row to a recording-level class id.

    Args:
        row: mapping with numeric "car_total" and "cv_total" entries.

    Returns:
        0 when only cars were counted, 1 when only CVs were counted, and 2
        otherwise. Label 2 therefore covers BOTH "cars and CVs together" and
        "no vehicle at all"; the two cases cannot be told apart downstream.
    """
    if row["car_total"] > 0 and row["cv_total"] == 0:
        return 0  # only car
    if row["cv_total"] > 0 and row["car_total"] == 0:
        return 1  # only CV
    return 2  # mixed or none


all_entries = []

for loc in loc_folders:
    for split in splits:
        csv_path = os.path.join(base_dir, loc, f"{split}.csv")
        if not os.path.exists(csv_path):
            print(f"Missing: {csv_path}")
            continue

        df = pd.read_csv(csv_path)
        if df.empty:
            # A header-only CSV has nothing to label; a row-wise apply on it
            # would not yield a usable "label" column.
            print(f"Empty: {csv_path}")
            continue

        # Calculate total number of cars and CVs (left + right direction)
        df["car_total"] = df["car_left"] + df["car_right"]
        df["cv_total"] = df["cv_left"] + df["cv_right"]

        # Assign label based on vehicle type
        df["label"] = df.apply(classify, axis=1)

        # Generate absolute path (path field includes .flac already)
        df["abs_path"] = df["path"].apply(lambda x: os.path.join(base_dir, loc, x.replace("/", os.sep)))

        # Add metadata
        df["loc"] = loc
        df["split"] = split

        all_entries.append(df[["abs_path", "label", "loc", "split"]])

# pd.concat([]) fails with an unhelpful "No objects to concatenate"; report
# the actual cause instead.
if not all_entries:
    raise ValueError(f"No metadata CSV could be read under {base_dir}")

# Combine all rows
all_data = pd.concat(all_entries, ignore_index=True)

# Save to unified CSV
output_csv = os.path.join(base_dir, "all_audio_labels.csv")
all_data.to_csv(output_csv, index=False)
print(f"Saved unified label file: {output_csv}")

Saved unified label file: D:/DATA\all_audio_labels.csv


In [6]:
import os
import pandas as pd
import librosa
import soundfile as sf
import numpy as np
from tqdm import tqdm

def normalize_energy(energy_array):
    """Min-max scale an energy sequence to approximately the range [0, 1].

    Args:
        energy_array: array-like of per-segment energy values.

    Returns:
        numpy array of the same length. A small epsilon (1e-8) is added to the
        denominator, so a constant input maps to all zeros instead of dividing
        by zero. An empty input is returned as an empty array rather than
        raising from ``np.min`` on a zero-size array.
    """
    energy = np.asarray(energy_array)
    if energy.size == 0:
        return energy
    lowest = np.min(energy)
    spread = np.max(energy) - lowest
    return (energy - lowest) / (spread + 1e-8)

# Process a .flac file and extract fixed-length segments above an energy threshold
def process_flac_and_save_segments(row, output_base, segment_duration=1.0, sr=16000, baseline_secs=2, threshold_ratio=0.2):
    """Cut one labelled recording into segments and save the energetic ones.

    The recording is split into consecutive non-overlapping segments. The RMS
    energy of each segment is min-max normalised, smoothed with a 3-point
    moving average, and compared with ``baseline + threshold_ratio``, where the
    baseline is the mean normalised energy of the first ``baseline_secs``
    seconds. Each segment above the threshold is written as a wav file under
    ``<output_base>/car`` or ``<output_base>/cv``.

    Args:
        row: mapping with "abs_path", "label", "loc" and "split".
        output_base: root directory for the class sub-folders.
        segment_duration: segment length in seconds.
        sr: sample rate used for loading and writing.
        baseline_secs: leading seconds used to estimate the baseline level.
        threshold_ratio: offset added to the baseline (in normalised units).

    Returns:
        List of ``{"filepath", "label"}`` dicts, one per written segment. The
        list is empty when the label is not 0/1, the file cannot be loaded, the
        recording is shorter than one segment, or nothing exceeds the threshold.
    """
    abs_path = row["abs_path"]
    label = int(row["label"])
    loc = row["loc"]
    split = row["split"]

    if label not in (0, 1):
        return []

    try:
        y, _ = librosa.load(abs_path, sr=sr)
    except Exception as e:
        print(f"Error loading {abs_path}: {e}")
        return []

    samples_per_seg = int(sr * segment_duration)
    num_segments = len(y) // samples_per_seg
    if num_segments == 0:
        # Shorter than one segment: nothing to cut, and normalising an empty
        # energy array would raise.
        return []

    energies = np.array([
        np.sqrt(np.mean(y[i * samples_per_seg:(i + 1) * samples_per_seg] ** 2))
        for i in range(num_segments)
    ])
    energies_norm = normalize_energy(energies)

    # baseline_secs is expressed in seconds; convert it to a segment count so
    # the baseline stays correct when segment_duration is not 1.0.
    baseline_count = max(1, int(round(baseline_secs / segment_duration)))
    baseline = np.mean(energies_norm[:baseline_count])
    threshold = baseline + threshold_ratio

    # mode='same' returns max(len(signal), len(kernel)) samples, which invents
    # extra "segments" when the recording has fewer than 3. Taking the centred
    # slice of the full convolution is identical for >= 3 segments and always
    # has exactly num_segments entries.
    kernel = np.ones(3) / 3
    smoothed = np.convolve(energies_norm, kernel, mode='full')[1:num_segments + 1]
    active = smoothed > threshold
    if not active.any():
        return []

    class_dir = "car" if label == 0 else "cv"
    out_dir = os.path.join(output_base, class_dir)
    os.makedirs(out_dir, exist_ok=True)
    stem = os.path.basename(abs_path).replace('.flac', '')

    entries = []
    for i in np.flatnonzero(active):
        start = i * samples_per_seg
        segment = y[start:start + samples_per_seg]

        # Seconds are numbered from 1 in the file name.
        filename = f"{loc}_{split}_{stem}_sec{i+1:02d}.wav"
        out_path = os.path.join(out_dir, filename)

        sf.write(out_path, segment, sr)
        # Store forward slashes so the CSV matches the other clip CSVs.
        entries.append({"filepath": out_path.replace("\\", "/"), "label": label})

    return entries

# ==== Main Process ====
# Cut every pure-car / pure-CV recording into energetic segments and record
# the written clips in one CSV.

input_csv = "D:/DATA/all_audio_labels.csv"
output_base = "D:/DATA/vehicle_segments"
output_csv = os.path.join(output_base, "vehicle_clips.csv")

df = pd.read_csv(input_csv)
df = df[df["label"].isin([0, 1])]  # process only pure vehicle samples

all_entries = []

for _, row in tqdm(df.iterrows(), total=len(df)):
    all_entries.extend(process_flac_and_save_segments(row, output_base))

# Save final CSV. The directory is created here because it only exists
# already if at least one clip was written; explicit columns keep the header
# present even when no clip was produced, so later read_csv calls still work.
os.makedirs(output_base, exist_ok=True)
df_out = pd.DataFrame(all_entries, columns=["filepath", "label"])
df_out.to_csv(output_csv, index=False)
print(f"\n✅ Saved segmented vehicle clips to {output_csv}")


100%|████████████████████████████████████████████████████████████████████████████| 10076/10076 [14:07<00:00, 11.89it/s]



✅ Saved segmented vehicle clips to D:/DATA/vehicle_segments\vehicle_clips.csv


In [8]:
import pandas as pd

# Tally how many extracted clips belong to each vehicle class.
csv_path = "D:/DATA/vehicle_segments/vehicle_clips.csv"
df = pd.read_csv(csv_path)

car_count = df["label"].eq(0).sum()
cv_count = df["label"].eq(1).sum()

for message in (
    f"Number of car segments (label=0): {car_count}",
    f"Number of cv segments (label=1):  {cv_count}",
):
    print(message)


Number of car segments (label=0): 108827
Number of cv segments (label=1):  1778


In [9]:
import os
import pandas as pd
import librosa
import soundfile as sf
from tqdm import tqdm

# Path config
segments_csv = "D:/DATA/vehicle_segments/vehicle_clips.csv"
original_meta_csv = "D:/DATA/all_audio_labels.csv"
output_dir = "D:/DATA/vehicle_segments/background"
output_csv = "D:/DATA/vehicle_segments/vehicle_clips_background.csv"
segment_duration = 1.0
sr = 16000
segment_samples = int(sr * segment_duration)

# Load CSVs
df_segments = pd.read_csv(segments_csv)
df_meta = pd.read_csv(original_meta_csv)

# Index for used vehicle seconds: "<loc>_<split>_<stem>" -> {1-based second, ...}
segment_index = {}
for path in df_segments["filepath"]:
    stem = os.path.splitext(os.path.basename(path))[0]
    # Split on the LAST "_sec" and keep the whole prefix as the key, so stems
    # that contain underscores still match the key built from the metadata.
    key, marker, sec_text = stem.rpartition("_sec")
    if not marker or not sec_text.isdigit():
        # Not a "<key>_secNN.wav" clip (for example an augmented
        # "..._secNN_augK.wav" row appended to this CSV later); skip it
        # instead of failing on int().
        continue
    segment_index.setdefault(key, set()).add(int(sec_text))

# Process each original .flac and extract unused segments
# NOTE(review): df_meta also contains label-2 recordings, which the label file
# describes as "mixed or none". Seconds from mixed car+CV recordings are
# therefore exported as background too - confirm this is intended.
all_background_entries = []
os.makedirs(output_dir, exist_ok=True)

for _, row in tqdm(df_meta.iterrows(), total=len(df_meta)):
    abs_path = row["abs_path"]
    loc = row["loc"]
    split = row["split"]
    base = os.path.basename(abs_path).replace(".flac", "")
    key = f"{loc}_{split}_{base}"

    try:
        y, _ = librosa.load(abs_path, sr=sr)
    except Exception as e:
        print(f"Error loading {abs_path}: {e}")
        continue

    total_segments = len(y) // segment_samples
    active_secs = segment_index.get(key, set())

    for sec in range(1, total_segments + 1):
        if sec in active_secs:
            continue  # Skip used vehicle seconds

        start = (sec - 1) * segment_samples
        segment = y[start:start + segment_samples]

        outname = f"{key}_sec{sec:02d}.wav"
        outpath = os.path.join(output_dir, outname)
        sf.write(outpath, segment, sr)

        all_background_entries.append({
            "filepath": outpath.replace("\\", "/"),
            "label": 2  # background
        })

# Save background label file (explicit columns keep the header even when no
# background clip was produced)
df_out = pd.DataFrame(all_background_entries, columns=["filepath", "label"])
df_out.to_csv(output_csv, index=False)
print(f"\n✅ Saved background clips: {len(df_out)} to {output_csv}")


100%|████████████████████████████████████████████████████████████████████████████| 15013/15013 [28:13<00:00,  8.87it/s]



✅ Saved background clips: 790175 to D:/DATA/vehicle_segments/vehicle_clips_background.csv


In [10]:
import os
import pandas as pd
import numpy as np
import librosa
import soundfile as sf
import random
from tqdm import tqdm

# Config
target_total = 100000  # desired number of CV clips after augmentation
sr = 16000
segment_duration = 1.0
samples_per_segment = int(sr * segment_duration)
random_seed = 42  # fixed seed so the random augmentation choices can be replayed
random.seed(random_seed)

# Paths
base_dir = "D:/DATA/vehicle_segments"
original_csv = os.path.join(base_dir, "vehicle_clips.csv")
background_dir = os.path.join(base_dir, "background")
cv_aug_dir = os.path.join(base_dir, "cv_aug")
os.makedirs(cv_aug_dir, exist_ok=True)

# Load data
df = pd.read_csv(original_csv)
df_cv = df[df["label"] == 1].reset_index(drop=True)
df_car = df[df["label"] == 0]

# Background pool. os.listdir order is arbitrary, so the list is sorted to
# make random.choice reproducible under the fixed seed. A missing background
# folder yields an empty pool, which simply disables background mixing.
if os.path.isdir(background_dir):
    background_files = sorted(
        os.path.join(background_dir, f)
        for f in os.listdir(background_dir)
        if f.endswith(".wav")
    )
else:
    background_files = []

# Augmentation functions
def augment_with_background(y, bg, snr_db=5):
    """Mix a background clip into a signal at a target signal-to-noise ratio.

    The background is tiled if it is shorter than ``y`` and trimmed to the
    length of ``y``. It is then scaled so that ``rms(y) / rms(scaled bg)`` is
    about ``10 ** (snr_db / 20)`` and added to ``y``.

    Args:
        y: 1-D numpy array holding the foreground signal.
        bg: 1-D numpy array holding the background signal.
        snr_db: desired signal-to-background ratio in decibels.

    Returns:
        New array with the same length as ``y``. If either input is empty
        there is nothing to mix, so a copy of ``y`` is returned (an empty
        background would otherwise cause a division by zero while tiling).
    """
    target_len = len(y)
    if target_len == 0 or len(bg) == 0:
        return np.array(y)

    if len(bg) < target_len:
        repeats = int(np.ceil(target_len / len(bg)))
        bg = np.tile(bg, repeats)
    bg = bg[:target_len]

    rms_y = np.sqrt(np.mean(y**2))
    rms_bg = np.sqrt(np.mean(bg**2))
    # 1e-6 keeps the gain finite for an all-zero background.
    alpha = rms_y / (10**(snr_db/20)) / (rms_bg + 1e-6)
    return y + alpha * bg

def time_stretch(y, rate):
    """Time-stretch ``y`` by the factor ``rate`` using librosa.

    Args:
        y: audio signal as a numpy array.
        rate: stretch factor forwarded to ``librosa.effects.time_stretch``.

    Returns:
        The stretched signal; its length generally differs from ``len(y)``.
    """
    # `rate` must be passed by keyword: recent librosa releases declare it
    # keyword-only, so a positional call raises TypeError there. The keyword
    # form is also accepted by older releases.
    return librosa.effects.time_stretch(y, rate=rate)

def pitch_shift(y, sr, n_steps):
    """Pitch-shift ``y`` through ``librosa.effects.pitch_shift``.

    ``sr`` and ``n_steps`` are forwarded unchanged as keyword arguments, and
    the array returned by librosa is handed back to the caller.
    """
    shifted = librosa.effects.pitch_shift(y, n_steps=n_steps, sr=sr)
    return shifted

def reverse(y):
    """Return ``y`` with its elements in reverse order (step -1 slice)."""
    backwards = slice(None, None, -1)
    return y[backwards]

# Start augmentation loop
# Repeatedly sweep over the original CV clips, applying a random combination
# of augmentations, until the CV class reaches target_total clips.
aug_entries = []
existing = len(df_cv)
needed = target_total - existing
cv_paths = df_cv["filepath"].tolist()

if needed > 0 and not cv_paths:
    # Without any source clip the while-loop below could never finish.
    raise ValueError("No CV clips (label=1) available to augment")

while len(aug_entries) < needed:
    produced_before = len(aug_entries)

    for path in cv_paths:
        if len(aug_entries) >= needed:
            break
        try:
            y, _ = librosa.load(path, sr=sr)
        except Exception as e:
            print(f"Error loading {path}: {e}")
            continue

        aug_y = y.copy()

        try:
            # Random augmentation combos
            if random.random() < 0.6 and background_files:
                bg_path = random.choice(background_files)
                bg_y, _ = librosa.load(bg_path, sr=sr)
                aug_y = augment_with_background(aug_y, bg_y)

            if random.random() < 0.5:
                rate = random.uniform(0.9, 1.1)
                aug_y = time_stretch(aug_y, rate)

            if random.random() < 0.5:
                steps = random.randint(-2, 2)
                if steps != 0:  # a zero-step shift is a no-op; skip the work
                    aug_y = pitch_shift(aug_y, sr=sr, n_steps=steps)

            if random.random() < 0.3:
                aug_y = reverse(aug_y)
        except Exception as e:
            # Report the failure instead of hiding it, then move on.
            print(f"Error augmenting {path}: {e}")
            continue

        # Time stretching changes the clip length; zero-pad or trim so every
        # augmented clip is exactly samples_per_segment long.
        if len(aug_y) < samples_per_segment:
            aug_y = np.pad(aug_y, (0, samples_per_segment - len(aug_y)))
        else:
            aug_y = aug_y[:samples_per_segment]

        # Save augmented segment
        base_name = os.path.splitext(os.path.basename(path))[0]
        aug_name = f"{base_name}_aug{len(aug_entries)}.wav"
        aug_path = os.path.join(cv_aug_dir, aug_name)
        sf.write(aug_path, aug_y, sr)

        aug_entries.append({
            "filepath": aug_path.replace("\\", "/"),
            "label": 1
        })

    if len(aug_entries) == produced_before:
        # A full sweep produced nothing (every clip failed); stop instead of
        # spinning forever.
        raise RuntimeError("Augmentation made no progress: no CV clip could be processed")

# Save final CSV
# NOTE: this overwrites the input CSV, so vehicle_clips.csv contains the
# augmented rows as well from here on.
df_aug = pd.DataFrame(aug_entries, columns=["filepath", "label"])
if len(df_aug):
    df_final = pd.concat([df, df_aug], ignore_index=True)
else:
    df_final = df.copy()
df_final.to_csv(original_csv, index=False)
print(f"✅ Augmentation completed! Generated {len(df_aug)} CV samples and saved to vehicle_clips.csv")


✅ 增强完成！生成 CV 增强样本：98222 条，已写入 vehicle_clips.csv
