In [156]:
import os
import random

import numpy as np
import pandas as pd
import torchaudio

In [157]:
# Constants
# FILE_NAME is the single source of truth for the recording's base name; the
# input path and the CSV name are derived from it so they cannot drift apart.
FILE_NAME = 'nothing'
AUDIO_PATH = f'record/{FILE_NAME}.wav'   # recording to cut into clips
OUTPUT_DIR = 'record/cut_audio'          # folder receiving the clips and the CSV
CSV_NAME = f'{FILE_NAME}.csv'            # metadata file name
SAMPLE_RATE = 44100                      # samples per second
SEGMENT_SECONDS = 2                      # clip duration in seconds
SEGMENT_LENGTH = SEGMENT_SECONDS * SAMPLE_RATE  # clip duration in samples
NUM_SEGMENTS = 100                       # number of random clips to draw

In [158]:
# Create output directory if it doesn't exist.
# exist_ok=True makes this a single idempotent call and removes the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [159]:
# Function to check if a segment is mostly silent
def is_silent(segment, threshold=0.005):
    """Return True when the segment's mean absolute amplitude is below ``threshold``.

    Args:
        segment: Tensor of audio samples. Only ``numel``, ``abs`` and ``mean``
            are used, so the amplitude is averaged over every element.
        threshold: Mean-absolute-amplitude cutoff. Defaults to 0.005, the value
            that was previously hard-coded.

    Returns:
        bool: True if the segment is considered silent. An empty segment is
        reported as silent, because its mean would otherwise be NaN and
        compare as "not silent".
    """
    if segment.numel() == 0:
        return True
    return segment.abs().mean().item() < threshold

In [160]:
# Load the audio file; torchaudio.load returns the waveform and its sample rate
waveform, sr = torchaudio.load(AUDIO_PATH, normalize=True)

# SEGMENT_LENGTH and the saved clips both assume SAMPLE_RATE. If the recording
# was captured at another rate, resample it so clip durations and the rate
# written to disk stay correct instead of silently drifting.
if sr != SAMPLE_RATE:
    waveform = torchaudio.functional.resample(waveform, orig_freq=sr, new_freq=SAMPLE_RATE)
    sr = SAMPLE_RATE

In [161]:
# Extract random fixed-length segments (SEGMENT_LENGTH samples each), keeping
# only the ones that are not silent. Silent draws are dropped without a retry,
# so `segments` may end up with fewer than NUM_SEGMENTS entries.
SEGMENT_SEED = 0  # fixed seed so a fresh Run All selects the same clips
segment_rng = random.Random(SEGMENT_SEED)  # private RNG; global `random` state is untouched

total_samples = waveform.shape[1]
max_start = total_samples - SEGMENT_LENGTH  # last start that still yields a full segment
if max_start < 0:
    # Without this check randint() fails with an opaque "empty range" error.
    raise ValueError(
        f"Audio has {total_samples} samples, which is shorter than one "
        f"segment ({SEGMENT_LENGTH} samples)"
    )

segments = []
for _ in range(NUM_SEGMENTS):
    start_sample = segment_rng.randint(0, max_start)  # randint is inclusive on both ends
    segment = waveform[:, start_sample:start_sample + SEGMENT_LENGTH]
    if not is_silent(segment):
        segments.append(segment)


| New classID | Original classID | Original class      |
|-------------|------------------|---------------------|
| 0           | 0                | air_conditioner    |
| 1           | 1                | car_horn           |
| 2           | 2                | children_playing   |
| 3           | 3                | dog_bark           |
| 4           | 4                | drilling           |
| 5           | 5                | engine_idling      |
| 6           | 6                | gun_shot           |
| 7           | 7                | jackhammer         |
| 8           | 8                | siren              |
| 9           | 9                | street_music       |
| 10          | 11               | glass_shatter      |
| 11          | 12               | door_nock          |
| 12          | 13               | nothing            |
| 13          | 14               | nothing2           |
| 14          | 15               | bicycle_bell       |

In [162]:
# Write every kept segment to OUTPUT_DIR as a WAV file and collect one
# metadata row per clip for the CSV.
csv_data = []
for idx, segment in enumerate(segments):
    clip_name = f"{FILE_NAME}_cut_{idx}.wav"
    output_path = os.path.join(OUTPUT_DIR, clip_name)
    torchaudio.save(output_path, segment, SAMPLE_RATE)

    # NOTE(review): start/end are consecutive 2-unit windows derived from the
    # clip index, not the random offset the clip was cut from -- confirm this
    # is what downstream consumers expect.
    start = idx * 2
    row = {
        'slice_file_name': clip_name,
        'fsID': 123456,
        'start': start,
        'end': start + 2,
        'salience': 1,
        'fold': 'foldval',  # literal fold label, not a numeric fold
        'classID': 13,  # listed as the original classID of 'nothing' in the mapping table
        'class': 'nothing'
    }
    csv_data.append(row)

In [163]:
# Save CSV data
# The column order matches the keys of the rows in csv_data. Listing it
# explicitly keeps the header in the file even when csv_data is empty
# (for example, when every drawn segment was silent).
CSV_COLUMNS = [
    'slice_file_name', 'fsID', 'start', 'end',
    'salience', 'fold', 'classID', 'class',
]
csv_output_path = os.path.join(OUTPUT_DIR, CSV_NAME)
csv_df = pd.DataFrame(csv_data, columns=CSV_COLUMNS)
csv_df.to_csv(csv_output_path, index=False)

In [164]:
# Display the path the metadata CSV was written to
csv_output_path

'record/cut_audio/nothing.csv'