In [4]:
!pip install pydub requests



In [5]:
import shutil
# Directory removed by this cell (together with everything inside it).
folder_path = "/kaggle/working/recorded_audio"

try:
    shutil.rmtree(folder_path)
except FileNotFoundError:
    # Nothing to clean up.
    print("Folder not found, skipping deletion.")
except Exception as err:
    # Best effort: report the problem and let the notebook continue.
    print(f"Error deleting folder: {err}")
else:
    print(f"Deleted folder: {folder_path}")

Deleted folder: /kaggle/working/recorded_audio


In [6]:
import os
import time
import datetime
import requests
from pydub import AudioSegment
from subprocess import Popen, PIPE

# Directory (relative to the current working directory) that receives the
# recorded clips; created up front if it does not exist yet.
OUTPUT_DIR = "recorded_audio"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Streams to sample from: station display name -> stream URL.
RADIO_STATIONS = dict(
    [
        ("BBC World Service", "http://stream.live.vc.bbcmedia.co.uk/bbc_world_service"),
        ("Classic Rock", "http://stream-uk1.radioparadise.com/mp3-192"),
        ("Swiss Jazz", "http://stream.srg-ssr.ch/m/rsj/mp3_128"),
    ]
)

# `random` is used to pick the station and the clip duration for each sample
import random

def record_audio(station_name, stream_url, duration):
    """Record ``duration`` seconds of ``stream_url`` to an MP3 file using ffmpeg.

    The clip is written into ``OUTPUT_DIR`` as
    ``<station_name>_<YYYYmmdd_HHMMSS>.mp3``, with spaces in the station name
    replaced by underscores.

    Args:
        station_name: Display name of the station; used in the file name and
            in the progress messages.
        stream_url: URL handed to ffmpeg as its input.
        duration: Clip length passed to ffmpeg's ``-t`` option (seconds).

    Returns:
        ``(filepath, duration, timestamp)`` on success. ``duration`` is the
        requested length (it is not measured from the file) and ``timestamp``
        is the start-time string embedded in the file name.
        ``(None, None, None)`` when ffmpeg could not be started, timed out,
        exited with a non-zero status, or left no non-empty output file; any
        partial file is removed in that case.
    """
    # Local import: only Popen and PIPE are imported at file level.
    from subprocess import TimeoutExpired

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{station_name.replace(' ', '_')}_{timestamp}.mp3"
    filepath = os.path.join(OUTPUT_DIR, filename)

    print(f"Recording {station_name} for {duration} seconds...")

    command = [
        "ffmpeg",
        "-i", stream_url,
        "-t", str(duration),
        "-acodec", "mp3",
        "-y", filepath  # Overwrites existing file if any
    ]

    # Bound the wait so a stalled stream cannot block the caller forever.
    # The extra allowance covers connection setup and encoding on top of the
    # clip length. A non-numeric duration (ffmpeg also accepts other time
    # syntaxes) falls back to waiting without a limit, as before.
    startup_grace_s = 60
    try:
        timeout = float(duration) + startup_grace_s
    except (TypeError, ValueError):
        timeout = None

    error = None
    try:
        process = Popen(command, stdout=PIPE, stderr=PIPE)
    except OSError as exc:
        # e.g. the ffmpeg executable is not installed / not on PATH
        error = f"could not start ffmpeg ({exc})"
    else:
        try:
            _, stderr = process.communicate(timeout=timeout)
        except TimeoutExpired:
            process.kill()
            process.communicate()  # reap the killed process and drain its pipes
            error = f"ffmpeg did not finish within {timeout:.0f} seconds"
        else:
            if process.returncode != 0:
                # The last stderr line is usually the most specific message.
                stderr_lines = stderr.decode(errors="replace").strip().splitlines()
                detail = f": {stderr_lines[-1]}" if stderr_lines else ""
                error = f"ffmpeg exited with code {process.returncode}{detail}"
            elif not os.path.isfile(filepath) or os.path.getsize(filepath) == 0:
                error = "ffmpeg produced no audio data"

    if error is None:
        print(f"Saved: {filepath}")
        return filepath, duration, timestamp

    # Do not leave a truncated or empty clip behind in the dataset folder.
    try:
        os.remove(filepath)
    except OSError:
        pass  # nothing was written, or it cannot be removed; best effort only

    print(f"ffmpeg error: {error}")
    print("Recording failed.")
    return None, None, None

import csv  # imported here so this cell is self-contained; used for safe CSV quoting

# Collection settings
NUM_SAMPLES = 30                 # number of recording attempts
MIN_DURATION_S = 30              # shortest clip length, in seconds (inclusive)
MAX_DURATION_S = 50              # longest clip length, in seconds (inclusive)
PAUSE_BETWEEN_RECORDINGS_S = 5   # wait between consecutive recordings
# "File Name" holds the path returned by record_audio.
METADATA_COLUMNS = ["Station Name", "File Name", "Timestamp", "Duration (s)"]

# Store metadata: start a fresh CSV that contains only the header row.
# csv.writer emits plain comma-separated fields (no padding spaces, which would
# otherwise become part of the column names/values) and quotes any value that
# itself contains a comma.
metadata_file = os.path.join(OUTPUT_DIR, "metadata.csv")
with open(metadata_file, "w", newline="") as meta:
    csv.writer(meta, lineterminator="\n").writerow(METADATA_COLUMNS)

# Build the (name, url) list once instead of on every iteration.
stations = list(RADIO_STATIONS.items())

# Record NUM_SAMPLES clips from randomly chosen stations
for i in range(NUM_SAMPLES):
    station_name, stream_url = random.choice(stations)
    duration = random.randint(MIN_DURATION_S, MAX_DURATION_S)  # random length, both bounds inclusive
    filepath, recorded_duration, timestamp = record_audio(station_name, stream_url, duration)

    if filepath:
        # Append immediately so rows for finished clips survive an interrupted run.
        with open(metadata_file, "a", newline="") as meta:
            csv.writer(meta, lineterminator="\n").writerow(
                [station_name, filepath, timestamp, recorded_duration]
            )

    # Pause between recordings; there is nothing to wait for after the last one.
    if i < NUM_SAMPLES - 1:
        time.sleep(PAUSE_BETWEEN_RECORDINGS_S)

print("Audio dataset collection complete.")


Recording Swiss Jazz for 42 seconds...
Saved: recorded_audio/Swiss_Jazz_20250307_174358.mp3
Recording Classic Rock for 36 seconds...
Saved: recorded_audio/Classic_Rock_20250307_174442.mp3
Recording Classic Rock for 45 seconds...
Saved: recorded_audio/Classic_Rock_20250307_174513.mp3
Recording Classic Rock for 35 seconds...
Saved: recorded_audio/Classic_Rock_20250307_174554.mp3
Recording BBC World Service for 42 seconds...
Saved: recorded_audio/BBC_World_Service_20250307_174624.mp3
Recording Classic Rock for 47 seconds...
Saved: recorded_audio/Classic_Rock_20250307_174704.mp3
Recording BBC World Service for 40 seconds...
Saved: recorded_audio/BBC_World_Service_20250307_174747.mp3
Recording Swiss Jazz for 34 seconds...
Saved: recorded_audio/Swiss_Jazz_20250307_174823.mp3
Recording BBC World Service for 37 seconds...
Saved: recorded_audio/BBC_World_Service_20250307_174858.mp3
Recording BBC World Service for 30 seconds...
Saved: recorded_audio/BBC_World_Service_20250307_174932.mp3
Recordin

In [7]:
import shutil
import os

# Folder whose contents go into the export archive
dataset_dir = "/kaggle/working/recorded_audio"

# Create the folder when it is missing, then pack its contents into
# /kaggle/working/audio_dataset_export.zip
os.makedirs(dataset_dir, exist_ok=True)
shutil.make_archive(
    base_name="/kaggle/working/audio_dataset_export",
    format="zip",
    root_dir=dataset_dir,
)

In [8]:
# Build the zip archive from dataset_dir; the returned archive path is the cell output.
shutil.make_archive(
    base_name="/kaggle/working/audio_dataset_export",
    format="zip",
    root_dir=dataset_dir,
)


'/kaggle/working/audio_dataset_export.zip'