In [1]:
import csv
import datetime
import os
import shutil
import subprocess

# Radio stations to sample. Each entry is a dict with a "name" (also used as
# the prefix of the recorded file's name) and the "url" of its live stream.
stations = [
    {"name": name, "url": url}
    for name, url in (
        ("BBC_World_Service", "http://stream.live.vc.bbcmedia.co.uk/bbc_world_service"),
        ("NPR_News", "https://npr-ice.streamguys1.com/live.mp3"),
        ("Classical_Music", "http://icecast.somafm.com/classical"),
        ("Jazz_Lounge", "http://icecast.somafm.com/jazzlounge"),
        ("Rock_Radio", "http://streaming.radio.co/s3dbefb5b8/listen"),
    )
]

# Locate the ffmpeg executable without tying the notebook to one machine.
# Resolution order:
#   1. the FFMPEG_PATH environment variable, when it is set (explicit override);
#   2. the original Windows install location below, when it exists;
#   3. whatever "ffmpeg" resolves to on the system PATH.
default_ffmpeg_path = r"C:\ffmpeg\ffmpeg-master-latest-win64-gpl\ffmpeg.exe"

ffmpeg_path = os.environ.get("FFMPEG_PATH")
if not ffmpeg_path:
    if os.path.isfile(default_ffmpeg_path):
        ffmpeg_path = default_ffmpeg_path
    else:
        # shutil.which returns None when nothing is found; fall back to the
        # default path so the error below names a concrete location.
        ffmpeg_path = shutil.which("ffmpeg") or default_ffmpeg_path

# Fail fast, before any recording is attempted. isfile() is used instead of
# exists() so that a directory at this path is rejected as well.
if not os.path.isfile(ffmpeg_path):
    raise FileNotFoundError(f"ffmpeg not found at: {ffmpeg_path}")

# Folder that collects every recording. The path is relative, so it is
# resolved against the current working directory.
output_dir = "audio_dataset"

# Path of the CSV index describing the recordings. Only the path is built
# here; the file itself is created when it is opened for writing.
metadata_file = os.path.join(output_dir, "metadata.csv")

# Make sure the folder exists; an already existing folder is not an error.
os.makedirs(output_dir, exist_ok=True)

# Record a fixed-length clip from every station and list the recordings that
# actually succeeded in the metadata CSV.

duration = 30  # Clip length in seconds (passed to ffmpeg's -t option)

# Wall-clock budget per station: the clip length plus an allowance for
# connecting and buffering. Without a timeout, a stream that never delivers
# data would block the notebook indefinitely.
record_timeout = duration + 30

recorded_count = 0

# NOTE: mode="w" starts a fresh index on every run; recordings from earlier
# runs stay on disk but are no longer listed in the CSV.
with open(metadata_file, mode="w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["station_name", "timestamp", "duration", "filename"])  # CSV header

    for station in stations:
        # The timestamp (one-second resolution) keeps file names distinct
        # between runs and is also stored in the metadata row.
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{station['name']}_{timestamp}.mp3"
        filepath = os.path.join(output_dir, filename)

        # ffmpeg command to record the audio
        cmd = [
            ffmpeg_path,      # Full path to the ffmpeg executable
            "-y",             # Overwrite existing files
            "-i", station["url"],
            "-t", str(duration),
            "-acodec", "mp3", # Save as MP3 format
            filepath
        ]

        # Print the command for debugging purposes
        print(f"Executing command: {' '.join(cmd)}")
        print(f"Recording from {station['name']} for {duration} seconds...")

        # ffmpeg's own stdout/stderr are left visible for debugging.
        try:
            result = subprocess.run(cmd, timeout=record_timeout)
        except subprocess.TimeoutExpired:
            # subprocess.run has already killed ffmpeg at this point. Any
            # partial output it may have written is left on disk, unindexed.
            print(f"Skipping {station['name']}: ffmpeg did not finish within {record_timeout} seconds.")
            continue

        # Only index recordings that really exist: a non-zero exit status or
        # a missing/empty output file means the stream could not be captured.
        if result.returncode != 0:
            print(f"Skipping {station['name']}: ffmpeg exited with code {result.returncode}.")
            continue
        if not os.path.isfile(filepath) or os.path.getsize(filepath) == 0:
            print(f"Skipping {station['name']}: no audio was written to {filepath}.")
            continue

        # Save metadata to CSV
        writer.writerow([station["name"], timestamp, duration, filename])
        recorded_count += 1

print(
    f"Recording completed: {recorded_count} of {len(stations)} stations recorded. "
    f"Audio files and metadata saved in '{output_dir}' folder."
)


Executing command: C:\ffmpeg\ffmpeg-master-latest-win64-gpl\ffmpeg.exe -y -i http://stream.live.vc.bbcmedia.co.uk/bbc_world_service -t 30 -acodec mp3 audio_dataset\BBC_World_Service_20250317_233404.mp3
Recording from BBC_World_Service for 30 seconds...
Executing command: C:\ffmpeg\ffmpeg-master-latest-win64-gpl\ffmpeg.exe -y -i https://npr-ice.streamguys1.com/live.mp3 -t 30 -acodec mp3 audio_dataset\NPR_News_20250317_233425.mp3
Recording from NPR_News for 30 seconds...
Executing command: C:\ffmpeg\ffmpeg-master-latest-win64-gpl\ffmpeg.exe -y -i http://icecast.somafm.com/classical -t 30 -acodec mp3 audio_dataset\Classical_Music_20250317_233457.mp3
Recording from Classical_Music for 30 seconds...
Executing command: C:\ffmpeg\ffmpeg-master-latest-win64-gpl\ffmpeg.exe -y -i http://icecast.somafm.com/jazzlounge -t 30 -acodec mp3 audio_dataset\Jazz_Lounge_20250317_233457.mp3
Recording from Jazz_Lounge for 30 seconds...
Executing command: C:\ffmpeg\ffmpeg-master-latest-win64-gpl\ffmpeg.exe -y

In [2]:
import os

# Report the directory that relative paths are resolved against.
working_dir = os.getcwd()
print("Current working directory:", working_dir)


Current working directory: c:\mmdp2 project assignment\Scalable Data Collection\notebooks
