## This code is meant to convert the webm files' audio into mel-spectrogram images

In [1]:
%cd ./public_dataset_v3/coughvid_20211012

/Users/ethansoroko/Desktop/archive/public_dataset_v3/coughvid_20211012


In [2]:
import pandas as pd
import numpy as np
import os
import csv
from dotenv import load_dotenv
import librosa
import librosa.display
import matplotlib.pyplot as plt
from dotenv import load_dotenv
import subprocess
import warnings

# Suppress UserWarnings and FutureWarnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [3]:
df = pd.read_csv("metadata_compiled.csv")
df_cleaned = df.dropna(subset=['status'])
df_cleaned_labels = df_cleaned[['uuid', 'status']]
df_cleaned_ohe = pd.get_dummies(df_cleaned_labels, columns=['status'])

In [4]:
df_cleaned_ohe.head

<bound method NDFrame.head of                                        uuid  status_COVID-19  status_healthy  \
1      00039425-7f3a-42aa-ac13-834aaa2b6b92            False            True   
2      0007c6f1-5441-40e6-9aaf-a761d8f2da3b            False            True   
3      00098cdb-4da1-4aa7-825a-4f1b9abc214b            False            True   
4      0009eb28-d8be-4dc1-92bb-907e53bc5c7a            False            True   
6      001328dc-ea5d-4847-9ccf-c5aa2a3f2d0f            False            True   
...                                     ...              ...             ...   
34426  ffeea120-92a4-40f9-b692-c3865c7a983f            False            True   
34427  fff13fa2-a725-49ef-812a-39c6cedda33d            False            True   
34429  fff3ff61-2387-4139-938b-539db01e6be5            False           False   
34431  fffaa9f8-4db0-46c5-90fb-93b7b014b55d            False            True   
34433  fffd1834-6cf2-4782-bb30-04c31b46a1fe            False            True   

       st

In [5]:
def get_file_extensions(directory):
  """
  Loops through all files in a directory and returns a set of unique file extensions.

  Args:
    directory: The path to the directory to search.

  Returns:
    A set of unique file extensions found in the directory.
  """
  extensions = set()
  for filename in os.listdir(directory):
    if os.path.isfile(os.path.join(directory, filename)):
      _, ext = os.path.splitext(filename)
      extensions.add(ext.lower())
  return extensions

directory_path = "."  
file_extensions = get_file_extensions(directory_path)
print(f"File extensions in '{directory_path}': {file_extensions}")

File extensions in '.': {'', '.webm', '.json', '.ogg', '.wav', '.csv'}


In [8]:
def extract_audio_from_webm(webm_file):
    """
    Extract audio from a .webm file using ffmpeg and return the audio data.
    """
    # Use ffmpeg to extract audio to a temporary wav file
    temp_audio_path = "temp_audio.wav"
    subprocess.run(
            ['ffmpeg', '-i', webm_file, '-vn', '-acodec', 'pcm_s16le', '-ar', '44100', '-ac', '2', temp_audio_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )    
    # Load the extracted audio using librosa
    y, sr = librosa.load(temp_audio_path, sr=None)

    # Delete temporary audio file
    os.remove(temp_audio_path)

    return y, sr

def create_spectrogram(audio_path, output_path, n_mels=128, fmax=8000):
    """
    Create a mel-spectrogram from the audio and save it as an image.
    """
    if audio_path.lower().endswith('.webm'):
        y, sr = extract_audio_from_webm(audio_path)
    else:
        y, sr = librosa.load(audio_path, sr=None)
    
    # Compute the mel-spectrogram
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmax=fmax)
    S_dB = librosa.power_to_db(S, ref=np.max)
    
    # Plot and save the spectrogram
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', fmax=fmax)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel-Spectrogram')
    plt.tight_layout()
    plt.savefig(output_path, dpi=300)
    plt.close()

def process_by_uuid_df(df, input_folder, output_folder):
    """
    Process UUIDs in the DataFrame and generate spectrograms for the audio files.
    """
    os.makedirs(output_folder, exist_ok=True)

    uuids = df['uuid'].astype(str)

    index = 1
    for uuid in uuids:
        found = False
        for ext in ['.wav', '.ogg', '.webm']:  # Include .webm
            audio_filename = uuid + ext
            audio_path = os.path.join(input_folder, audio_filename)
            if os.path.exists(audio_path):
                output_path = os.path.join(output_folder, f"{uuid}.png")
                print(f"{index}: Processing {audio_filename} -> {uuid}.png")
                create_spectrogram(audio_path, output_path)
                found = True
                break
        
        if not found:
            print(f"Audio file for UUID {uuid} not found in supported formats.")
        index += 1

In [None]:

load_dotenv()
input_folder = os.getcwd()  
output_folder = os.path.join(os.getcwd(), 'spectrograms')  # Create 'spectrograms' subfolder

process_by_uuid_df(df_cleaned_ohe, input_folder, output_folder)
print("Finished processing UUID-based spectrograms.")

1: Processing 00039425-7f3a-42aa-ac13-834aaa2b6b92.webm -> 00039425-7f3a-42aa-ac13-834aaa2b6b92.png
2: Processing 0007c6f1-5441-40e6-9aaf-a761d8f2da3b.webm -> 0007c6f1-5441-40e6-9aaf-a761d8f2da3b.png
3: Processing 00098cdb-4da1-4aa7-825a-4f1b9abc214b.wav -> 00098cdb-4da1-4aa7-825a-4f1b9abc214b.png
4: Processing 0009eb28-d8be-4dc1-92bb-907e53bc5c7a.webm -> 0009eb28-d8be-4dc1-92bb-907e53bc5c7a.png
5: Processing 001328dc-ea5d-4847-9ccf-c5aa2a3f2d0f.webm -> 001328dc-ea5d-4847-9ccf-c5aa2a3f2d0f.png
6: Processing 001d8e33-a4af-4edb-98ba-b03f891d9a6c.webm -> 001d8e33-a4af-4edb-98ba-b03f891d9a6c.png
7: Processing 001e2f19-d81c-4029-b33c-d2db56b23a4a.webm -> 001e2f19-d81c-4029-b33c-d2db56b23a4a.png
8: Processing 0028b68c-aca4-4f4f-bb1d-cb4ed5bbd952.webm -> 0028b68c-aca4-4f4f-bb1d-cb4ed5bbd952.png
9: Processing 00291cce-36a0-4a29-9e2d-c1d96ca17242.webm -> 00291cce-36a0-4a29-9e2d-c1d96ca17242.png
10: Processing 0029d048-898a-4c70-89c7-0815cdcf7391.webm -> 0029d048-898a-4c70-89c7-0815cdcf7391.png
