In [None]:
import zipfile
import os

# Uploaded source archive and the folder it is unpacked into
zip_file_path = '/content/cry_data.zip'
extract_path = '/content/my_data'

# Unpack every member of the archive below extract_path
with zipfile.ZipFile(zip_file_path, mode='r') as archive:
    archive.extractall(path=extract_path)

# Show the top-level entries of extract_path as the cell's output
extracted_files = os.listdir(extract_path)
extracted_files

['discomfort', 'tired', 'belly_pain', 'burping', 'hungry']

In [None]:
import os
import pandas as pd


In [None]:
# Root folder that is scanned for class sub-folders of audio files
# (the same path the archive was extracted to earlier in this notebook)
base_dir = '/content/my_data'

In [None]:

# Build two parallel lists with one entry per audio file: its path relative
# to base_dir ("<class>/<file>") and the name of its class folder.
audio_paths = []
audio_classes = []

# os.listdir returns entries in arbitrary order, so sort at both levels to
# make the row order (and therefore the row index) identical on every run.
for class_name in sorted(os.listdir(base_dir)):
    class_dir = os.path.join(base_dir, class_name)

    # Only sub-folders of base_dir are treated as classes
    if not os.path.isdir(class_dir):
        continue

    for audio_file in sorted(os.listdir(class_dir)):
        # Skip nested folders: only regular files can be loaded as audio
        if not os.path.isfile(os.path.join(class_dir, audio_file)):
            continue
        audio_paths.append(os.path.join(class_name, audio_file))
        audio_classes.append(class_name)


In [None]:

# Assemble the two collected lists into a two-column table
data = dict(audio_path=audio_paths, audio_class=audio_classes)
df = pd.DataFrame(data)

# Persist the table to disk, leaving out the row index
csv_file_path = '/content/audio_data.csv'
df.to_csv(csv_file_path, index=False)

print(f"CSV file created at {csv_file_path}")


CSV file created at /content/audio_data.csv


In [None]:
# Preview the first five rows of the table built above
df.head(5)

Unnamed: 0,audio_path,audio_class
0,discomfort/D1CB71F9-EF76-49B3-A703-41D557444D2...,discomfort
1,discomfort/ae5a462b-5424-4b5b-82d5-07ccb61654a...,discomfort
2,discomfort/837fd072-8704-4196-9ff1-1d2c07886e5...,discomfort
3,discomfort/1309B82C-F146-46F0-A723-45345AFA6EA...,discomfort
4,discomfort/1309B82C-F146-46F0-A723-45345AFA6EA...,discomfort


In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt

In [None]:
import pandas as pd

# Reload the table from the CSV file written earlier in this notebook
csv_file_path = '/content/audio_data.csv'
df = pd.read_csv(csv_file_path)

# End the cell with the expression itself (instead of print) so the preview
# is rendered as a table, like the other preview cells in this notebook
df.head()


                                          audio_path audio_class
0  discomfort/D1CB71F9-EF76-49B3-A703-41D557444D2...  discomfort
1  discomfort/ae5a462b-5424-4b5b-82d5-07ccb61654a...  discomfort
2  discomfort/837fd072-8704-4196-9ff1-1d2c07886e5...  discomfort
3  discomfort/1309B82C-F146-46F0-A723-45345AFA6EA...  discomfort
4  discomfort/1309B82C-F146-46F0-A723-45345AFA6EA...  discomfort


In [None]:
# Turn the class-relative paths stored in the CSV ("<class>/<file>") into
# absolute paths below the folder the archive was extracted to.
# Only entries that are not absolute yet are rewritten, so running this cell
# more than once does not stack prefixes ("/content//content/...").
audio_root = '/content/my_data'
needs_root = ~df['audio_path'].str.startswith('/')
df.loc[needs_root, 'audio_path'] = audio_root + '/' + df.loc[needs_root, 'audio_path']

df.head(5)

Unnamed: 0,audio_path,audio_class
0,/content//content/discomfort/D1CB71F9-EF76-49B...,discomfort
1,/content//content/discomfort/ae5a462b-5424-4b5...,discomfort
2,/content//content/discomfort/837fd072-8704-419...,discomfort
3,/content//content/discomfort/1309B82C-F146-46F...,discomfort
4,/content//content/discomfort/1309B82C-F146-46F...,discomfort


In [None]:
# Load the saved table again under a second name, audio_data
# (paths stored in the file are relative: "<class>/<file>")
csv_path = '/content/audio_data.csv'
audio_data = pd.read_csv(csv_path)

In [None]:
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np  # used by save_mel_spectrogram (ref=np.max); was missing from this cell

# Folder that receives the spectrogram images, one sub-folder per class
mel_spec_dir = '/content/mel_spectos'
os.makedirs(mel_spec_dir, exist_ok=True)

# Folder the audio archive was extracted to; files live in <root>/<class>/<file>
audio_root_dir = '/content/my_data'


def save_mel_spectrogram(audio_file, output_path, sr=22050, duration=5):
    """Render the mel spectrogram of one audio file and save it as an image.

    Args:
        audio_file: Path of the audio file to load.
        output_path: Path of the image file to write.
        sr: Sample rate passed to librosa.load as the target rate.
        duration: Maximum number of seconds passed to librosa.load.
    """
    y, sr = librosa.load(audio_file, sr=sr, duration=duration)
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=512)
    # Convert power to decibels relative to the largest value of this clip
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        img = librosa.display.specshow(
            mel_spec_db, sr=sr, hop_length=512, x_axis='time', y_axis='mel', ax=ax
        )
        fig.colorbar(img, ax=ax, format='%+2.0f dB')
        ax.set_title('Mel Spectrogram')
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Always release the figure, even if drawing or saving fails, so that
        # a long loop over many files does not accumulate open figures
        plt.close(fig)


# Generate one spectrogram image per row of the table
for idx, row in df.iterrows():
    # Rebuild the on-disk location from the class folder and the file name.
    # This matches how the table was built (<class>/<file> below the data
    # folder) and is unaffected by any prefix already added to audio_path.
    audio_file = os.path.join(
        audio_root_dir, row['audio_class'], os.path.basename(row['audio_path'])
    )

    # Report and skip rows whose audio file is missing
    if not os.path.exists(audio_file):
        print(f"File not found: {audio_file}")
        continue

    class_dir = os.path.join(mel_spec_dir, row['audio_class'])
    os.makedirs(class_dir, exist_ok=True)
    mel_spec_output_path = os.path.join(class_dir, f'mel_spectrogram_{idx}.png')

    # A failure on one file is reported and does not stop the remaining files
    try:
        save_mel_spectrogram(audio_file, mel_spec_output_path)
        print(f"Saved Mel Spectrogram for {audio_file} at {mel_spec_output_path}")
    except Exception as e:
        print(f"Error processing {audio_file}: {e}")


In [None]:
# Preview the first five rows of audio_data
audio_data.head(5)

Unnamed: 0,audio_path,audio_class
0,discomfort/D1CB71F9-EF76-49B3-A703-41D557444D2...,discomfort
1,discomfort/ae5a462b-5424-4b5b-82d5-07ccb61654a...,discomfort
2,discomfort/837fd072-8704-4196-9ff1-1d2c07886e5...,discomfort
3,discomfort/1309B82C-F146-46F0-A723-45345AFA6EA...,discomfort
4,discomfort/1309B82C-F146-46F0-A723-45345AFA6EA...,discomfort


In [None]:
# Turn the class-relative paths stored in the CSV ("<class>/<file>") into
# absolute paths below the folder the archive was extracted to.
# Only entries that are not absolute yet are rewritten, so running this cell
# more than once does not stack prefixes.
audio_root = '/content/my_data'
needs_root = ~audio_data['audio_path'].str.startswith('/')
audio_data.loc[needs_root, 'audio_path'] = (
    audio_root + '/' + audio_data.loc[needs_root, 'audio_path']
)

audio_data.head(5)

Unnamed: 0,audio_path,audio_class
0,/content/discomfort/D1CB71F9-EF76-49B3-A703-41...,discomfort
1,/content/discomfort/ae5a462b-5424-4b5b-82d5-07...,discomfort
2,/content/discomfort/837fd072-8704-4196-9ff1-1d...,discomfort
3,/content/discomfort/1309B82C-F146-46F0-A723-45...,discomfort
4,/content/discomfort/1309B82C-F146-46F0-A723-45...,discomfort


In [None]:
# Rebind df to the same DataFrame object as audio_data (no copy is made),
# replacing the table that df referred to before
df = audio_data

In [None]:
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm

# Folder that receives the spectrogram images, one sub-folder per class
output_dir = '/content/mel_spectrograms'
os.makedirs(output_dir, exist_ok=True)

for _, row in tqdm(audio_data.iterrows(), total=len(audio_data)):
    audio_path = row['audio_path']
    audio_class = row['audio_class']

    class_dir = os.path.join(output_dir, audio_class)
    os.makedirs(class_dir, exist_ok=True)

    # sr=None keeps each file's native sample rate
    y, sr = librosa.load(audio_path, sr=None)

    # A signal sampled at sr carries no content above sr / 2, so cap the upper
    # mel edge there; mel bands placed above that limit have no FFT bins to
    # cover and come out empty.
    fmax = min(8000, sr / 2)
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=fmax)
    S_dB = librosa.power_to_db(S, ref=np.max)

    # The image is named after the audio file, with a .png extension
    file_name = os.path.splitext(os.path.basename(audio_path))[0] + '.png'
    save_path = os.path.join(class_dir, file_name)

    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        # Pass the same fmax so the mel axis is labelled with the band range
        # that was actually used to compute S
        img = librosa.display.specshow(
            S_dB, sr=sr, x_axis='time', y_axis='mel', fmax=fmax, ax=ax
        )
        fig.colorbar(img, ax=ax, format='%+2.0f dB')
        ax.set_title(f'Mel Spectrogram - {audio_class}')
        fig.tight_layout()
        fig.savefig(save_path)
    finally:
        # Release the figure even if drawing or saving fails
        plt.close(fig)

print(f"Mel spectrograms have been saved in '{output_dir}' folder, organized by class.")


  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)
100%|██████████| 690/690 [05:04<00:00,  2.26it/s]

Mel spectrograms have been saved in '/content/mel_spectrograms' folder, organized by class.





In [None]:
import shutil

# Folder to archive and the path of the zip file to produce
output_dir = '/content/mel_spectrograms'
zip_file_path = '/content/mel_spectrograms.zip'

# make_archive appends ".zip" itself, so hand it the path without that suffix.
# Only a trailing ".zip" is removed (str.replace would strip every occurrence).
suffix = '.zip'
if zip_file_path.endswith(suffix):
    archive_base = zip_file_path[:-len(suffix)]
else:
    archive_base = zip_file_path
created_archive = shutil.make_archive(archive_base, 'zip', output_dir)

# Report the path make_archive actually wrote
print(f"Zipped folder saved as '{created_archive}'")


Zipped folder saved as '/content/mel_spectrograms.zip'
