In [None]:
import zipfile
import os

# Where the uploaded archive lives and where its contents should be unpacked
zip_file_path = '/content/archive (7).zip'
extract_path = '/content'

# Unpack every member of the archive into the target directory
with zipfile.ZipFile(zip_file_path, mode='r') as archive:
    archive.extractall(path=extract_path)

# Display the entries of the target directory after extraction
extracted_files = os.listdir(extract_path)
extracted_files

['.config', 'archive (7).zip', 'donateacry_corpus', 'sample_data']

In [None]:
import os
from pydub import AudioSegment

In [None]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:


# Specify the main directory path containing the class subfolders
main_directory = '/content/donateacry_corpus'

# Write a +5 dB copy of every audio file next to the original, one class
# subfolder at a time.
for subfolder in os.listdir(main_directory):
    subfolder_path = os.path.join(main_directory, subfolder)

    # Only directories (class folders) are processed
    if not os.path.isdir(subfolder_path):
        continue

    for file in os.listdir(subfolder_path):
        # Only audio files are augmented
        if not file.endswith(('.wav', '.mp3', '.flac', '.ogg')):
            continue

        # Split on the LAST dot only. The corpus file names contain extra dots
        # (e.g. '...-1.7-m-22-dc.wav'), so replacing every '.' would scatter
        # '_augmented' through the middle of the name.
        stem, extension = os.path.splitext(file)

        # Copies written by an earlier run of this cell are not augmented
        # again, so re-running the cell does not keep doubling the dataset.
        if stem.endswith('_augmented'):
            continue

        file_path = os.path.join(subfolder_path, file)

        # Load the audio file
        audio = AudioSegment.from_file(file_path)

        # Example augmentation: increase volume by 5 dB
        augmented_audio = audio + 5

        # '<name>_augmented<.ext>' in the same folder as the original
        augmented_filename = f"{stem}_augmented{extension}"
        augmented_path = os.path.join(subfolder_path, augmented_filename)

        # The export format is the extension without its leading dot
        augmented_audio.export(augmented_path, format=extension[1:])

        print(f"Augmented audio created at: {augmented_path}")


Augmented audio created at: /content/donateacry_corpus/discomfort/837fd072-8704-4196-9ff1-1d2c07886e55-1432429478471-1_augmented.7-m-22-dc_augmented.wav
Augmented audio created at: /content/donateacry_corpus/discomfort/f258a8e2-92cd-4843-9da4-80ed26ec8e10-1436944996156-1_augmented.7-f-26-ch_augmented.wav
Augmented audio created at: /content/donateacry_corpus/discomfort/1309B82C-F146-46F0-A723-45345AFA6EA8-1430703937-1_augmented.0-f-48-dc_augmented.wav
Augmented audio created at: /content/donateacry_corpus/discomfort/999bf14b-e417-4b44-b746-9253f81efe38-1430844979531-1_augmented.7-m-04-ch_augmented.wav
Augmented audio created at: /content/donateacry_corpus/discomfort/999bf14b-e417-4b44-b746-9253f81efe38-1430844958178-1_augmented.7-m-04-ch_augmented.wav
Augmented audio created at: /content/donateacry_corpus/discomfort/d6cda191-4962-4308-9a36-46d5648a95ed-1431161889263-1_augmented.7-m-04-dc_augmented.wav
Augmented audio created at: /content/donateacry_corpus/discomfort/999bf14b-e417-4b44-

In [None]:


# Set the path to the main directory and the augmentation threshold
main_directory = '/content/donateacry_corpus'
augmentation_threshold = 50  # Classes with fewer audio files than this get augmented

# Write a +5 dB copy of every audio file, but only in class folders that hold
# fewer than `augmentation_threshold` audio files.
for subfolder in os.listdir(main_directory):
    subfolder_path = os.path.join(main_directory, subfolder)

    # Only directories (class folders) are processed
    if not os.path.isdir(subfolder_path):
        continue

    # Count the audio files currently in the class folder
    audio_files = [
        file for file in os.listdir(subfolder_path)
        if file.endswith(('.wav', '.mp3', '.flac', '.ogg'))
    ]
    num_files = len(audio_files)

    # Skip augmentation if the folder already has enough samples
    if num_files >= augmentation_threshold:
        print(f"Skipping augmentation for '{subfolder}' (has {num_files} samples).")
        continue

    print(f"Augmenting files in '{subfolder}' (has {num_files} samples).")
    for file in audio_files:
        file_path = os.path.join(subfolder_path, file)

        # Load the audio file
        audio = AudioSegment.from_file(file_path)

        # Example augmentation: increase volume by 5 dB
        augmented_audio = audio + 5

        # Split on the LAST dot only: the corpus file names contain extra dots,
        # so replacing every '.' would insert '_augmented' mid-name as well.
        stem, extension = os.path.splitext(file)
        augmented_filename = f"{stem}_augmented{extension}"
        augmented_path = os.path.join(subfolder_path, augmented_filename)

        # Export to the same folder; the format is the extension without its dot
        augmented_audio.export(augmented_path, format=extension[1:])
        print(f"Created augmented file: {augmented_path}")


In [None]:
import os
import random
from pydub import AudioSegment
import numpy as np

# Path to main directory and desired number of samples per class
main_directory = '/content/donateacry_corpus'
target_sample_count = 100  # Target number of samples per class

# The augmentation below draws from both `random` and `np.random`; seed both
# generators so repeated runs pick the same augmentations and noise.
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)

# Function to add noise
def add_noise(audio, noise_level=0.05):
    """Overlay zero-mean Gaussian noise on ``audio`` and return the mix.

    Args:
        audio: Source segment. It must provide the pydub ``AudioSegment``
            members used here: ``sample_width``, ``max_possible_amplitude``,
            ``get_array_of_samples()``, ``_spawn()`` and ``overlay()``.
        noise_level: Standard deviation of the noise, expressed as a fraction
            of ``audio.max_possible_amplitude``.

    Returns:
        The result of ``audio.overlay(noise_segment)``, where ``noise_segment``
        carries one noise sample per sample of ``audio``.

    Raises:
        ValueError: If ``audio.sample_width`` is not 1, 2 or 4 bytes.
    """
    dtype_by_width = {1: np.int8, 2: np.int16, 4: np.int32}
    if audio.sample_width not in dtype_by_width:
        raise ValueError(f"Unsupported sample width: {audio.sample_width} bytes")
    sample_dtype = dtype_by_width[audio.sample_width]

    sample_count = len(audio.get_array_of_samples())
    noise = np.random.normal(0, audio.max_possible_amplitude * noise_level, sample_count)

    # np.random.normal yields float64 (8 bytes per value). Dumping those bytes
    # straight into a segment would have them re-read as `sample_width`-byte
    # integers, i.e. garbage instead of Gaussian noise. Round, clip to the
    # representable range and cast to the segment's integer type first.
    limits = np.iinfo(sample_dtype)
    noise_pcm = np.clip(np.rint(noise), limits.min, limits.max).astype(sample_dtype)

    # _spawn builds a segment from raw bytes while inheriting the frame rate,
    # sample width and channel count of `audio`.
    noise_segment = audio._spawn(noise_pcm.tobytes())
    return audio.overlay(noise_segment)

# Function to change speed
def change_speed(audio, speed=1.25):
    """Return a copy of ``audio`` whose frame rate was scaled by ``speed``.

    The raw samples are first re-labelled with the frame rate
    ``int(audio.frame_rate * speed)`` and the resulting segment is then
    converted back to the original frame rate with ``set_frame_rate``.

    Args:
        audio: Segment providing ``raw_data``, ``frame_rate``, ``_spawn()``;
            the spawned segment must provide ``set_frame_rate()``.
        speed: Factor applied to the frame rate (presumably > 1 speeds the
            clip up and < 1 slows it down).

    Returns:
        The segment returned by ``set_frame_rate``.
    """
    original_rate = audio.frame_rate
    scaled_rate = int(original_rate * speed)
    relabelled = audio._spawn(audio.raw_data, overrides={"frame_rate": scaled_rate})
    return relabelled.set_frame_rate(original_rate)

# Function to change pitch
def change_pitch(audio, semitones=2):
    """Return a copy of ``audio`` with its frame rate scaled by ``2 ** (semitones / 12)``.

    The raw samples are re-labelled with the scaled frame rate (truncated to
    an int) and the resulting segment is converted back to the original frame
    rate with ``set_frame_rate``.

    Args:
        audio: Segment providing ``raw_data``, ``frame_rate``, ``_spawn()``;
            the spawned segment must provide ``set_frame_rate()``.
        semitones: Number of semitones used to derive the frame-rate factor;
            may be negative.

    Returns:
        The segment returned by ``set_frame_rate``.
    """
    rate_factor = 2.0 ** (semitones / 12.0)
    shifted_rate = int(audio.frame_rate * rate_factor)
    relabelled = audio._spawn(audio.raw_data, overrides={'frame_rate': shifted_rate})
    return relabelled.set_frame_rate(audio.frame_rate)

# Grow every class folder that holds fewer than `target_sample_count` audio
# files by writing randomly augmented copies (noise / speed / pitch) of its
# recordings until the target is reached.
for subfolder in sorted(os.listdir(main_directory)):
    subfolder_path = os.path.join(main_directory, subfolder)

    # Only directories (class folders) are processed
    if not os.path.isdir(subfolder_path):
        continue

    # Snapshot of the recordings used as augmentation sources; sorted so the
    # processing order does not depend on the file system's listing order.
    audio_files = sorted(
        file for file in os.listdir(subfolder_path)
        if file.endswith(('.wav', '.mp3', '.flac', '.ogg'))
    )
    num_files = len(audio_files)

    if num_files >= target_sample_count:
        print(f"Skipping augmentation for '{subfolder}' (has {num_files} samples).")
        continue

    # With nothing to augment, the while-loop below could never raise
    # num_files and would spin forever.
    if not audio_files:
        print(f"Skipping augmentation for '{subfolder}' (no audio files to augment).")
        continue

    print(f"Augmenting '{subfolder}' to reach {target_sample_count} samples.")

    # Keep cycling over the source files until the target count is reached
    while num_files < target_sample_count:
        for file in audio_files:
            if num_files >= target_sample_count:
                break

            # Pick the augmentation and its parameter first, so the output
            # name is known before any audio is decoded.
            augmentation_choice = random.choice(['noise', 'speed', 'pitch'])
            if augmentation_choice == 'noise':
                tag = 'noise'
            elif augmentation_choice == 'speed':
                speed = random.choice([0.9, 1.1, 1.2])  # Random speed changes
                tag = f'speed{speed}'
            else:
                semitones = random.choice([-2, -1, 1, 2])  # Random pitch changes
                tag = f'pitch{semitones}'

            # Split on the LAST dot only: the corpus file names contain extra
            # dots, so replacing every '.' would insert the tag mid-name too.
            stem, extension = os.path.splitext(file)
            augmented_filename = f'{stem}_{tag}_augmented{extension}'
            augmented_path = os.path.join(subfolder_path, augmented_filename)

            if os.path.exists(augmented_path):
                if augmentation_choice != 'noise':
                    # Same source + same speed/pitch parameter would only
                    # reproduce the existing file. Overwriting it while still
                    # counting it would leave the class short of the target,
                    # so skip; a later pass draws a new random choice (noise
                    # can always be drawn, so the loop still terminates).
                    continue
                # Noise differs on every draw: keep it under a numbered name
                copy_index = 2
                while os.path.exists(augmented_path):
                    augmented_filename = f'{stem}_{tag}{copy_index}_augmented{extension}'
                    augmented_path = os.path.join(subfolder_path, augmented_filename)
                    copy_index += 1

            audio = AudioSegment.from_file(os.path.join(subfolder_path, file))

            if augmentation_choice == 'noise':
                augmented_audio = add_noise(audio, noise_level=0.05)
            elif augmentation_choice == 'speed':
                augmented_audio = change_speed(audio, speed=speed)
            else:
                augmented_audio = change_pitch(audio, semitones=semitones)

            # Export augmented audio; the format is the extension without its dot
            augmented_audio.export(augmented_path, format=extension[1:])
            num_files += 1

            print(f"Created augmented file: {augmented_path}")


Augmenting 'discomfort' to reach 100 samples.
Created augmented file: /content/donateacry_corpus/discomfort/d6cda191-4962-4308-9a36-46d5648a95ed-1426942891562-1_noise_augmented.4-f-48-dc_noise_augmented.wav
Created augmented file: /content/donateacry_corpus/discomfort/79FF400A-97E2-4701-987D-C7C850D5523C-1430089621-1_pitch2_augmented.0-f-48-dc_pitch2_augmented.wav
Created augmented file: /content/donateacry_corpus/discomfort/837fd072-8704-4196-9ff1-1d2c07886e55-1432429478471-1_pitch-2_augmented.7-m-22-dc_pitch-2_augmented.wav
Created augmented file: /content/donateacry_corpus/discomfort/7b0e160e-0505-459e-8ecb-304d7afae9d2-1437486974312-1_pitch1_augmented.7-m-04-dc_pitch1_augmented.wav
Created augmented file: /content/donateacry_corpus/discomfort/ae5a462b-5424-4b5b-82d5-07ccb61654ab-1431064345173-1_speed1.1_augmented.7-f-48-dc_speed1.1_augmented.wav
Created augmented file: /content/donateacry_corpus/discomfort/79FF400A-97E2-4701-987D-C7C850D5523C-1430089688-1_noise_augmented.0-f-48-dc_

In [None]:
import shutil
import os

In [None]:


# Path to the main directory containing the class subfolders
main_directory = '/content/donateacry_corpus'

# Full path of the archive to create. This has to name a file: with
# '/content' alone, make_archive() writes '/content.zip' outside the folder
# while the message below would still claim the archive is at '/content'.
zip_file_path = '/content/cry_data.zip'

# make_archive() expects the archive name WITHOUT its extension, appends
# '.zip' itself and returns the path of the file it actually wrote.
archive_base = os.path.splitext(zip_file_path)[0]
created_archive = shutil.make_archive(archive_base, 'zip', main_directory)

print(f"ZIP file created at: {created_archive}")


ZIP file created at: /content


In [None]:
# List the entries of /content (presumably to check whether the ZIP archive
# was written there — TODO confirm intent)
os.listdir("/content")

['.config', 'archive (7).zip', 'donateacry_corpus', 'sample_data']

In [None]:
import shutil
from google.colab import files

# Folder whose contents are archived (change to your folder path in Colab)
main_directory = '/content/donateacry_corpus'

# Destination of the resulting archive
zip_file_path = '/content/cry_data.zip'

# make_archive() takes the archive name without its '.zip' suffix
shutil.make_archive(
    base_name=zip_file_path.replace('.zip', ''),
    format='zip',
    root_dir=main_directory,
)

print(f"ZIP file created at: {zip_file_path}")

# Hand the archive to the browser as a download
files.download(zip_file_path)


ZIP file created at: /content/cry_data.zip


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>