In [None]:
import zipfile
import os

# Location of the uploaded archive and the directory it is unpacked into.
# A single leading slash keeps these consistent with the "/content/..." paths
# used by the later cells; POSIX leaves the meaning of a leading "//"
# implementation-defined, so the redundant slashes are dropped.
zip_file_path = '/content/cry_data.zip'
extract_path = '/content/cry_data'

# Unpack every archive member under extract_path. The context manager closes
# the archive even if extraction fails part-way.
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

# Display the top-level entries now present in the extraction directory
extracted_files = os.listdir(extract_path)
extracted_files

['belly_pain', 'tired', 'burping', 'hungry', 'discomfort']

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Paths
base_dir = "/content/cry_data"  # expected to hold one sub-folder per class
train_dir = "/content/train"
test_dir = "/content/test"

# Create train and test directories
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Fraction of each class's files that is copied to the training set
split_ratio = 0.8

# Process each class folder in the base directory
for class_name in os.listdir(base_dir):
    class_path = os.path.join(base_dir, class_name)

    # Only directories are treated as classes; stray files are ignored
    if not os.path.isdir(class_path):
        continue

    print(f"Processing class: {class_name}")

    # os.listdir returns entries in arbitrary order, so the list is sorted
    # first: otherwise the fixed random_state below would not yield the same
    # split from one run (or one machine) to the next.
    files = sorted(
        f for f in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, f))
    )

    if not files:  # Skip if no files are found
        print(f"Skipping empty folder for class: {class_name}")
        continue

    if len(files) < 2:
        # train_test_split raises ValueError when one side of the split would
        # be empty, which happens for a single-file class; keep that file for
        # training instead of aborting the whole run.
        train_files, test_files = files, []
    else:
        # Shuffle and split the files into train and test
        train_files, test_files = train_test_split(
            files, train_size=split_ratio, random_state=42
        )

    # Create class folders in train and test directories
    train_class_dir = os.path.join(train_dir, class_name)
    test_class_dir = os.path.join(test_dir, class_name)
    os.makedirs(train_class_dir, exist_ok=True)
    os.makedirs(test_class_dir, exist_ok=True)

    # Copy each subset into its destination class folder
    for dest_dir, subset in ((train_class_dir, train_files), (test_class_dir, test_files)):
        for file in subset:
            src_path = os.path.join(class_path, file)
            dest_path = os.path.join(dest_dir, file)
            shutil.copy(src_path, dest_path)

print("Data successfully split into train and test folders!")


Processing class: belly_pain
Processing class: tired
Processing class: burping
Processing class: hungry
Processing class: discomfort
Data successfully split into train and test folders!


In [None]:
#!pip install librosa
import os
import librosa
import numpy as np
import pandas as pd




In [None]:
# Rows for the training CSV accumulate here: one list per audio file,
# holding the MFCC values followed by the class label.
mfcc_data = []
# Root of the training split; each sub-folder name is used as the label.
train_path = '/content/train'


In [None]:
def extract_mfcc(file_path, n_mfcc=18):
    """Return the time-averaged MFCC vector of one audio file.

    Parameters
    ----------
    file_path : path of the audio file, handed to ``librosa.load``.
    n_mfcc : number of coefficients requested from ``librosa.feature.mfcc``.

    Returns
    -------
    The MFCC matrix averaged over its frame axis, i.e. one value per
    coefficient.
    """
    # sr=None keeps the file's native sampling rate instead of resampling
    signal, sample_rate = librosa.load(file_path, sr=None)
    coeffs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
    # Transposing puts frames on axis 0, so the mean collapses the time axis
    return coeffs.T.mean(axis=0)


In [None]:
# Start from an empty list so that re-running this cell rebuilds the training
# rows instead of appending a second copy of every file.
mfcc_data = []

# Sorted listings give the resulting CSV a stable row order
# (os.listdir returns entries in arbitrary order).
for class_folder in sorted(os.listdir(train_path)):
    class_path = os.path.join(train_path, class_folder)
    print(class_path)
    if not os.path.isdir(class_path):
        continue

    for file_name in sorted(os.listdir(class_path)):
        # Compare the extension case-insensitively so files named '.WAV'
        # are not silently left out of the feature table.
        if not file_name.lower().endswith('.wav'):
            continue

        file_path = os.path.join(class_path, file_name)
        mfcc_features = extract_mfcc(file_path)
        # One row per file: the MFCC values followed by the folder name as label
        mfcc_data.append([*mfcc_features, class_folder])


/content/train/belly_pain
/content/train/tired
/content/train/burping
/content/train/hungry
/content/train/discomfort


In [None]:
# Fail with an explicit message when nothing was extracted, instead of the
# bare IndexError that mfcc_data[0] would raise on an empty list.
if not mfcc_data:
    raise ValueError(
        "No MFCC rows were extracted; check that the train folder contains .wav files"
    )

# Create a DataFrame with MFCC features as columns and 'label' as the target column.
# Every row is [mfcc values..., label], so the feature count is the row length minus one.
num_mfcc = len(mfcc_data[0]) - 1
column_names = [f'mfcc_{i}' for i in range(num_mfcc)] + ['label']
df = pd.DataFrame(mfcc_data, columns=column_names)

# Save DataFrame to CSV (index=False keeps the row index out of the file)
df.to_csv('train_mfcc_features.csv', index=False)


In [None]:
# Reset the accumulator so the test rows do not mix with earlier rows:
# one list per audio file, holding the MFCC values followed by the class label.
mfcc_data = []
# Root of the test split; each sub-folder name is used as the label.
test_path = '/content/test'


In [None]:
# NOTE(review): this repeats the extract_mfcc definition from the earlier
# cell with the same behaviour; it is kept so this cell still works on its own.
def extract_mfcc(file_path, n_mfcc=18):
    """Return the time-averaged MFCC vector of one audio file.

    Parameters
    ----------
    file_path : path of the audio file, handed to ``librosa.load``.
    n_mfcc : number of coefficients requested from ``librosa.feature.mfcc``.

    Returns
    -------
    The MFCC matrix averaged over its frame axis, i.e. one value per
    coefficient.
    """
    # sr=None keeps the file's native sampling rate instead of resampling
    signal, sample_rate = librosa.load(file_path, sr=None)
    coeffs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
    # Transposing puts frames on axis 0, so the mean collapses the time axis
    return coeffs.T.mean(axis=0)


In [None]:
# Start from an empty list so that re-running this cell rebuilds the test
# rows instead of appending a second copy of every file.
mfcc_data = []

# Sorted listings give the resulting CSV a stable row order
# (os.listdir returns entries in arbitrary order).
for class_folder in sorted(os.listdir(test_path)):
    class_path = os.path.join(test_path, class_folder)
    print(class_path)
    if not os.path.isdir(class_path):
        continue

    for file_name in sorted(os.listdir(class_path)):
        # Compare the extension case-insensitively so files named '.WAV'
        # are not silently left out of the feature table.
        if not file_name.lower().endswith('.wav'):
            continue

        file_path = os.path.join(class_path, file_name)
        mfcc_features = extract_mfcc(file_path)
        # One row per file: the MFCC values followed by the folder name as label
        mfcc_data.append([*mfcc_features, class_folder])


/content/test/belly_pain
/content/test/tired
/content/test/burping
/content/test/hungry
/content/test/discomfort


In [None]:
# Fail with an explicit message when nothing was extracted, instead of the
# bare IndexError that mfcc_data[0] would raise on an empty list.
if not mfcc_data:
    raise ValueError(
        "No MFCC rows were extracted; check that the test folder contains .wav files"
    )

# Create a DataFrame with MFCC features as columns and 'label' as the target column.
# Every row is [mfcc values..., label], so the feature count is the row length minus one.
num_mfcc = len(mfcc_data[0]) - 1
column_names = [f'mfcc_{i}' for i in range(num_mfcc)] + ['label']
df = pd.DataFrame(mfcc_data, columns=column_names)

# Save DataFrame to CSV (index=False keeps the row index out of the file)
df.to_csv('test_mfcc_features.csv', index=False)
