# In [1]:
import os
import json
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import warnings
import inflect

# Root folder that holds one sub-folder of MFCC .npy files per data split.
npy_path = "./mfcc_npy"
# inflect engine; used further down to turn numeric folder names into words.
p = inflect.engine()

# Per-split file names, keyed by split folder name.
# Note that the values carry a ".csv" extension.
json_filenames = dict(
    train="data_train_art_4_classes.csv",
    val="data_val_4_classes.csv",
    test="data_test_4_classes.csv",
)

# Spoken-digit word -> integer class index.
labels_mapping = dict(four=0, three=1, five=2, seven=3)

# One accumulator per split: parallel lists of MFCC matrices and labels.
# Each split gets its own fresh pair of lists (nothing is shared).
data_sets = {
    split_name: {"mfcc": [], "labels": []}
    for split_name in ("train", "val", "test")
}

# Walk <npy_path>/<split>/<label_dir>/*.npy and, for every .npy file, append
# its transposed array (as nested lists) and its integer class label to
# data_sets[split].
# Assumed directory structure: ./mfcc_npy/train/0/, ./mfcc_npy/train/1/, etc.
for split in data_sets:
    split_path = os.path.join(npy_path, split)
    if not os.path.isdir(split_path):
        # Keep going with the remaining splits, but do not hide the gap.
        warnings.warn(f"Split directory not found, skipping: {split_path}")
        continue

    split_data = data_sets[split]

    # os.listdir returns entries in arbitrary order; sorting makes the order
    # of the collected samples reproducible from run to run.
    for label_dir in sorted(os.listdir(split_path)):
        label_path = os.path.join(split_path, label_dir)
        if not os.path.isdir(label_path):
            continue

        # Folder name (a number) -> English word -> class index.
        # NOTE(review): how number_to_words treats non-numeric folder names
        # is not visible here - confirm if such folders can occur.
        word_label = p.number_to_words(label_dir)
        mapped_label = labels_mapping.get(word_label, -1)
        if word_label not in labels_mapping:
            # The -1 fallback is kept so the output stays the same as before,
            # but it is now reported instead of passing silently.
            warnings.warn(
                f"No class mapping for folder {label_path!r} "
                f"(word label {word_label!r}); its samples are labelled -1"
            )

        for file in sorted(os.listdir(label_path)):
            if not file.endswith(".npy"):
                continue
            file_path = os.path.join(label_path, file)
            # Transpose before converting to plain lists so that the result
            # is JSON-serialisable.
            mfcc = np.load(file_path).T.tolist()
            split_data["mfcc"].append(mfcc)
            split_data["labels"].append(mapped_label)

# Serialise all splits together into one pretty-printed JSON file.
with open("data.json", mode="w") as out_file:
    json.dump(data_sets, fp=out_file, indent=4)
# Alternative (disabled): write each data set to its own JSON file, named
# after the json_filenames entry with "csv" replaced by "json".
# for split, data in data_sets.items():
#     json_path = json_filenames.get(split)
#     if json_path:  # Ensure there's a corresponding JSON file path defined
#         with open(json_path.replace("csv","json"), "w") as fp:
#             json.dump(data, fp, indent=4)