In [None]:
import os
import shutil

import yaml
from pydub import AudioSegment

# Root directory scanned by the processing loop. Each sub-directory is treated
# as one track and is looked up for "metadata.yaml", a "stems" folder and
# "mix.flac". The path is relative to the current working directory.
DIR_PATH = "slakh2100_flac_redux/reduced_train"

In [None]:
# Prune unwanted instrument stems from every track under DIR_PATH and rebuild
# each affected track's mix from the stems that remain.
#
# For every track folder this cell:
#   1. reads metadata.yaml and collects the stems whose "inst_class" is listed
#      in remove_list,
#   2. deletes those stems' .flac files and rewrites metadata.yaml without them,
#   3. deletes the whole track folder if no stem audio file is left, otherwise
#      overlays the remaining stems and overwrites mix.flac.
#
# WARNING: this modifies the dataset in place (files are deleted and
# metadata.yaml / mix.flac are overwritten). Tracks with nothing to remove are
# left untouched.

# Instrument classes to remove. Defined once, outside the loop, because it does
# not depend on the track. Both capitalisations of "Sound effects" are listed
# so that either spelling in the metadata matches.
remove_list = {
    "Chromatic Percussion", "Organ",
    "Synth Lead", "Synth Pad",
    "Sound Effects", "Ethnic",
    "Percussive", "Sound effects",
}

# Loop through each track. os.listdir() returns entries in arbitrary order, so
# sort them to make the processing order (and the printed log) reproducible.
for track in sorted(os.listdir(DIR_PATH)):
    track_path = os.path.join(DIR_PATH, track)
    metadata_path = os.path.join(track_path, "metadata.yaml")
    stems_path = os.path.join(track_path, "stems")
    mix_path = os.path.join(track_path, "mix.flac")

    # Skip stray files and folders that carry no metadata.
    if not os.path.isdir(track_path) or not os.path.exists(metadata_path):
        continue

    # Load metadata
    print(track)
    with open(metadata_path, "r") as f:
        metadata = yaml.safe_load(f)

    # An empty YAML file loads as None and a bare "stems:" key loads as a None
    # value; skip such tracks instead of crashing on them.
    if not isinstance(metadata, dict):
        continue
    if not isinstance(metadata.get("stems"), dict):
        continue

    stems_to_remove = [
        stem_id
        for stem_id, stem_data in metadata["stems"].items()
        if stem_data.get("inst_class", "") in remove_list
    ]
    if not stems_to_remove:
        continue

    # Remove stems and update metadata. The audio file may legitimately be
    # absent, so only the metadata entry is removed unconditionally.
    for stem_id in stems_to_remove:
        stem_file = os.path.join(stems_path, f"{stem_id}.flac")
        if os.path.exists(stem_file):
            os.remove(stem_file)
        del metadata["stems"][stem_id]

    with open(metadata_path, "w") as f:
        yaml.dump(metadata, f, default_flow_style=False)

    # Collect remaining stems: only those still listed in the metadata whose
    # audio file actually exists on disk.
    candidate_files = [
        os.path.join(stems_path, f"{stem_id}.flac")
        for stem_id in metadata["stems"]
    ]
    remaining_stems = [path for path in candidate_files if os.path.exists(path)]

    # Delete track folder if no stems left
    if not remaining_stems:
        print("No stems Deleting track")
        shutil.rmtree(track_path)
        continue

    # Mix the remaining stems: start from the first one and overlay the rest.
    # NOTE(review): AudioSegment.overlay() keeps the length of the segment it
    # is called on, so this assumes all stems of a track are equally long --
    # confirm for this dataset.
    mixed_audio = AudioSegment.from_file(remaining_stems[0])
    for stem_file in remaining_stems[1:]:
        mixed_audio = mixed_audio.overlay(AudioSegment.from_file(stem_file))

    mixed_audio.export(mix_path, format="flac")
    print("Finished")


Track00001
Track00002
Track00003
Track00004
Track00005
Track00006
Track00007
Track00008
Track00009
Track00010
Track00011
Track00012
Track00013
Track00014
Track00015
Track00016
Track00017
Track00018
Track00019
Track00020
Track00021
Track00022
Track00023
Track00024
Track00025
Track00026
Track00027
Track00028
Track00029
Track00030
Track00031
Track00032
Track00033
Track00034
Track00035
Track00036
Track00037
Track00038
Track00039
Track00040
Track00041
Track00042
Track00043
Track00044
Track00045
Track00046
Track00047
Track00048
Track00050
Track00051
Track00052
Track00053
Track00054
Track00055
Track00056
Track00057
Track00058
Track00059
Track00060
Track00061
Track00062
Track00063
Track00064
Track00065
Track00066
Track00067
Track00068
Track00069
Track00070
Track00071
Track00072
Track00073
Track00074
Track00075
Track00076
Track00078
Track00079
Track00080
Track00081
Track00082
Track00083
Track00084
Track00085
Track00086
Track00087
Track00088
Track00089
Track00090
Track00091
Track00092
Track00093