In [1]:
import os
import pandas as pd
import pretty_midi
from tqdm import tqdm

In [2]:
def extract_features(midi_path):
    """Summarise one MIDI file as a flat dict of numeric features.

    Parameters
    ----------
    midi_path
        Path to the MIDI file; passed unchanged to ``pretty_midi.PrettyMIDI``.

    Returns
    -------
    dict or None
        On success, a dict with the keys:

        - ``tempo``: unweighted mean of the tempi reported by
          ``get_tempo_changes()``, or 120 when none are reported.
        - ``note_density``: total note count divided by ``get_end_time()``
          (0 when the end time is 0).
        - ``pitch_mean``: mean pitch over all notes of all instruments
          (0 when there are no notes).
        - ``pitch_std``: sample standard deviation (pandas default, ddof=1)
          of the pitches; 0 when there are fewer than two notes.
        - ``note_duration_mean``: mean of ``note.end - note.start``
          (0 when there are no notes).
        - ``instrument_count``: number of instruments in the file.
        - ``is_drum_present``: 1 if any instrument has ``is_drum`` set, else 0.

        ``None`` if loading or analysing the file raised; the error is
        printed rather than propagated so a batch run can continue.
    """
    try:
        pm = pretty_midi.PrettyMIDI(midi_path)

        # Query the tempo map and end time once each instead of twice.
        tempi = pm.get_tempo_changes()[1]
        tempo = tempi.mean() if tempi.size > 0 else 120
        end_time = pm.get_end_time()

        # Flatten the notes once; pitches and durations are derived from the same pass.
        all_notes = [note for instrument in pm.instruments for note in instrument.notes]
        pitches = [note.pitch for note in all_notes]
        durations = [note.end - note.start for note in all_notes]
        is_drum = any(instr.is_drum for instr in pm.instruments)

        return {
            "tempo": tempo,
            "note_density": len(pitches) / end_time if end_time else 0,
            "pitch_mean": sum(pitches) / len(pitches) if pitches else 0,
            # The sample std of a single value is NaN; treat that degenerate
            # case like the empty one so NaN does not leak into the features.
            "pitch_std": pd.Series(pitches).std() if len(pitches) > 1 else 0,
            "note_duration_mean": sum(durations) / len(durations) if durations else 0,
            "instrument_count": len(pm.instruments),
            "is_drum_present": int(is_drum)
        }
    except Exception as e:
        # Deliberate best-effort: report the bad file and let the caller skip it.
        print(f"❌ Error processing {midi_path}: {e}")
        return None

In [5]:
# NOTE: absolute, machine-specific location of the labels table; adjust when
# running elsewhere. The loop below reads its 'file_path' and 'mood' columns.
LABELS_CSV = 'D:/Study/Ai_Music_Composer/project/data/labels.csv'

df = pd.read_csv(LABELS_CSV)
features = []

print("⏳ Extracting features from MIDI files...")

# Walk the two needed columns directly: iterrows() would build a full Series
# per row only for two scalars to be read back out of it.
for file_path, mood in tqdm(zip(df['file_path'], df['mood']), total=len(df)):
    f = extract_features(file_path)
    # extract_features returns None for files it could not process; skip those.
    if f is not None:
        f['file_path'] = file_path
        f['mood'] = mood
        features.append(f)

⏳ Extracting features from MIDI files...


100%|██████████| 108023/108023 [5:33:53<00:00,  5.39it/s]    


In [7]:
# NOTE: absolute, machine-specific output location; adjust when running elsewhere.
FEATURES_CSV = 'D:/Study/Ai_Music_Composer/project/features/all_features.csv'

features_df = pd.DataFrame(features)

# to_csv does not create missing directories; make sure the target folder
# exists so the results of the long extraction run are not lost to an OSError.
os.makedirs(os.path.dirname(FEATURES_CSV), exist_ok=True)
features_df.to_csv(FEATURES_CSV, index=False)

print(f"\n✅ Saved extracted features for {len(features_df)} MIDI files to features/all_features.csv")


✅ Saved extracted features for 108023 MIDI files to features/all_features.csv
