In [10]:
import librosa
import os
import numpy as np
import tqdm

# Anchor every data path below to the directory the kernel is running in.
current_dir = os.path.abspath(os.getcwd())

In [11]:
# Build (filename, label) pairs for every file in the two soundtrack folders.
# Label convention: 1 = UK_OST, 0 = SMG_OST.
#
# Each path component is passed to os.path.join separately so the notebook
# also runs on macOS/Linux, where a literal "\\" is not a path separator.
# The listings are sorted because os.listdir returns entries in arbitrary
# order; sorting keeps the sample order reproducible across machines.
UK_OST = [(mp3, 1) for mp3 in sorted(os.listdir(os.path.join(current_dir, "Audio_Files", "UK_OST")))]
SMG_OST = [(mp3, 0) for mp3 in sorted(os.listdir(os.path.join(current_dir, "Audio_Files", "SMG_OST")))]
all_OST = UK_OST + SMG_OST

In [12]:
# Extract one flat feature vector per song.
# Layout of each vector: [tempo, beat_0 features, beat_1 features, ...], where
# each beat's features are the chroma rows followed by the MFCC and MFCC-delta rows.
all_songs = []
all_labels = []

for song in all_OST:
    filename, label = song

    # Label 0 files live in SMG_OST, everything else in UK_OST.
    subfolder = "SMG_OST" if label == 0 else "UK_OST"
    # Separate path components keep this portable (no hard-coded "\\").
    filepath = os.path.join(current_dir, "Audio_Files", subfolder, filename)

    # y is the audio time series, sr is its sampling rate (samples/second)
    y, sr = librosa.load(filepath)
    y_harmonic, y_percussive = librosa.effects.hpss(y)

    # Beats are tracked on the percussive component; beat_frames is used below
    # to aggregate the frame-level features into per-beat features.
    tempo, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=sr)

    # MFCCs summarise the spectral envelope; the deltas capture how they change over time.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=512, n_mfcc=13)
    mfcc_delta = librosa.feature.delta(mfcc)
    beat_mfcc_delta = librosa.util.sync(np.vstack([mfcc, mfcc_delta]), beat_frames)

    # Chromagram is the relative intensity of different semitones (ascending from C)
    chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)
    beat_chroma = librosa.util.sync(chromagram, beat_frames, aggregate=np.median)

    # Stack all of these together and then flatten them, this is the model input.
    # Doing it all by beats, as the size may grow too much using frames.
    beat_features = np.vstack([beat_chroma, beat_mfcc_delta])
    beat_features_flattened = beat_features.T.flatten()

    # Prepend the tempo. np.insert returns a NEW array rather than modifying
    # its argument, so the result must be assigned back; previously it was
    # discarded and the tempo never reached the feature vector.
    beat_features_flattened = np.insert(beat_features_flattened, 0, tempo)

    # And we have the features.
    all_songs.append(beat_features_flattened)
    all_labels.append(label)

    print(f"Analyzed song {song[0]} with tempo {tempo}")

Analyzed song (Alt)02IntotheFire.mp3 with tempo 80.74951171875
Analyzed song (Alt)03UnstoppableForce.mp3 with tempo 117.45383522727273
Analyzed song (Alt)04Cerberus.mp3 with tempo 107.666015625
Analyzed song (Alt)05AThousandGreetings.mp3 with tempo 107.666015625
Analyzed song (Alt)06AShatteredIllusion.mp3 with tempo 112.34714673913044
Analyzed song 01DeepBlue(EarlyVersion).mp3 with tempo 89.10290948275862
Analyzed song 01Intro(WeihnachtenAmKlavier).mp3 with tempo 107.666015625
Analyzed song 01Intro.mp3 with tempo 99.38401442307692
Analyzed song 01TheFireIsGone(forPiano,SaxophoneandTrumpet).mp3 with tempo 89.10290948275862
Analyzed song 01TheSpinalStaircase.mp3 with tempo 161.4990234375
Analyzed song 01TheWorldLooksWhite.mp3 with tempo 66.25600961538461
Analyzed song 02CHAOS.mp3 with tempo 117.45383522727273
Analyzed song 02DeepBlue(EarlyVersionCalmLoop).mp3 with tempo 89.10290948275862
Analyzed song 02DuneEternal.mp3 with tempo 92.28515625
Analyzed song 02IntotheFire.mp3 with tempo 80.

In [13]:
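# Optional shortcut: uncomment this cell to reload previously pickled
# features/labels instead of re-running the feature extraction above.
# NOTE(review): nothing visible in this notebook writes save_features.p or
# save_labels.p -- confirm these files exist before enabling this cell.
# import pickle

# with open('save_features.p', 'rb') as pickled_file:
#     all_songs = pickle.load(pickled_file)

# with open('save_labels.p', 'rb') as pickled_file:
#     all_labels = pickle.load(pickled_file)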

In [16]:
# Songs produce feature vectors of different lengths, so right-pad each one
# with zeros up to the longest vector to get a rectangular
# (n_songs, max_len) matrix. Rows are filled one at a time because the
# inputs are ragged.
max_len = max(map(len, all_songs))
all_features_padded = np.zeros((len(all_songs), max_len))
for i, song_features in enumerate(all_songs):
    all_features_padded[i, :len(song_features)] = song_features

In [17]:
import pickle

# Persist the padded feature matrix and the labels.
# The `with` blocks guarantee each file handle is flushed and closed even if
# pickling fails; the previous inline open(...) calls left the handles open.
with open("padded_features.p", "wb") as features_file:
    pickle.dump(all_features_padded, features_file)

with open("all_labels.p", "wb") as labels_file:
    pickle.dump(all_labels, labels_file)