In [1]:
from __future__ import annotations

import json
import os
from pathlib import Path
from queue import Queue

import numpy as np
import pandas as pd
from librosa import load, feature
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

# Root folder handed to FileFinder in the extraction cell; it is searched for
# 'au' files and each file's parent folder name is stored as its genre label.
PATH_TO_FILES = Path('files/')

In [2]:
class FileFinder:
    """Breadth-first, one-step-at-a-time walker over the files below ``root``.

    Paths are kept in a FIFO queue. Every call to :meth:`get_next_file` pops
    exactly one path: directories are expanded into the queue, files with a
    wanted suffix are handed back to the caller.

    Attributes:
        root: Path the walk starts from.
        suffixes: Collection of wanted extensions, lower-case and without the
            leading dot (e.g. ``('au',)``). File extensions are lower-cased
            before they are compared against it.
    """
    suffixes: set[str]
    _queue: Queue[Path]

    def __init__(self, root, suffixes):
        """Remember ``root``/``suffixes`` and seed the queue with ``root``."""
        self.root = Path(root)
        self._queue = Queue()
        self.suffixes = suffixes

        self._queue.put(self.root)

    def _has_wanted_suffix(self, path):
        """Return True if ``path``'s extension (dot stripped, lower-cased) is wanted."""
        return path.suffix[1:].lower() in self.suffixes

    def get_next_file(self):
        """Pop the next queued path and report what it was.

        Returns:
            ``None`` when the queue is empty (a message is printed) or when
            the popped file does not carry a wanted suffix; ``Ellipsis`` when
            a directory was popped and its sub-directories / matching files
            were queued; otherwise the ``Path`` of a matching file.
        """
        if self._queue.empty():
            print("Queue is empty")
            return None

        res = self._queue.get()
        if res.is_dir():
            for child in res.iterdir():
                if child.is_dir() or self._has_wanted_suffix(child):
                    self._queue.put(child)
            return Ellipsis

        # Use the same case-insensitive check as when enqueuing. Previously a
        # queued file such as ``track.AU`` came back as ``None`` here, which
        # callers interpret as "walk finished" and stop early.
        return res if self._has_wanted_suffix(res) else None

    def reset(self):
        """Discard the current queue and restart the walk from ``root``."""
        self._queue = Queue()
        self._queue.put(self.root)

    def get_rel_path(self, file: Path):
        """Return the directory containing ``file``, expressed relative to ``root``."""
        return Path(os.path.relpath(file.parent, self.root))

    def get_full_path(self, file: Path):
        """Return ``file`` joined onto ``root``."""
        return self.root.joinpath(file)

class FileIO:
    """Minimal JSON-backed store for a list of records.

    Attributes:
        path: Location of the JSON file.
        _data: Last value read or written; ``Ellipsis`` until the first
            successful :meth:`get_data` / :meth:`set_data`.
    """
    _data: list[dict]

    def __init__(self, path):
        """Remember ``path``; the file itself is not touched yet."""
        self.path = Path(path)
        self._data = ...

    def get_data(self):
        """Parse the JSON file at ``path``, cache the result and return it."""
        with open(self.path, encoding='utf-8') as json_file:
            self._data = json.load(json_file)
        return self._data

    def set_data(self, data):
        """Write ``data`` to ``path`` as indented JSON.

        Anything that is not a list is ignored (no file access, no error).

        Raises:
            TypeError / ValueError: propagated from ``json.dumps`` when
                ``data`` cannot be serialized. The existing file is left
                intact in that case.
        """
        if not isinstance(data, list):
            return

        # Serialize BEFORE opening the file: open(..., 'w') truncates at once,
        # so a failing dump would otherwise wipe the previously stored data.
        serialized = json.dumps(data, sort_keys=False, indent=4)

        self._data = data
        with open(self.path, 'w', encoding='utf-8') as json_file:
            json_file.write(serialized)
        

In [3]:
#TODO: standardize/normalize features

class FeatureExtractor:
    """Collapse librosa ``feature`` outputs into per-clip averages.

    Attributes:
        mfccs: Number of MFCC coefficients requested from ``feature.mfcc``.
        rate: Sample rate passed as ``sr`` to the rate-dependent features.
    """

    def __init__(self):
        self.mfccs = 13
        self.rate = 22050

    def not_static(self):
        """
        No-op. Called from methods that would otherwise never touch ``self``
        (presumably to keep them from being flagged as static candidates).
        """
        pass

    @staticmethod
    def _average(frames, axis=None):
        """Mean of ``frames`` after a ``.tolist()`` round-trip.

        The round-trip is deliberate: NumPy then averages Python floats, so
        the result is float64 whatever dtype ``frames`` had.
        """
        return np.mean(frames.tolist(), axis=axis)

    def tempo(self, audio):
        """Mean of ``feature.tempo`` for ``audio`` at ``self.rate``."""
        estimate = feature.tempo(y=audio, sr=self.rate)
        return self._average(estimate)

    def rms(self, audio):
        """Mean of ``feature.rms`` over ``audio``."""
        self.not_static()
        energy = feature.rms(y=audio)
        return self._average(energy)

    def mfcc(self, audio):
        """Per-row mean (axis 1) of ``self.mfccs`` MFCCs.

        Presumably yields one value per coefficient — TODO confirm the
        (n_mfcc, frames) layout against librosa.
        """
        coefficients = feature.mfcc(y=audio, sr=self.rate, n_mfcc=self.mfccs)
        return self._average(coefficients, axis=1)

    def bandwidth(self, audio):
        """Mean of ``feature.spectral_bandwidth``."""
        spread = feature.spectral_bandwidth(y=audio, sr=self.rate)
        return self._average(spread)

    def centroid(self, audio):
        """Mean of ``feature.spectral_centroid``."""
        centre = feature.spectral_centroid(y=audio, sr=self.rate)
        return self._average(centre)

    def contrast(self, audio):
        """Mean of ``feature.spectral_contrast`` over every returned value."""
        bands = feature.spectral_contrast(y=audio, sr=self.rate)
        return self._average(bands)

    def rolloff(self, audio):
        """Mean of ``feature.spectral_rolloff``."""
        cutoff = feature.spectral_rolloff(y=audio, sr=self.rate)
        return self._average(cutoff)

    def zcr(self, audio):
        """Mean of ``feature.zero_crossing_rate``."""
        self.not_static()
        crossings = feature.zero_crossing_rate(y=audio)
        return self._average(crossings)
    

In [4]:
class Model:
    """Logistic-regression genre classifier over a chosen set of feature columns.

    Attributes:
        dt: The raw records passed to the constructor.
        df: DataFrame the model currently works on.
        features: Column names used as predictors.
        categories: One-column frame (``'genre'``) taken from ``df``.
        predictors: The ``features`` columns taken from ``df``.
        model / split_data / res: ``Ellipsis`` until :meth:`generate_model`
            has run; afterwards the fitted estimator, the 4-tuple returned by
            ``train_test_split`` and the predictions for the test part.
    """

    def __init__(self, dt: list, features):
        """Build the working DataFrame from ``dt`` and select the columns.

        Args:
            dt: Records accepted by ``pd.DataFrame``; each needs a ``'genre'``
                entry plus every name listed in ``features``.
            features: Names of the predictor columns.
        """
        self.dt = dt
        self.features = features
        self.model = ...
        self.split_data = ...
        self.res = ...
        self._bind_dataframe(pd.DataFrame(dt))

    def _bind_dataframe(self, df):
        """Store ``df`` and re-derive the label / predictor views from it."""
        self.df = df
        self.categories = df[['genre']]
        # Selecting with a list already yields exactly these columns in this
        # order, so no column relabelling is needed afterwards.
        self.predictors = df[list(self.features)]

    def update_dataframe(self, df):
        """Replace the working DataFrame.

        ``categories`` and ``predictors`` are refreshed as well; previously
        they kept pointing at the old frame, so a later ``generate_model``
        silently trained on stale data. Results of an earlier fit
        (``model``, ``split_data``, ``res``) are left as they are.
        """
        self._bind_dataframe(df)

    def generate_model(self, test_size=0.33, solver='newton-cg', iters=1000):
        """Split the data, fit the classifier and predict the held-out part.

        Args:
            test_size: Passed to ``train_test_split``.
            solver: Passed to ``LogisticRegression``.
            iters: Passed to ``LogisticRegression`` as ``max_iter``.
        """
        # Fixed random_state so every model is scored on the same split.
        self.split_data = train_test_split(self.predictors, self.categories, test_size=test_size, random_state=5)
        pre_train, pre_test, cat_train, cat_test = self.split_data
        # NOTE(review): recent scikit-learn releases deprecate `multi_class` —
        # confirm against the installed version before upgrading.
        self.model = LogisticRegression(multi_class='multinomial', solver=solver, max_iter=iters)
        self.model.fit(pre_train, cat_train['genre'])
        self.res = self.model.predict(pre_test)

    def get_res(self):
        """Return the stored test predictions (``Ellipsis`` before a fit)."""
        return self.res

    def get_matrix(self):
        """Confusion matrix of test labels vs. predictions; needs a prior ``generate_model``."""
        return confusion_matrix(self.split_data[3], self.res)

    def get_report(self):
        """Classification report of test labels vs. predictions; needs a prior ``generate_model``."""
        return classification_report(self.split_data[3], self.res)

In [5]:
### ONLY RUN THIS CELL TO EXTRACT FEATURES
# Extraction is gated behind a prompt: an empty answer skips it, anything else
# walks PATH_TO_FILES and rebuilds data.json from the audio files.
DATA = []
if input('Type anything then Enter to re-extract features (just Enter skips): '):
    finder = FileFinder(PATH_TO_FILES, ('au', ))
    output = FileIO('data.json')
    extractor = FeatureExtractor()

    while True:
        curr = finder.get_next_file()
        if curr is None:
            # FileFinder signals the end of the walk with None
            break
        if curr is ...:
            # a directory was expanded into the queue; nothing to analyse yet
            continue

        # duration=7: only the first 7 seconds are analysed (per librosa.load).
        # `fs` is the sample rate returned by load and is not used further.
        curr_file, fs = load(curr, duration=7, sr=extractor.rate)
        DATA.append(dict(
            id=curr.name,
            # the parent folder name is the genre label
            genre=curr.parent.name,
            **dict(zip((f'mfcc{n}' for n in range(extractor.mfccs)), extractor.mfcc(curr_file))),
            tempo=extractor.tempo(curr_file),
            rms=extractor.rms(curr_file),
            bandwidth=extractor.bandwidth(curr_file),
            centroid=extractor.centroid(curr_file),
            contrast=extractor.contrast(curr_file),
            rolloff=extractor.rolloff(curr_file),
            zcr=extractor.zcr(curr_file),
            prediction=''
        ))

    if DATA:
        output.set_data(DATA)
        print('Success')
    else:
        # Do not clobber an existing data.json with an empty list (e.g. when
        # PATH_TO_FILES is missing or holds no matching files).
        print(f'No audio files found under {PATH_TO_FILES}; data.json was not modified')

In [6]:
### OLD CODE

# test_features = [*(f'mfcc{n}' for n in range(extractor.mfccs // 2)), 'zcr']
# 
# frame = pd.DataFrame(data)
# categories = frame[['genre']]
# predictors = frame[test_features]
# predictors.columns = test_features
# 
# pre_train, pre_test, cat_train, cat_test = train_test_split(predictors, categories, test_size=0.33, random_state=5)
# model = LogisticRegression(multi_class='multinomial', solver='newton-cg', max_iter=1000)
# model.fit(pre_train, cat_train.genre)
# predictions = model.predict(pre_test)
# print(confusion_matrix(cat_test, predictions))
# print(classification_report(cat_test, predictions))

In [7]:
### RUN THIS CELL TO GET THE DATA WITHOUT RE-EXTRACTING THE FEATURES
# Read the previously extracted feature records back from data.json.
feature_cache = FileIO('data.json')
DATA = feature_cache.get_data()

In [8]:
### EXAMPLE MODEL



# Edit the feature list below to change what is analyzed (names must match the keys in the JSON file)
mE = Model(DATA, [f'mfcc{n}' for n in range(10)] + ['contrast', 'zcr'])

# generate_model() accepts test_size, solver and iters to change how the model is fitted
mE.generate_model()
print(mE.get_matrix())
print(mE.get_report())

[[11  0  2  0  3]
 [ 0 11  0  6  3]
 [ 2  0  9  0  0]
 [ 1  3  0 13  3]
 [ 1  4  3  1  7]]
              precision    recall  f1-score   support

   classical       0.73      0.69      0.71        16
      hiphop       0.61      0.55      0.58        20
        jazz       0.64      0.82      0.72        11
         pop       0.65      0.65      0.65        20
      reggae       0.44      0.44      0.44        16

    accuracy                           0.61        83
   macro avg       0.61      0.63      0.62        83
weighted avg       0.61      0.61      0.61        83



In [9]:
# model 1: the first five MFCCs (mfcc0 .. mfcc4)
m1 = Model(DATA, [f'mfcc{n}' for n in range(5)])

# generate_model() accepts test_size, solver and iters to change how the model is fitted
m1.generate_model()
print(m1.get_matrix())
print(m1.get_report())
# does well on classical and reggae classification

[[10  2  3  1  0]
 [ 0  9  0  9  2]
 [ 2  0  9  0  0]
 [ 0  1  0 18  1]
 [ 1  3  2  1  9]]
              precision    recall  f1-score   support

   classical       0.77      0.62      0.69        16
      hiphop       0.60      0.45      0.51        20
        jazz       0.64      0.82      0.72        11
         pop       0.62      0.90      0.73        20
      reggae       0.75      0.56      0.64        16

    accuracy                           0.66        83
   macro avg       0.68      0.67      0.66        83
weighted avg       0.67      0.66      0.65        83



In [10]:
# model 2: the first ten MFCCs (mfcc0 .. mfcc9)
m2 = Model(DATA, [f'mfcc{n}' for n in range(10)])

# generate_model() accepts test_size, solver and iters to change how the model is fitted
m2.generate_model()
print(m2.get_matrix())
print(m2.get_report())
# significantly worse at reggae, and overall

[[11  0  2  0  3]
 [ 0 10  1  6  3]
 [ 3  0  8  0  0]
 [ 0  4  0 14  2]
 [ 1  6  2  1  6]]
              precision    recall  f1-score   support

   classical       0.73      0.69      0.71        16
      hiphop       0.50      0.50      0.50        20
        jazz       0.62      0.73      0.67        11
         pop       0.67      0.70      0.68        20
      reggae       0.43      0.38      0.40        16

    accuracy                           0.59        83
   macro avg       0.59      0.60      0.59        83
weighted avg       0.59      0.59      0.59        83



In [11]:
# model 3: MFCCs 1-3 (mfcc0 is skipped)
m3 = Model(DATA, [f'mfcc{n}' for n in range(1, 4)])

# generate_model() accepts test_size, solver and iters to change how the model is fitted
m3.generate_model()
print(m3.get_matrix())
print(m3.get_report())
# 0.70 accuracy, nice!

[[14  1  1  0  0]
 [ 1 10  0  6  3]
 [ 3  0  6  0  2]
 [ 0  1  0 18  1]
 [ 1  2  3  0 10]]
              precision    recall  f1-score   support

   classical       0.74      0.88      0.80        16
      hiphop       0.71      0.50      0.59        20
        jazz       0.60      0.55      0.57        11
         pop       0.75      0.90      0.82        20
      reggae       0.62      0.62      0.62        16

    accuracy                           0.70        83
   macro avg       0.69      0.69      0.68        83
weighted avg       0.69      0.70      0.69        83



In [12]:
# model 4: MFCCs 1-3 plus rms
m4 = Model(DATA, [f'mfcc{n}' for n in range(1, 4)] + ['rms'])

# generate_model() accepts test_size, solver and iters to change how the model is fitted
m4.generate_model()
print(m4.get_matrix())
print(m4.get_report())
# slight improvement, may keep

[[14  1  1  0  0]
 [ 1 11  0  6  2]
 [ 3  0  6  0  2]
 [ 0  1  0 18  1]
 [ 1  2  3  0 10]]
              precision    recall  f1-score   support

   classical       0.74      0.88      0.80        16
      hiphop       0.73      0.55      0.63        20
        jazz       0.60      0.55      0.57        11
         pop       0.75      0.90      0.82        20
      reggae       0.67      0.62      0.65        16

    accuracy                           0.71        83
   macro avg       0.70      0.70      0.69        83
weighted avg       0.71      0.71      0.70        83



In [13]:
# model 5: MFCCs 1-3, rms and the spectral centroid
m5 = Model(DATA, [f'mfcc{n}' for n in range(1, 4)] + ['rms', 'centroid'])

# generate_model() accepts test_size, solver and iters to change how the model is fitted
m5.generate_model()
print(m5.get_matrix())
print(m5.get_report())
# adding centroid significantly damages the model's ability to correctly identify jazz, pop, and reggae

[[11  0  2  1  2]
 [ 0  9  0  7  4]
 [ 4  0  5  0  2]
 [ 0  1  1 18  0]
 [ 0  2  5  1  8]]
              precision    recall  f1-score   support

   classical       0.73      0.69      0.71        16
      hiphop       0.75      0.45      0.56        20
        jazz       0.38      0.45      0.42        11
         pop       0.67      0.90      0.77        20
      reggae       0.50      0.50      0.50        16

    accuracy                           0.61        83
   macro avg       0.61      0.60      0.59        83
weighted avg       0.63      0.61      0.61        83



In [14]:
# model 6, bandwidth
# Stored as m6: this cell used to assign to m5 and silently replaced model 5.
m6 = Model(DATA, [*(f'mfcc{n}' for n in range(1,4)), 'rms', 'bandwidth'])

# generate_model() accepts test_size, solver and iters to change how the model is fitted
m6.generate_model()
print(m6.get_matrix())
print(m6.get_report())
# bandwidth hurts classical and hiphop

[[ 9  1  4  0  2]
 [ 1  9  0  7  3]
 [ 4  0  6  0  1]
 [ 0  2  0 18  0]
 [ 0  4  2  1  9]]
              precision    recall  f1-score   support

   classical       0.64      0.56      0.60        16
      hiphop       0.56      0.45      0.50        20
        jazz       0.50      0.55      0.52        11
         pop       0.69      0.90      0.78        20
      reggae       0.60      0.56      0.58        16

    accuracy                           0.61        83
   macro avg       0.60      0.60      0.60        83
weighted avg       0.61      0.61      0.61        83



In [15]:
# model 7: MFCCs 1-3, rms and spectral contrast
m7 = Model(DATA, [f'mfcc{n}' for n in range(1, 4)] + ['rms', 'contrast'])

# generate_model() accepts test_size, solver and iters to change how the model is fitted
m7.generate_model()
print(m7.get_matrix())
print(m7.get_report())
# not beneficial

[[13  2  1  0  0]
 [ 0 10  0  7  3]
 [ 4  0  6  0  1]
 [ 0  1  0 18  1]
 [ 1  3  3  1  8]]
              precision    recall  f1-score   support

   classical       0.72      0.81      0.76        16
      hiphop       0.62      0.50      0.56        20
        jazz       0.60      0.55      0.57        11
         pop       0.69      0.90      0.78        20
      reggae       0.62      0.50      0.55        16

    accuracy                           0.66        83
   macro avg       0.65      0.65      0.65        83
weighted avg       0.65      0.66      0.65        83



In [16]:
# model 8: MFCCs 1-3, rms and spectral rolloff
m8 = Model(DATA, [f'mfcc{n}' for n in range(1, 4)] + ['rms', 'rolloff'])

# generate_model() accepts test_size, solver and iters to change how the model is fitted
m8.generate_model()
print(m8.get_matrix())
print(m8.get_report())
# rolloff hurt jazz detection significantly

[[ 9  0  4  1  2]
 [ 0  9  0  7  4]
 [ 4  0  5  0  2]
 [ 0  1  1 18  0]
 [ 0  4  4  1  7]]
              precision    recall  f1-score   support

   classical       0.69      0.56      0.62        16
      hiphop       0.64      0.45      0.53        20
        jazz       0.36      0.45      0.40        11
         pop       0.67      0.90      0.77        20
      reggae       0.47      0.44      0.45        16

    accuracy                           0.58        83
   macro avg       0.57      0.56      0.55        83
weighted avg       0.59      0.58      0.57        83



In [17]:
# model 9: MFCCs 1-3, rms and zero crossing rate
m9 = Model(DATA, [f'mfcc{n}' for n in range(1, 4)] + ['rms', 'zcr'])

# generate_model() accepts test_size, solver and iters to change how the model is fitted
m9.generate_model()
print(m9.get_matrix())
print(m9.get_report())
# no real effect: the results are identical to model 4 (mfcc 1-3 and rms alone)

[[14  1  1  0  0]
 [ 1 11  0  6  2]
 [ 3  0  6  0  2]
 [ 0  1  0 18  1]
 [ 1  2  3  0 10]]
              precision    recall  f1-score   support

   classical       0.74      0.88      0.80        16
      hiphop       0.73      0.55      0.63        20
        jazz       0.60      0.55      0.57        11
         pop       0.75      0.90      0.82        20
      reggae       0.67      0.62      0.65        16

    accuracy                           0.71        83
   macro avg       0.70      0.70      0.69        83
weighted avg       0.71      0.71      0.70        83



In [18]:
# model 10: MFCCs 1-4, rms, tempo, contrast, and rolloff
m10 = Model(DATA, [f'mfcc{n}' for n in range(1, 5)] + ['rms', 'tempo', 'contrast', 'rolloff'])

# generate_model() accepts test_size, solver and iters to change how the model is fitted
m10.generate_model()
print(m10.get_matrix())
print(m10.get_report())
# Adding mfcc4, tempo, contrast, and rolloff leaves accuracy at 0.71 (same as model 4);
# macro/weighted f1 tick up by 0.01

[[12  0  1  0  3]
 [ 0 11  0  7  2]
 [ 4  0  7  0  0]
 [ 0  1  0 18  1]
 [ 0  1  3  1 11]]
              precision    recall  f1-score   support

   classical       0.75      0.75      0.75        16
      hiphop       0.85      0.55      0.67        20
        jazz       0.64      0.64      0.64        11
         pop       0.69      0.90      0.78        20
      reggae       0.65      0.69      0.67        16

    accuracy                           0.71        83
   macro avg       0.71      0.70      0.70        83
weighted avg       0.72      0.71      0.71        83



In [19]:
# final contenders
# mF1: just MFCCs 1, 2, and 3, and the RMS.
mF1 = Model(DATA, [f'mfcc{n}' for n in range(1, 4)] + ['rms'])
# mF2: MFCCs 1-4, RMS, Tempo, Spectral Contrast, and Spectral Rolloff
mF2 = Model(DATA, [f'mfcc{n}' for n in range(1, 5)] + ['rms', 'tempo', 'contrast', 'rolloff'])

# fit and report both contenders in turn (mF1 first, then mF2)
for contender in (mF1, mF2):
    contender.generate_model()
    print(contender.get_matrix())
    print(contender.get_report())

# mF1's edge on pop (one of the two largest test classes, support 20) weighs heavily on its overall scores

[[14  1  1  0  0]
 [ 1 11  0  6  2]
 [ 3  0  6  0  2]
 [ 0  1  0 18  1]
 [ 1  2  3  0 10]]
              precision    recall  f1-score   support

   classical       0.74      0.88      0.80        16
      hiphop       0.73      0.55      0.63        20
        jazz       0.60      0.55      0.57        11
         pop       0.75      0.90      0.82        20
      reggae       0.67      0.62      0.65        16

    accuracy                           0.71        83
   macro avg       0.70      0.70      0.69        83
weighted avg       0.71      0.71      0.70        83

[[12  0  1  0  3]
 [ 0 11  0  7  2]
 [ 4  0  7  0  0]
 [ 0  1  0 18  1]
 [ 0  1  3  1 11]]
              precision    recall  f1-score   support

   classical       0.75      0.75      0.75        16
      hiphop       0.85      0.55      0.67        20
        jazz       0.64      0.64      0.64        11
         pop       0.69      0.90      0.78        20
      reggae       0.65      0.69      0.67        16

   