In [1]:
# Probably more imports than are really necessary...
import os
import torch
import torchaudio
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.nn.functional as F
from torchaudio.transforms import MelSpectrogram, AmplitudeToDB
from tqdm import tqdm
import librosa
import numpy as np
import miditoolkit
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, average_precision_score, accuracy_score
import random
from music21 import converter, chord
from sklearn.model_selection import train_test_split
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler

## Metrics

In [2]:
def accuracy1(groundtruth, predictions):
    correct = 0
    for k in groundtruth:
        if not (k in predictions):
            print("Missing " + str(k) + " from predictions")
            return 0
        if predictions[k] == groundtruth[k]:
            correct += 1
    return correct / len(groundtruth)

In [3]:
def accuracy2(groundtruth, predictions):
    correct = 0
    for k in groundtruth:
        if not (k in predictions):
            print("Missing " + str(k) + " from predictions")
            return 0
        if predictions[k] == groundtruth[k]:
            correct += 1
    return correct / len(groundtruth)

In [4]:
TAGS = ['rock', 'oldies', 'jazz', 'pop', 'dance',  'blues',  'punk', 'chill', 'electronic', 'country']

In [5]:
def accuracy3(groundtruth, predictions):
    preds, targets = [], []
    for k in groundtruth:
        if not (k in predictions):
            print("Missing " + str(k) + " from predictions")
            return 0
        prediction = [1 if tag in predictions[k] else 0 for tag in TAGS]
        target = [1 if tag in groundtruth[k] else 0 for tag in TAGS]
        preds.append(prediction)
        targets.append(target)
    
    mAP = average_precision_score(targets, preds, average='macro')
    return mAP

## Task 1: Composer classification

In [6]:
dataroot1 = "student_files/task1_composer_classification/"

In [7]:
class model1():

    def __init__(self):
        self.vectorizer = TfidfVectorizer(max_features=100)

    def get_all_notes(self, midi_obj):
        notes = []
        for inst in midi_obj.instruments:
            notes.extend(inst.notes)
        return sorted(notes, key=lambda n: n.start)

    def get_chord_sequence(self, path):

        try:
            score = converter.parse(dataroot1 + '/' + path)
            chords = score.chordify().recurse().getElementsByClass('Chord')
            roman_progression = [chord.Chord(c.pitches).pitchedCommonName for c in chords]
            return ' '.join(roman_progression)
        
        except:
            return ""
        
    def get_pitch_features(self, notes):
        if not notes:
            return [0] * (6 + 12 + 12 + 144)

        total_time = notes[-1].end
        stats = [
            min(note.pitch for note in notes),
            max(note.pitch for note in notes),
            len(set(note.pitch for note in notes)),
            sum(note.pitch for note in notes) / len(notes),
            sum(note.end - note.start for note in notes) / len(notes),
            len(notes) / total_time if total_time > 0 else 0,
        ]
        pitch_hist = [0] * 12

        for note in notes:
            pitch_hist[note.pitch % 12] += 1

        total = sum(pitch_hist)
        pitch_hist = [x / total for x in pitch_hist] if total > 0 else pitch_hist
        interval_hist = [0] * 12

        if len(notes) >= 2:
            intervals = [abs(notes[i].pitch - notes[i-1].pitch) % 12 for i in range(1, len(notes))]

            for i in intervals:
                interval_hist[i] += 1

            total = sum(interval_hist)
            interval_hist = [x / total for x in interval_hist] if total > 0 else interval_hist

        bigram_hist = [0] * 144

        if len(notes) >= 3:

            bigrams = [(abs(notes[i].pitch - notes[i-1].pitch) % 12, abs(notes[i+1].pitch - notes[i].pitch) % 12) for i in range(len(notes) - 2)]

            for i1, i2 in bigrams:
                bigram_hist[i1 * 12 + i2] += 1

            total = sum(bigram_hist)
            bigram_hist = [x / total for x in bigram_hist] if total > 0 else bigram_hist

        return stats + pitch_hist + interval_hist + bigram_hist

    def get_meta(self, midi_obj, path):

        try:
            score = converter.parse(dataroot1 + '/' + path)
            key = score.analyze('key')
            tonic_class = key.tonic.pitchClass
            mode_val = 1 if key.mode == 'major' else 0

        except:
            tonic_class, mode_val = 0, 0

        if midi_obj.tempo_changes:
            tempo_micro = midi_obj.tempo_changes[0].tempo
            bpm = 60000000 / tempo_micro

        else:
            bpm = 120

        return [bpm, tonic_class, mode_val]

    def features(self, path, include_tfidf=False):

        midi_obj = miditoolkit.midi.parser.MidiFile(dataroot1 + '/' + path)
        notes = self.get_all_notes(midi_obj)
        chord_seq = self.get_chord_sequence(path)
        base_feats = self.get_pitch_features(notes) + self.get_meta(midi_obj, path)
        return (base_feats, chord_seq) if include_tfidf else base_feats

    def train(self, path):
        with open(path, 'r') as f:
            train_json = eval(f.read())

        paths = list(train_json.keys())
        labels = list(train_json.values())

        base_X = []
        chord_texts = []

        for p in tqdm(paths, desc="Extracting features"):
            bf, chord_str = self.features(p, include_tfidf=True)
            base_X.append(bf)
            chord_texts.append(chord_str)

        tfidf_X = self.vectorizer.fit_transform(chord_texts).toarray()
        X = [np.concatenate([b, t]) for b, t in zip(base_X, tfidf_X)]
        y = labels

        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

        self.model = HistGradientBoostingClassifier(
            max_iter=300,
            learning_rate=0.05,
            max_leaf_nodes=64,
            random_state=42
        )
        self.model.fit(X_train, y_train)
        val_preds = self.model.predict(X_val)
        print("Validation accuracy =", accuracy_score(y_val, val_preds))

    def predict(self, path, outpath=None):
        d = eval(open(path, 'r').read())
        predictions = {}

        for k in tqdm(d, desc="Predicting"):
            base_feats, chord_str = self.features(k, include_tfidf=True)
            tfidf_feats = self.vectorizer.transform([chord_str]).toarray()[0]
            x = np.concatenate([base_feats, tfidf_feats])
            pred = self.model.predict([x])
            predictions[k] = str(pred[0])

        if outpath:
            with open(outpath, "w") as z:
                z.write(str(predictions) + '\n')
        return predictions


## Run everything...

In [8]:
def run1():
    model = model1()
    model.train(dataroot1 + "/train.json")
    train_preds = model.predict(dataroot1 + "/train.json")
    test_preds = model.predict(dataroot1 + "/test.json", "predictions1.json")
    
    train_labels = eval(open(dataroot1 + "/train.json").read())
    acc1 = accuracy1(train_labels, train_preds)
    print("Task 1 training accuracy = " + str(acc1))

In [9]:
run1()

Extracting features:   0%|          | 0/1210 [00:00<?, ?it/s]

Extracting features: 100%|██████████| 1210/1210 [04:01<00:00,  5.02it/s]


Validation accuracy = 0.8181818181818182


Predicting: 100%|██████████| 1210/1210 [15:02<00:00,  1.34it/s]  
Predicting: 100%|██████████| 389/389 [02:54<00:00,  2.23it/s]

Task 1 training accuracy = 0.9636363636363636



