In [1]:
# Probably more imports than are really necessary...
import os
import torch
import torchaudio
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.nn.functional as F
from torchaudio.transforms import MelSpectrogram, AmplitudeToDB
from tqdm import tqdm
import librosa
import numpy as np
import miditoolkit
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, average_precision_score, accuracy_score
import random
from music21 import converter, chord
from sklearn.model_selection import train_test_split
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
import lightgbm as lgb

## Metrics

In [2]:
def accuracy1(groundtruth, predictions):
    correct = 0
    for k in groundtruth:
        if not (k in predictions):
            print("Missing " + str(k) + " from predictions")
            return 0
        if predictions[k] == groundtruth[k]:
            correct += 1
    return correct / len(groundtruth)

In [3]:
def accuracy2(groundtruth, predictions):
    correct = 0
    for k in groundtruth:
        if not (k in predictions):
            print("Missing " + str(k) + " from predictions")
            return 0
        if predictions[k] == groundtruth[k]:
            correct += 1
    return correct / len(groundtruth)

In [4]:
TAGS = ['rock', 'oldies', 'jazz', 'pop', 'dance',  'blues',  'punk', 'chill', 'electronic', 'country']

In [5]:
def accuracy3(groundtruth, predictions):
    preds, targets = [], []
    for k in groundtruth:
        if not (k in predictions):
            print("Missing " + str(k) + " from predictions")
            return 0
        prediction = [1 if tag in predictions[k] else 0 for tag in TAGS]
        target = [1 if tag in groundtruth[k] else 0 for tag in TAGS]
        preds.append(prediction)
        targets.append(target)
    
    mAP = average_precision_score(targets, preds, average='macro')
    return mAP

## Task 2: Sequence prediction

In [6]:
dataroot2 = "student_files/task2_next_sequence_prediction/"

In [7]:
class model2():
    def __init__(self, threshold=0.48):  # Allow tuning threshold
        self.threshold = threshold

    def extract_features(self, path):
        midi_path = dataroot2 + '/' + path
        midi_obj = miditoolkit.midi.parser.MidiFile(midi_path)

        # Return all zeros if MIDI is empty or broken
        if len(midi_obj.instruments) == 0 or len(midi_obj.instruments[0].notes) == 0:
            return [0] * (6 + 12 + 3 + 2)

        notes = sorted(midi_obj.instruments[0].notes, key=lambda n: n.start)
        total_time = midi_obj.max_tick / midi_obj.ticks_per_beat  # in beats

        # Base features
        features = [
            min(note.pitch for note in notes),
            max(note.pitch for note in notes),
            len(set(note.pitch for note in notes)),
            sum(note.pitch for note in notes) / len(notes),
            sum(note.end - note.start for note in notes) / len(notes),
            len(notes) / total_time if total_time > 0 else 0
        ]

        # Interval histogram
        intervals = [abs(notes[i].pitch - notes[i - 1].pitch) % 12 for i in range(1, len(notes))]
        interval_hist = [0] * 12
        for i in intervals:
            interval_hist[i] += 1
        total_int = sum(interval_hist)
        interval_hist = [x / total_int for x in interval_hist] if total_int > 0 else interval_hist

        # Rhythm features
        durations = [note.end - note.start for note in notes]
        rhythm_features = [
            np.mean(durations),
            np.std(durations),
            max(durations) / np.mean(durations) if np.mean(durations) > 0 else 0
        ]

        # Key signature features
        try:
            score = converter.parse(midi_path)
            key = score.analyze('key')
            key_features = [key.tonic.pitchClass, 1 if key.mode == 'major' else 0]
        except:
            key_features = [0, 0]

        return features + interval_hist + rhythm_features + key_features

    def combine_pair_features(self, f1, f2):
        f1 = np.array(f1)
        f2 = np.array(f2)
        return np.concatenate([
            f1,
            f2,
            np.abs(f1 - f2),
            f1 * f2,
            np.minimum(f1, f2),
            np.maximum(f1, f2),
            f1 / (f2 + 1e-6),
            f2 / (f1 + 1e-6)
        ]).tolist()

    def train(self, path):
        d = eval(open(path, 'r').read())
        X, y = [], []

        for (p1, p2), label in tqdm(d.items(), desc="Extracting train features"):
            f1 = self.extract_features(p1)
            f2 = self.extract_features(p2)
            combined = self.combine_pair_features(f1, f2)
            X.append(combined)
            y.append(label)

        self.model = lgb.LGBMClassifier(
            n_estimators=400,
            learning_rate=0.03,
            num_leaves=128,
            max_depth=10,
            reg_alpha=0.1,
            reg_lambda=0.1,
            colsample_bytree=0.9,
            subsample=0.8,
            random_state=42
        )
        self.model.fit(X, y)

    def predict(self, path, outpath=None):
        d = eval(open(path, 'r').read())
        predictions = {}

        for (p1, p2) in tqdm(d, desc="Predicting"):
            f1 = self.extract_features(p1)
            f2 = self.extract_features(p2)
            combined = self.combine_pair_features(f1, f2)
            prob = self.model.predict_proba([combined])[0][1]
            predictions[(p1, p2)] = prob > self.threshold

        if outpath:
            with open(outpath, "w") as z:
                z.write(str(predictions) + '\n')
        return predictions

## Run everything...

In [8]:
def run2():
    model = model2()
    model.train(dataroot2 + "/train.json")
    train_preds = model.predict(dataroot2 + "/train.json")
    test_preds = model.predict(dataroot2 + "/test.json", "predictions2.json")
    
    train_labels = eval(open(dataroot2 + "/train.json").read())
    acc2 = accuracy2(train_labels, train_preds)
    print("Task 2 training accuracy = " + str(acc2))

In [9]:
run2()

Extracting train features: 100%|██████████| 9558/9558 [08:04<00:00, 19.74it/s]  


[LightGBM] [Info] Number of positive: 4779, number of negative: 4779
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004734 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 40180
[LightGBM] [Info] Number of data points in the train set: 9558, number of used features: 184
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


Predicting: 100%|██████████| 9558/9558 [13:15<00:00, 12.01it/s]   
Predicting: 100%|██████████| 3070/3070 [03:59<00:00, 12.82it/s]


Task 2 training accuracy = 1.0
