In [1]:
import pickle
import torch
import numpy as np

In [113]:
# Selects which "<ways>_way_dataset" archive is loaded; presumably the number
# of composer classes in that dataset (it is also used to size the score
# matrix in fit_and_predict) — TODO confirm against the dataset builder.
ways = 9

# Extract the zipped dataset into the working directory, then load the pickled splits.

import zipfile
with zipfile.ZipFile(f"{ways}_way_dataset.zip", 'r') as f:
    f.extractall()

# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only run this on archives from a trusted source.
# The pickle holds a 9-tuple: features and labels for train/valid/test plus
# three m_* entries (presumably per-split metadata — not used in the visible cells).
with open(f"{ways}_way_dataset.pkl", "rb") as f:
    x_train, y_train, x_valid, y_valid, x_test, y_test, m_train, m_valid, m_test = pickle.load(f)

# Sorted distinct training labels; a composer's position in this array is the
# class index used by the ranking and metric code.
composers = np.unique(y_train)
# Stack the train/test labels into arrays so they can be compared element-wise
# against a composer label. y_valid is left exactly as loaded.
y_train = np.stack(y_train)
y_test = np.stack(y_test)

In [114]:
def sample_data(N=10, x=None, y=None, classes=None):
    """Draw a few-shot support set containing N random pieces per composer.

    Args:
        N: Number of pieces to sample (without replacement) for each composer.
        x: Array of pieces to sample from, indexed along axis 0. Defaults to
            the notebook-level ``x_train``.
        y: Label array aligned with ``x``. Defaults to the notebook-level
            ``y_train``.
        classes: Labels to sample for, in output order. Defaults to the
            notebook-level ``composers``.

    Returns:
        Tuple ``(x_few, y_few)``: ``x_few`` stacks the sampled pieces and has
        ``N * len(classes)`` rows; ``y_few`` holds the matching labels. Rows
        are grouped by composer, following the order of ``classes``.

    Raises:
        ValueError: From ``np.random.choice`` when a composer has fewer than
            ``N`` pieces available.
    """
    x = np.asarray(x_train if x is None else x)
    y = np.asarray(y_train if y is None else y)
    classes = np.asarray(composers if classes is None else classes)

    sampled_pieces = []
    for composer in classes:
        # Fix: select the rows of *this* composer. The previous code filtered
        # on composers[0], so every label was paired with pieces written by
        # the first composer only.
        candidate_rows = np.flatnonzero(y == composer)
        chosen_rows = np.random.choice(candidate_rows, size=N, replace=False)
        sampled_pieces.append(x[chosen_rows])

    x_few = np.concatenate(sampled_pieces)
    # Each label is repeated N times, in the same order as the blocks of x_few.
    y_few = np.repeat(classes, N)

    return x_few, y_few

# Smoke test: draw one support set with the default N and show the label
# array shape followed by the feature array shape.
x_few, y_few = sample_data()

print(y_few.shape, x_few.shape, sep="\n")

(90,)
(90, 64, 62)


In [115]:
import pandas as pd

# Helper func
def expand_repeat(arr, repeats, axis=0):
    """Insert a new axis at ``axis`` and tile the array ``repeats`` times along it.

    Args:
        arr: Input array.
        repeats: Length of the newly created axis.
        axis: Position at which the new axis is inserted.

    Returns:
        A new array with one more dimension than ``arr``, where every slice
        along ``axis`` is a copy of ``arr``.
    """
    with_new_axis = np.expand_dims(arr, axis=axis)
    tiled = np.repeat(with_new_axis, repeats, axis=axis)
    return tiled

def fit_and_predict(x_few, y_few, x_test, extractor, classes=None):
    """Rank composers for each test sample by mean distance to the few-shot examples.

    Both sets are embedded with ``extractor``. For every test sample the
    Euclidean distance to each support example is computed and averaged per
    composer, and the composers are ordered from nearest to farthest.

    Args:
        x_few: The L support examples, in whatever form ``extractor`` accepts.
        y_few: Length-L array holding the composer label of each support example.
        x_test: The M query examples, in whatever form ``extractor`` accepts.
        extractor: Callable mapping a batch of examples to an array of feature
            vectors with one row per example.
        classes: Labels defining the class indices of the ranking. Defaults
            to the notebook-level ``composers``.

    Returns:
        Integer array of shape ``(M, len(classes))``. Row m lists indices into
        ``classes`` sorted by increasing mean distance, so column 0 is the
        top prediction.
    """
    if classes is None:
        classes = composers
    y_few = np.asarray(y_few)

    few_features = np.asarray(extractor(x_few))    # L x d
    test_features = np.asarray(extractor(x_test))  # M x d

    # Pairwise M x L distances via broadcasting. The sizes come from the
    # arrays themselves: the previous version tiled with the globals N and
    # ways, which only exist (and only match) inside the experiment loop.
    diffs = test_features[:, np.newaxis, :] - few_features[np.newaxis, :, :]
    euclideans = np.sqrt(np.sum(np.square(diffs), axis=-1))

    avg_euclideans = np.zeros((len(test_features), len(classes)))

    for i, composer in enumerate(classes):
        composer_points = np.flatnonzero(y_few == composer)
        # A composer with no support examples yields NaN in its column.
        avg_euclideans[:, i] = np.mean(euclideans[:, composer_points], axis=-1)

    # Ascending sort: the smallest mean distance comes first.
    return np.argsort(avg_euclideans, axis=-1)

In [116]:
def top_x_acc(y_true, y_pred, x):
    """Top-x accuracy for ranked class predictions.

    Args:
        y_true: Length-M sequence of true class indices.
        y_pred: (M, C) ranking of class indices per sample, best guess first.
        x: Number of leading ranked classes that count as a hit. Values larger
            than C are allowed and behave like ``x == C``.

    Returns:
        Python float: the mean over samples of the number of matches between
        the true class and the first ``x`` ranked classes. When each row of
        ``y_pred`` lists every class once, this is the top-x accuracy.
    """
    y_true = torch.as_tensor(y_true)
    y_pred = torch.as_tensor(y_pred)

    top_x = y_pred[..., :x]
    # Broadcasting the (M, 1) labels against the (M, <=x) slice avoids the
    # shape mismatch that an explicit x-fold repeat caused whenever x > C.
    hits = top_x == y_true.unsqueeze(-1)
    return hits.float().sum(-1).mean().item()

def mean_recip_rank(y_true, y_pred):
    """Mean reciprocal rank of the true class within each ranking.

    Args:
        y_true: Length-M sequence of true class indices.
        y_pred: (M, C) ranking of class indices per sample, best guess first.

    Returns:
        Python float: the mean of ``1 / rank`` over samples, where rank is the
        1-based position of the true class in its row. A row that does not
        contain the true class contributes 0.
    """
    y_true = torch.as_tensor(y_true)
    y_pred = torch.as_tensor(y_pred)

    hits = y_pred == y_true.unsqueeze(-1)

    # argmax over the 0/1 mask gives the 0-based position of the first match
    # (position 0 is the best guess); +1 turns it into a 1-based rank.
    true_ranks = hits.float().argmax(-1) + 1
    recip_ranks = 1.0 / true_ranks

    # argmax returns 0 for an all-False row, which would otherwise be scored
    # as a perfect rank-1 hit; give such rows a reciprocal rank of 0 instead.
    recip_ranks = torch.where(hits.any(-1), recip_ranks, torch.zeros_like(recip_ranks))

    return recip_ranks.mean().item()

In [120]:
# Number of independent few-shot resamplings run per (N, model) setting;
# the reported metrics are averaged over these T trials.
T = 30


def random_extractor(x):
    """Baseline feature extractor that ignores the content of its input.

    Args:
        x: Batch of examples; only its length is used.

    Returns:
        Array of shape ``(len(x), 5)`` filled with standard-normal noise drawn
        from NumPy's global random state.
    """
    n_samples, feature_dim = len(x), 5
    return np.random.normal(size=(n_samples, feature_dim))


# Feature extractors to evaluate, keyed by display name.
models = {"Random":random_extractor}
# Few-shot support sizes (pieces per composer) to sweep over.
Ns = [1, 10, 100]

# The integer targets depend only on y_test and composers, so build them once
# instead of once per trial. The dict lookup replaces a linear list.index()
# scan per label; a test label missing from composers raises KeyError.
composer_index = {composer: idx for idx, composer in enumerate(composers)}
y_true = np.array([composer_index[label] for label in y_test])

# results[N][model_name] -> {"top1", "top5", "mrr"}, each averaged over T trials.
# NOTE(review): no RNG seed is set in the visible cells, so the sampled support
# sets (and therefore these numbers) change from run to run.
results = {}

# Keep the loop variable named N: it is a notebook-level global that other
# cells may read.
for N in Ns:
    results[N] = {}

    for model_name, extractor in models.items():

        top1s = []
        top5s = []
        mrrs = []

        for _ in range(T):
            # Fresh random support set for every trial.
            x_few, y_few = sample_data(N=N)

            y_pred = fit_and_predict(x_few, y_few, x_test, extractor)

            top1s.append(top_x_acc(y_true, y_pred, 1))
            top5s.append(top_x_acc(y_true, y_pred, 5))
            mrrs.append(mean_recip_rank(y_true, y_pred))

        results[N][model_name] = {"top1":np.mean(top1s), "top5":np.mean(top5s), "mrr":np.mean(mrrs)}

In [121]:
# Dump the nested results dict: results[N][model_name] holds the mean
# "top1", "top5" and "mrr" values for that support size and extractor.
print(results)

{1: {'Random': {'top1': 0.1113998552163442, 'top5': 0.5545449495315552, 'mrr': 0.31415307422478994}}, 10: {'Random': {'top1': 0.11055583258469899, 'top5': 0.5546393533547719, 'mrr': 0.3136714696884155}}, 100: {'Random': {'top1': 0.11144427756468454, 'top5': 0.5534010708332062, 'mrr': 0.3141970207293828}}}
