# ISMIR 2020: Schema Instance Classification

This notebook is used to generate the results presented in the paper.

## Data Loading

In [None]:
import Pkg; Pkg.activate(".."); Pkg.instantiate();

In [None]:
# Load the project source file (presumably defining the helper functions called in
# the cells below — confirm) and then call `pgfplotsx()`, which presumably selects
# the PGFPlotsX plotting backend — confirm.
include("../src/classify.jl"); pgfplotsx();

In [None]:
# setup

# Location of the annotated corpus, given relative to this notebook.
corpusdir = "../../schema_annotation_data/data/mozart_sonatas/"; # TODO: fix this for the final notebook

# The lexicon file is read from the parent directory of `corpusdir`.
lex = Polygrams.loadlexicon(joinpath(corpusdir, "..", "lexicon.json"));

# Schema identifiers that are passed to `loadcorpusdata` in the loading cell.
# Entries that are commented out are not part of the list.
# The trailing numbers are presumably per-schema instance counts — TODO confirm.
# TODO: select schemata
ismirschemas = [
    "doremi.2", # 5
    "fenaroli.2", # 10
    "fenaroli.2.min", # 3
    #"fenaroli.2.basscanon", # 0
    "fenaroli.2.basscanon.min", # 1
    #"fenaroli.2.durante",
    #"fenaroli.2.durante.min",
    "fenaroli.2.flipped", # 43
    "fenaroli.2.flipped.min", # 8
    "fenaroli.2.melcanon", # 6
    "fenaroli.2.melcanon.min", # 2
    #"folia.2", # 0
    "fonte.2", # 49
    "fonte.2.flipped", # 2
    "fonte.2.majmaj", # 8
    #"grandcad.2", # 0
    "indugio.2", # 9
    "indugio.2.voiceex", # 5
    "lamento.2", # 2
    "lully.2", # 2
    "morte.2", # 1
    "prinner.2", # 32
    "quiescenza.2", # 46
    "quiescenza.2.diatonic", # 6
    "solfami.2" # 4
];

In [None]:
# loading and cleaning

# `loadcorpusdata` returns four values for the selected schemata. `df` is the data
# frame used throughout the notebook; `tsigs` (presumably time signatures — confirm)
# is not referenced again in the visible part of the notebook.
df, notelists, foldednotedicts, tsigs =
    loadcorpusdata(corpusdir, ismirschemas);

# Each step replaces `df` with the result of processing the previous `df`.
df = cleancorpusdata(df, lex, foldednotedicts)
df = findgroups(df)
df = findfullcontexts(df, notelists)

# Last expression of the cell: column summary of the prepared data frame.
describe(df)

In [None]:
# basic statistics
# `n` is the number of rows of `df`; `npos` counts the rows whose `isinstance` is true.
n = size(df)[1]
npos = count(df.isinstance)

# Absolute counts, followed by the share of all rows in parentheses.
println("data points: ", n)
println("positives: $npos ($(npos/n))")
println("negatives: $(n-npos) ($(1-(npos/n)))")

# Sum of the lengths of all values stored in `foldednotedicts`.
println("total number of notes: ", sum(length.(values(foldednotedicts))))

# Per-schema count of rows with `isinstance` set, sorted by schema name.
# This is the last expression, so the resulting table is the cell output.
sort(by(df, :schema, instances=:isinstance=>count), :schema)

## Features

In [None]:
# define the set of "independent" features
# which don't depend on training data

# Maps each feature name to a function that computes the feature from one row.
# Most features read `row.notes`; `:mweight` reads `row.noteswritten` instead.
features = Dict(
    # NOTE(review): the single-argument `get` presumably unwraps `row.timesigs` —
    # confirm the type of that column.
    :mweight => row -> mWeight(row.noteswritten, get(row.timesigs)),
    :dur =>     row -> getDuration(row.notes, row.beatfactor),
    :vdist =>   row -> Polygrams.voicedist(row.notes),
    #:sskip =>   row -> stageSkip(row.notes, row.beatfactor),
    :rdsums =>  row -> rhythmDistanceSumInEvent(row.notes, row.beatfactor),
    :rdsumv =>  row -> rhythmDistanceSumInVoice(row.notes, row.beatfactor),
    :onsets =>  row -> onsetsinstage(row.notes, row.context),
    :rreg =>    row -> rhythmicirregularity(row.notes, row.beatfactor),
    :mreg   =>  row -> metricirregularity(row.notes, row.beatfactor),
    :pdsums =>  row -> pitchDistanceSumInEvent(row.notes),
    :pdsumv =>  row -> pitchDistanceSumInVoice(row.notes),
    :preg =>    row -> pitchirregularity(row.notes),
)

# Names of the features defined above, in a fixed order. `:sskip` is commented out
# in `features` and is therefore not listed here either.
indepfeatnames = [:rreg, :mreg, :preg,
                  :rdsums, :rdsumv, :pdsums, :pdsumv,
                  :mweight, :dur, :onsets, :vdist] # independent feature names
# `:profiledist` has no entry in `features`; presumably it is the training-dependent
# feature added by `rundepfeatures` — TODO confirm.
featnames = [:profiledist, indepfeatnames...]; # all feature names

In [None]:
# run the features on the whole dataset
# `df` is replaced by the result of `runfeatures`; the trailing `;` suppresses output.
df = runfeatures(df, features);
# Summarise only the columns named in `indepfeatnames`.
describe(df[!,indepfeatnames])

## Training and Evaluation

In [None]:
# Training pipeline for a single train/test split.
#
# Steps, in order:
#   1. learn the training-dependent feature information from the training part only
#      and apply it to both parts,
#   2. upsample both parts to balance the classes,
#   3. fit a model on the upsampled training part using the global `featnames`,
#   4. predict on the upsampled test part and twice on the original test part
#      (once with the default settings, once with `corrinter=0`),
#   5. print the fitted model and the evaluation of the balanced predictions.
#
# Returns the three prediction results followed by the fitted model.
function pipeline(dftrain, dftest)
    # features that depend on training data: train on the training part only
    depinfo = trainfeatures(dftrain)
    trainfull = rundepfeatures(dftrain, depinfo)
    testfull = rundepfeatures(dftest, depinfo)

    # class balancing (training part first, then test part)
    balancedtrain = upsample(trainfull)
    balancedtest = upsample(testfull)

    # model fitting
    # TODO: add other models
    classifier = fitmodel(balancedtrain, featnames)

    # predictions: balanced test data, original test data with the default settings,
    # and original test data with `corrinter=0`
    balancedpred = addpredictions(balancedtest, classifier)
    correctedpred = addpredictions(testfull, classifier)
    rawpred = addpredictions(testfull, classifier; corrinter=0)

    # per-split report
    println(classifier)
    println()
    showeval(balancedpred)
    println()

    return (balancedpred, correctedpred, rawpred, classifier)
end

In [None]:
# run the crossvalidation and collect the data

# The `5` is presumably the number of folds — confirm against `crossval`.
crossdata = crossval(pipeline, df, 5);

# Every element of `crossdata` is indexed at positions 1 to 4, presumably the four
# values returned by `pipeline` for one fold — confirm. Positions 1 to 3 are
# concatenated across folds; position 4 (the fitted model) is kept per fold.
dfupred = vcat(getindex.(crossdata, 1)...);
dfpredcorr = vcat(getindex.(crossdata, 2)...);
dfpred = vcat(getindex.(crossdata, 3)...);
models = getindex.(crossdata, 4);

### Direct Evaluation

In [None]:
# overall evaluation

# `dfupred` holds the predictions made on the upsampled test folds.
showeval(dfupred)
evaltex(dfupred)

In [None]:
# `dfpred` holds the predictions on the original (not upsampled) test folds,
# made with `corrinter=0`.
showeval(dfpred)
evaltex(dfpred)

In [None]:
# `dfpredcorr` holds the predictions on the original (not upsampled) test folds,
# made with the default `addpredictions` settings.
showeval(dfpredcorr)
evaltex(dfpredcorr)

### Grouped Evaluation

In [None]:
# Collapse `df` to one row per `:group`. A group is marked as an instance, or as
# predicted positive, as soon as any of its rows is.
function groupdata(df)
    return by(
        df,
        :group,
        isinstance = :isinstance => any,
        predbool = :predbool => any,
    )
end

# grouped versions of the three cross-validation prediction frames
dfupredgrp = groupdata(dfupred)
dfpredgrp = groupdata(dfpred)
dfpredcorrgrp = groupdata(dfpredcorr);

In [None]:
# grouped evaluation of the predictions made on the upsampled test folds
showeval(dfupredgrp) # note that grouping removes the effect of upsampling
evaltex(dfupredgrp)

In [None]:
# grouped evaluation of the predictions made with `corrinter=0` on the original test folds
showeval(dfpredgrp)
evaltex(dfpredgrp)

In [None]:
# grouped evaluation of `dfpredcorr` (predictions made with the default
# `addpredictions` settings on the original test folds)
# this doesn't really make sense because it's not corrected to the right proportion
showeval(dfpredcorrgrp)
evaltex(dfpredcorrgrp)

In [None]:
# Redo the predictions fold by fold, this time correcting for the class imbalance
# of the *grouped* data: for each fold the share of positive groups is computed
# and handed to `addpredictions` as `tau`.
preds = map(zip(getindex.(crossdata, 3), models)) do (foldpred, fitted)
    grouped = groupdata(foldpred)
    posrate = count(grouped.isinstance) / size(grouped, 1)
    return addpredictions(foldpred, fitted; tau=posrate)
end

# concatenate the folds, group the rows, and evaluate on group level
dfpredcorr2 = vcat(preds...)
dfpredcorr2grp = groupdata(dfpredcorr2)
showeval(dfpredcorr2grp)
evaltex(dfpredcorr2grp)

## Model Inspection

In [None]:
# fit the model again, this time on the full data
# NOTE(review): this comment used to say that the dependent features are still based
# on the cross-validation splits, but `trainfeatures` is called on the full `df`
# below — confirm which of the two is intended.

info = trainfeatures(df);
dfcomplete = rundepfeatures(df, info);

In [None]:
# Build an upsampled and a downsampled version of the complete data and fit the
# model on the upsampled one, using all features in `featnames`.
# `dfdcomplete` is not referenced again in the visible part of the notebook.
dfucomplete = upsample(dfcomplete)
dfdcomplete = downsample(dfcomplete)
model = fitmodel(dfucomplete, featnames)

In [None]:
# Bar plot of the coefficients of the full-data model, overlaid with the
# coefficients of the individual cross-validation models as scatter points.
cnames = plotmodel(model; xrotation=90, typ=:bar, tickfontsize=12, tickfonthalign=:hcenter, legend=false)

# One row per coefficient name, one column per cross-validation model.
# Assumes all models share the coefficient order of `model` — TODO confirm.
pointdict = Dict(zip(coefnames(model), eachrow(hcat(coef.(models)...))))
# Reorder the rows to follow `cnames`, the names returned by `plotmodel`.
points = vcat((pointdict[name]' for name in cnames)...)

# Use a dedicated name for the row count: the global `n` holds the number of data
# points computed in the statistics cell and must not be overwritten here.
ncoefs = size(points, 1)
# The x offset of 0.33 presumably aligns the points with the bars — confirm.
modelplot = plot!([x - 0.33 for x in 1:ncoefs], points; typ=:scatter, color=:black, alpha=0.5)

In [None]:
# Write the coefficient plot to the working directory, once as .tex and once as .pdf.
savefig(modelplot, "modelplot.tex")
savefig(modelplot, "modelplot.pdf")

In [None]:
# Predict on the upsampled complete data. Note that `model` was fitted on this same
# data, so this is an evaluation on the training data.
dfucomplete = addpredictions(dfucomplete, model)
showeval(dfucomplete)

In [None]:
# Add predictions to the complete (not upsampled) data, using `corrinter=0`.
dfcomplete = addpredictions(dfcomplete, model; corrinter=0);

In [None]:
# Plot the `:pred` column of the complete data, grouped by `:isinstance`
# (dashed line for the first label, solid line for the second).
predplt = plotcol(dfcomplete, :pred;
                  group=:isinstance, label=["non-insts" "instances"],
                  title="Combined Prediction",
                  size=(600,200), lw=1,
                  ls=[:dash :solid], legend=:topright, legendfontsize=11)

In [None]:
# Write the prediction plot to the working directory, once as .tex and once as .pdf.
savefig(predplt, "preddist.tex")
savefig(predplt, "preddist.pdf")

In [None]:
# Create the plots for all features in `featnames`, grouped by `:isinstance`,
# with the same line styles as the prediction plot.
# NOTE(review): this call uses the keyword `labels`, whereas the `plotcol` call for
# the prediction plot uses `label` — confirm that both spellings are accepted.
featplts = featureplots(dfcomplete, featnames; group=:isinstance,# width=600, height=150,
                        labels=["non-insts" "instances"], title="",
                        ls=[:dash :solid], lw=1,
                        titlefontsize=14, legendfontsize=11)

In [None]:
# Arrange the feature plots in a 6×2 grid; `featnames` has 12 entries, so this
# matches if `featureplots` returns one plot per feature — confirm.
distplt = plot(featplts...; layout=grid(6,2), size=(600,1000))

In [None]:
# Write the combined feature plot to the working directory, once as .tex and once as .pdf.
savefig(distplt, "featdist.tex")
savefig(distplt, "featdist.pdf")

## Inspecting Misclassified Matches

In [None]:
# Keep only the rows of `dfpred` whose `groupisinstance` flag is false and pass them
# to `confidentfps` (presumably selecting confident false positives — confirm).
wrong = confidentfps(dfpred[.! dfpred.groupisinstance,:])

In [None]:
# Show piece, note string and prediction for the rows of `wrong` with schema "fonte.2".
wrong[wrong.schema .== "fonte.2", [:piece, :notestring, :pred]]