# IOI complexity and event density

## Import dependencies, set constants etc.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats

from src import utils
from src.features.rhythm_features import IOIComplexity
from src.visualise.complexity_plots import *

## Load in data

In [None]:
import os

# Load the serialised corpus of matched onsets from the project's `models` directory.
# The path is assembled with os.path.join so that the separator is correct on every
# platform; hard-coded backslashes only resolve on Windows.
onsets = utils.unserialise_object(
    os.path.join(str(utils.get_project_root()), 'models', 'matched_onsets_corpus_chronology.p')
)

In [None]:
# Each candidate bin value is kept next to the LaTeX label used for it on plot axes,
# so the two lists below cannot drift out of step with each other.
_bins_and_labels = [
    (1, r'>$\frac{1}{2}$'),
    (1/2, r'$\frac{1}{2}$'),
    (5/12, r'$\frac{5}{12}$'),
    (3/8, r'$\frac{3}{8}$'),
    (1/3, r'$\frac{1}{3}$'),
    (1/4, r'$\frac{1}{4}$'),
    (1/6, r'$\frac{1}{6}$'),
    (1/8, r'$\frac{1}{8}$'),
    (1/12, r'$\frac{1}{12}$'),
    (0, r'<$\frac{1}{12}$'),
]
fracs = [value for value, _ in _bins_and_labels]
fracs_s = [label for _, label in _bins_and_labels]
# The bins must be listed from largest to smallest
assert fracs == sorted(fracs, reverse=True)

In [None]:
def _nearest_bin(value):
    """Return the entry of `fracs` closest to `value` (the first one wins on a tie)."""
    return min(fracs, key=lambda x: abs(x - value))


# Collect one row per inter-onset interval (IOI), tagged with the bin it falls closest to.
# NOTE(review): tracks are not filtered by time signature here.
res = []
for instr in ['piano']:
    for track in onsets:
        # Duration of one beat; assumes track.tempo is in beats per minute -- TODO confirm
        beat_duration = 60 / track.tempo
        onset_times = pd.Series(track.ons[instr])
        for ioi in onset_times.diff().dropna().to_list():
            n_beats = ioi / beat_duration
            # Intervals longer than four beats are left out entirely
            if n_beats > 4:
                continue
            # Rescale so that four beats correspond to a proportion of 1
            prop_ioi = n_beats / 4
            res.append(dict(instr=instr, ioi=ioi, prop_ioi=prop_ioi, bin=_nearest_bin(prop_ioi)))
rhythm = pd.DataFrame(res)

## Stacked bar plot of all IOIs

In [None]:
import src.visualise.visualise_utils as vutils

class BarPlotTotalBinsPiano(BarPlotTotalBins):
    """Variant of `BarPlotTotalBins` with its own figure title, bar styling and axis formatting."""

    def __init__(self, ioi_df, **kwargs):
        """Initialise the parent plot, then point the figure title at the piano-specific file."""
        super().__init__(ioi_df, **kwargs)
        self.figure_title = fr'complexity_plots\barplot_totalbins_piano_{self.corpus_title}'

    def _create_plot(self):
        """Draw `self.df` as a stacked bar chart on `self.ax` and return the pandas plot result."""
        bar_kws = dict(
            kind='bar',
            stacked=True,
            ax=self.ax,
            color=vutils.RED,
            zorder=10,
            lw=vutils.LINEWIDTH,
            edgecolor=vutils.BLACK,
            ylabel='Count',
            xlabel='Bin',
        )
        return self.df.plot(**bar_kws)

    def _format_ax(self):
        """Apply tick labels, spine/grid styling and notation images, and add a top tick axis."""
        # Labels are applied in the reverse of the order in which `fracs_s` is defined
        self.ax.set(xticklabels=fracs_s[::-1])
        self.ax.tick_params(axis='both', width=vutils.TICKWIDTH, color=vutils.BLACK, rotation=0)
        spines = self.ax.spines.values()
        plt.setp(spines, linewidth=vutils.LINEWIDTH, color=vutils.BLACK)
        self.ax.grid(zorder=0, axis='y', **vutils.GRID_KWS)
        # Remove the legend that was attached to the axis
        legend = self.ax.get_legend()
        legend.remove()
        # NOTE(review): y=75000 is a hard-coded position; confirm it still suits the data range
        self._add_notation_images(y=75000)
        # Mirror the bottom tick positions along the top edge, without any labels
        top_axis = self.ax.secondary_xaxis('top')
        top_axis.set_xticks(self.ax.get_xticks(), labels=[])
        top_axis.tick_params(width=vutils.TICKWIDTH)

# Build the piano bar plot from the binned IOI table and display it
BarPlotTotalBinsPiano(rhythm).create_plot()
plt.show()

In [None]:
# Dimensions of the binned IOI table, as (rows, columns); one row was appended per retained IOI
rhythm.shape

In [None]:
# Raw (non-normalised) number of IOIs assigned to each bin
rhythm['bin'].value_counts()

## Histogram plot of proportional IOI durations and bins

In [None]:
# Build the HistPlotBins figure from the binned IOI table and display it
HistPlotBins(rhythm).create_plot()
plt.show()

## Apply our extractor class for calculating IOI complexity (`IOIComplexity`, imported from `src.features.rhythm_features`)

In [None]:
# Run the IOI complexity extractor over the piano part of every track. Each track
# contributes one row of column means to `cdfs`; `total_score` accumulates the number
# of rows in every per-track complexity table.
# NOTE(review): tracks are not filtered by time signature here.
cdfs = []
total_score = 0
for num, track in enumerate(onsets):
    print(f'{num} / {len(onsets)}')
    # Arguments that stay the same for every instrument within this track
    shared_kws = dict(
        downbeats=track.ons['downbeats_manual'],
        tempo=track.tempo,
        time_signature=track.item['time_signature'],
    )
    for instr in ['piano']:
        extractor = IOIComplexity(my_onsets=track.ons[instr], **shared_kws)
        total_score += len(extractor.complexity_df)
        # Average every column of the complexity table, then attach identifying metadata
        cdfs.append({
            **extractor.complexity_df.mean().to_dict(),
            'mbz_id': track.item['mbz_id'],
            'instr': instr,
            'tempo': track.tempo,
        })
print(total_score)

## Get the correlation of onset density and complexity

In [None]:
# Assemble the per-track averages into a table, one step at a time
average = pd.DataFrame(cdfs)
# The averaged `bar_range` column is not used in what follows
average = average.drop(columns=['bar_range'])
# Keep only tracks that have both a complexity (`lz77`) and a density (`n_onsets`) value
average = average.dropna(subset=['lz77', 'n_onsets'])
average['n_onsets'] = average['n_onsets'].astype(float)
average.head()

In [None]:
# Proportion of variance NOT shared between each pair of variables (1 - r squared).
# `average` also holds the string columns `instr` and `mbz_id`; recent pandas versions
# raise when `corr()` meets non-numeric data, so restrict to numeric columns first.
1 - average.select_dtypes(include='number').corr() ** 2

In [None]:
# Dimensions of the per-track averages table, as (rows, columns)
average.shape

## Bootstrap mean complexity/density

In [None]:
# Mean of every numeric column, computed separately for each instrument
average.groupby('instr').mean(numeric_only=True)

In [None]:
# Bootstrap the mean of each variable per instrument: resample the tracks with
# replacement 100 times (seeded 0..99 for reproducibility) and report the 2.5th and
# 97.5th percentiles of the resampled means either side of the observed mean.
for instr_name, instr_rows in average.groupby('instr'):
    for var in ('n_onsets', 'lz77'):
        values = instr_rows[var]
        true_mean = values.mean()
        resampled_means = []
        for seed in range(100):
            resample = values.sample(frac=1, replace=True, random_state=seed)
            resampled_means.append(resample.mean())
        low, high = (np.percentile(resampled_means, q) for q in (2.5, 97.5))
        print(instr_name, var, low, true_mean, high)

## Plot average complexity and density

In [None]:
# Build the BarPlotComplexityDensity figure from the per-track averages and display it
BarPlotComplexityDensity(average).create_plot()
plt.show()

## Plot relationships between tempo and complexity/density

In [None]:
# Build the RegPlotTempoDensityComplexity figure from the per-track averages and display it
RegPlotTempoDensityComplexity(average).create_plot()
plt.show()

In [None]:
# Pearson correlation between tempo and onset density, using only the rows that have
# no missing value in any column
complete_rows = average.dropna()
stats.pearsonr(complete_rows['tempo'], complete_rows['n_onsets'])

In [None]:
# Dimensions of the per-track table once rows with any missing value are dropped
average.dropna().shape