# Onset asynchrony

## Import dependencies, set constants etc.

In [None]:
from itertools import pairwise, combinations

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

import src.visualise.visualise_utils as vutils
from src import utils
from src.features.rhythm_features import ProportionalAsynchrony
from src.visualise.asynchrony_plots import *

## Load in data
We need to unserialise our list of onsets first

In [None]:
# Build the path from its components rather than with hard-coded backslashes,
# so the file is also found on operating systems whose separator is not '\'.
import os

onsets: list = utils.unserialise_object(
    os.path.join(utils.get_project_root(), 'models', 'matched_onsets_corpus_chronology')
)

## Get position of marked beats
Now we can iterate through successive downbeats in a performance and express the position of each marked beat as a proportion of the time between the two downbeats.

In [None]:
asynchronies = []
# Instrument columns to process, in the key order of the project-wide mapping
instrument_cols = list(utils.INSTRUMENTS_TO_PERFORMER_ROLES.keys())
# Iterate over every track
for num, track in enumerate(onsets, 1):
    print(f'{num} / {len(onsets)}')
    # NOTE: tracks with time_signature == 3 are NOT skipped here, although the windows below place beats a
    #  quarter of a bar apart -- TODO confirm this is intended
    bandleader = track.item['pianist']
    tempo = track.tempo
    temp = pd.DataFrame(track.summary_dict)
    idx = temp[temp['metre_manual'] == 1].index
    # Iterate through successive downbeats
    for downbeat1, downbeat2 in pairwise(idx):
        # Get all the beats marked between our two downbeats (beat 1 bar 1, beat 1 bar 2)
        bw = temp[(downbeat1 <= temp.index) & (temp.index < downbeat2)]
        sub = bw[instrument_cols]
        # Get the beat position of the downbeat opening this bar and of the downbeat opening the next one
        first = temp[temp.index == downbeat1]['beats'].iloc[0]
        last = temp[temp.index == downbeat2]['beats'].iloc[0]
        # Scale our onsets to be proportional with our first and last values
        prop = (sub - first) / (last - first)
        # Expected position of every beat within the bar (beat 1 -> 0, beat 2 -> 1/4, ...)
        expected = (bw['metre_manual'] - 1) * 1/4
        # Drop values after 1/16th note or before 1/32nd note
        upper_bound = expected + 1/16
        lower_bound = expected - 1/32
        # Blank out-of-window onsets in one step. The previous chained assignment (prop[col][mask] = nan)
        #  writes to a temporary object under pandas Copy-on-Write and so would leave prop unchanged.
        prop = prop.mask(prop.lt(lower_bound, axis=0) | prop.gt(upper_bound, axis=0))
        # Convert values to degrees
        prop *= 360
        prop = pd.concat([prop, bw['metre_manual']], axis=1)
        # Iterate through each column and add values to our list
        for instr in instrument_cols:
            for _, val in prop[[instr, 'metre_manual']].dropna().iterrows():
                asynchronies.append(dict(instr=instr, asynchrony=val[instr], beat=val['metre_manual'], bandleader=bandleader, tempo=tempo))
df = pd.DataFrame(asynchronies)

In [None]:
# Sanity check: (rows, columns) of the piano-only subset
print(df.loc[df['instr'] == 'piano'].shape)

## Clean dataset

In [None]:
# Reorder the rows so that instruments follow the key order of INSTRUMENTS_TO_PERFORMER_ROLES
#  (piano-bass-drums according to the original note) -- the plots rely on this order for hue mapping
instrument_order = list(utils.INSTRUMENTS_TO_PERFORMER_ROLES)
df = df.set_index('instr').loc[instrument_order].reset_index()

In [None]:
# Mean raw asynchrony of the drums on beat 1: this becomes the new zero point
drums_downbeat = df.loc[(df['instr'] == 'drums') & (df['beat'] == 1), 'asynchrony'].mean()
df['asynchrony_offset'] = df['asynchrony'] - drums_downbeat
# Nominal position of each beat, as a fraction of the bar (beat 1 -> 0, beat 2 -> 1/4, ...)
beat_position = (df['beat'] - 1) / 4
# Remove the nominal beat position so that asynchrony is independent of beat location
df['asynchrony_adjusted'] = (df['asynchrony'] / 360) - beat_position
# Same adjustment, applied to the drums-referenced values
df['asynchrony_adjusted_offset'] = (df['asynchrony_offset'] / 360) - beat_position

## Create circular plot of asynchrony by beat number

In [None]:
# Build the polar asynchrony plot from the cleaned dataframe, then display the figure
PolarPlotAsynchrony(df).create_plot()
plt.show()

## Bootstrap peaks in KDE

In [None]:
# Build the bar plot from the cleaned dataframe; as the last expression of the cell, whatever
#  create_plot() returns is also echoed by the notebook
BarPlotProportionalAsynchrony(df).create_plot()

## Plot histogram of beat distribution

In [None]:
# Build the histogram plot from the cleaned dataframe, then display the figure
HistPlotProportionalAsynchrony(df).create_plot()
plt.show()

## Bootstrap differences per instrument

In [None]:
# Average within every bandleader/instrument/beat combination, so each combination contributes one value
grouped = df.groupby(['bandleader', 'instr', 'beat']).mean().reset_index()
bootstraps = []
# Compare every unordered pair of instruments
for i1, i2 in combinations(grouped['instr'].unique(), 2):
    print(i1, i2)
    val1 = grouped.loc[grouped['instr'] == i1, 'asynchrony_adjusted_offset']
    val2 = grouped.loc[grouped['instr'] == i2, 'asynchrony_adjusted_offset']
    # Observed difference in means between the two instruments
    true_mean = val1.mean() - val2.mean()
    boots = []
    for s in range(vutils.N_BOOT):
        # Both instruments are resampled (with replacement) using the same seed on each iteration
        resampled1 = val1.sample(frac=1, replace=True, random_state=s)
        resampled2 = val2.sample(frac=1, replace=True, random_state=s)
        boots.append(resampled1.mean() - resampled2.mean())
    # 95% percentile interval of the bootstrapped differences
    low, high = np.percentile(boots, [2.5, 97.5])
    bootstraps.append(dict(instr1=i1, instr2=i2, low=low, mean=true_mean, high=high))
pd.DataFrame(bootstraps)

## Model differences in piano 'lateness' vs tempo

In [None]:
tempo_res = []
offset_col = 'asynchrony_adjusted_offset'
for (leader, bpm), grp in df.groupby(['bandleader', 'tempo']):
    # Ignore bandleader/tempo groups with fewer than 15 complete (NaN-free) rows
    if len(grp.dropna()) < 15:
        continue

    piano_mean = grp.loc[grp['instr'] == 'piano', offset_col].mean()
    # One record per accompanying instrument: mean piano position minus mean position of that instrument
    tempo_res.extend(
        dict(
            bandleader=leader,
            tempo=bpm,
            instr=partner,
            diff=piano_mean - grp.loc[grp['instr'] == partner, offset_col].mean(),
        )
        for partner in ('bass', 'drums')
    )
# Records with a missing difference (e.g. an instrument absent from the group) are discarded
tempo_df = pd.DataFrame(tempo_res).dropna()
# Z-score the tempo column
tempo_mean, tempo_sd = tempo_df['tempo'].mean(), tempo_df['tempo'].std()
tempo_df['tempo_standard'] = (tempo_df['tempo'] - tempo_mean) / tempo_sd

In [None]:
# Preview the first rows of tempo_df
tempo_df.head()

In [None]:
# Show the (rows, columns) dimensions of tempo_df
tempo_df.shape

In [None]:
# Build the regression plot from tempo_df, then display the figure
RegPlotPianistAsynchrony(tempo_df).create_plot()
plt.show()

In [None]:
# Fixed effects: standardised tempo, partner instrument (bass as the reference level) and their interaction
fixed_formula = "diff ~ tempo_standard * C(instr, Treatment(reference='bass'))"
# Random effects per bandleader: tempo slope and instrument terms, without a random intercept
random_formula = "0 + tempo_standard + C(instr, Treatment(reference='bass'))"
model = smf.mixedlm(
    fixed_formula,
    data=tempo_df,
    groups=tempo_df['bandleader'],
    re_formula=random_formula
)
md = model.fit()
print(md.summary())
# Variance of the fixed-effect predictions, obtained by calling md.predict() on the underlying data
var_fixed = md.predict().var()
# Random-effect variance, taken here as the mean of all entries of the random-effects covariance matrix
var_random = float(md.cov_re.to_numpy().mean())
# Residual variance
var_resid = md.scale
# Variance attributed to fixed + random effects, and the total including residuals
var_explained = var_fixed + var_random
total_var = var_explained + var_resid
# Report both r2 values
print('conditional_r2:', var_explained / total_var)
print('marginal_r2:', var_fixed / total_var)

In [None]:
# Display the fitted parameters multiplied by 400
# NOTE(review): 400 presumably mirrors the `* 100 * 4` scaling used elsewhere in this notebook
#  (proportion of a bar -> percentage of a beat) -- confirm
md.params * 400

## Drums autocorrelation: weak vs. strong beats

In [None]:
# Containers for the results (nothing is appended to them in this cell yet)
weak_iois = []
strong_iois = []
# Only the first track is inspected; note that the counter still prints the size of the whole corpus
for num, track in enumerate(onsets[:1], 1):
    print(f'{num} / {len(onsets)}')
    # Tracks with a time signature of 3 are left out
    if track.item['time_signature'] != 3:
        temp = pd.DataFrame(track.summary_dict)[['drums', 'metre_manual']]
        # Inter-onset interval: difference between each drum onset and the one on the previous row
        temp['ioi'] = temp['drums'].diff()
        print(temp)

## Get piano - drums/bass asynchronies

In [None]:
asynchronies = []
# (output label, minuend instrument, subtrahend instrument) for every pairing we record
pairings = (
    ('bass', 'piano', 'bass'),
    ('drums', 'piano', 'drums'),
    ('bass_drums', 'bass', 'drums'),
)
# Iterate over every track
for num, track in enumerate(onsets, 1):
    print(f'{num} / {len(onsets)}')
    # NOTE: tracks with time_signature == 3 are not skipped here
    summary_df = pd.DataFrame(track.summary_dict)
    asy = ProportionalAsynchrony(summary_df=summary_df, my_instr_name='piano').asynchronies
    # Adjusted asynchronies of each instrument, re-indexed from zero so they subtract row-by-row
    adjusted = {
        name: asy.loc[asy['instr'] == name, 'asynchrony_adjusted'].reset_index(drop=True)
        for name in ('piano', 'bass', 'drums')
    }
    for label, minuend, subtrahend in pairings:
        # Rows where either instrument is missing yield NaN and are discarded
        differences = (adjusted[minuend] - adjusted[subtrahend]).dropna().to_numpy()
        asynchronies.extend(
            dict(instr=label, bandleader=track.item['pianist'], asynchrony=difference)
            for difference in differences
        )

In [None]:
# Collect the per-onset records into a dataframe (one row per record in `asynchronies`)
asy_df = pd.DataFrame(asynchronies)

In [None]:
# Mean asynchrony per instrument pairing, scaled by 100 * 4.
# numeric_only=True restricts the mean to numeric columns: asy_df also holds the string-valued
#  'bandleader' column, which makes a plain .mean() raise a TypeError on pandas >= 2.0.
asy_df.groupby('instr').mean(numeric_only=True) * 100 * 4

In [None]:
# Piano-bass asynchronies for the recordings led by Bud Powell
is_bud_powell_bass = (asy_df['instr'] == 'bass') & (asy_df['bandleader'] == 'Bud Powell')
jm = asy_df.loc[is_bud_powell_bass, 'asynchrony']
# Mean of 10000 seeded resamples (with replacement, same size as the original sample)
means = [jm.sample(frac=1, replace=True, random_state=i).mean() for i in range(10000)]
# Lower and upper bounds of the 95% percentile interval
print(*np.quantile(means, [0.025, 0.975]))

In [None]:
# Build the per-trio histogram plot from asy_df (with images enabled), then display the figure
HistPlotProportionalAsynchronyTriosPiano(asy_df, include_images=True).create_plot()
plt.show()

In [None]:
from joblib import Parallel, delayed

def process(state_, instr_):
    """Return one bootstrap replicate of the mean asynchrony for ``instr_``.

    The unique bandleaders in the module-level ``asy_df`` are resampled with
    replacement (seeded by ``state_``); the asynchronies of every drawn
    bandleader for the given instrument label are pooled -- a bandleader drawn
    twice contributes twice -- and the mean of the pooled values is returned.
    """
    leaders = pd.Series(asy_df['bandleader'].unique())
    drawn = leaders.sample(frac=1, replace=True, random_state=state_)
    # Filter by instrument once, rather than once per drawn bandleader
    of_instr = asy_df[asy_df['instr'] == instr_]
    pooled = pd.concat(
        [of_instr.loc[of_instr['bandleader'] == leader, 'asynchrony'] for leader in drawn]
    )
    return pooled.mean()

for instr in ['bass_drums']:
    # One bootstrap replicate per seed, spread over all available cores
    jobs = (delayed(process)(state, instr) for state in range(vutils.N_BOOT))
    means = Parallel(n_jobs=-1, verbose=10)(jobs)
    # Observed mean, followed by the bounds of the 95% percentile interval of the replicates
    observed = asy_df.loc[asy_df['instr'] == instr, 'asynchrony'].mean()
    ci_low, ci_high = np.percentile(means, [2.5, 97.5])
    print(instr, observed, ci_low, ci_high)