In [1]:
import pandas as pd
import numpy as np
from music21 import converter, midi, interval, pitch
from mido import MidiFile
import miditoolkit
import os
from os import walk
from tokenizing_functions import convert_to_note_items, get_file_and_dirnames
#from helper_functions import get_file_and_dirnames
#from analysis_functions import analyse_data_folder
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

PATH_TRANSPOSED = "../0_data/4_preprocessed_sets"

In [2]:
MIN_DURATION_DENOMINATOR = 32
DURATION_STEPS = 64
POSITION_STEPS = 16
TICKS_PER_BEAT = 1024
TICKS_PER_MIN_DURATION = TICKS_PER_BEAT*4/MIN_DURATION_DENOMINATOR
DURATION_BINS = np.arange(TICKS_PER_MIN_DURATION, (TICKS_PER_MIN_DURATION*DURATION_STEPS)+1, TICKS_PER_MIN_DURATION, dtype=int)

In [3]:
dir = "17_POP909-Dataset-master"
file = "111.mid"
files,_ = get_file_and_dirnames(f'{PATH_TRANSPOSED}/c)_transposed_octave/{dir}')
files.sort()
files[:10]

['002.mid',
 '003.mid',
 '004.mid',
 '005.mid',
 '006.mid',
 '007.mid',
 '008.mid',
 '009.mid',
 '010.mid',
 '011.mid']

# Duration Analysis

In [25]:
# now check duration times
def check_duration(items):
    durations = []
    for item in items:
        duration = item["end"] - item["start"]
        durations.append(duration)
    return durations

def get_duration_count(durations):
    durations_count = {}
    for duration in set(durations):
        durations_count[duration] = durations.count(duration)
    return durations_count

#def get_durations_in_bin48(duration_dic, bin_48=1024*4/48):
#    updated_duration_dic = {}
#    for length in duration_dic:
#        if length < int(round(length/bin_48,2)):
#            print(f"length {length} is less than bin_48 {bin_48}")
#        updated_duration_dic[f"{int(round(length/bin_48,2))}/48"] = duration_dic[length]
#    return updated_duration_dic

def get_durations_in_bins(duration_dic, space):
    updated_duration_dic = {}
    for i in space:
        updated_duration_dic[i] = 0

    for duration in duration_dic:
        for i in range(len(space)-1):
            if duration >= space[i] and duration < space[i+1]:
                updated_duration_dic[space[i]] += duration_dic[duration]
                break
        if duration > space[-1]:
            updated_duration_dic[space[-1]] += duration_dic[duration]

    return updated_duration_dic

def check_triole_seq(items, lower_bound=85, upper_bound=86):
    seq = []
    triole_seq = 0
    for item in items:
        duration = item["end"] - item["start"]
        if duration >= lower_bound and duration <= upper_bound:
            triole_seq += 1
        else:
            seq.append(triole_seq)
            triole_seq = 0
    return seq

def get_triole_count(sequences):
    trioles_count = {}
    for seq in set(sequences):
        trioles_count[seq] = sequences.count(seq)
    return trioles_count

In [7]:
df_durations = pd.DataFrame()
for f in tqdm(files):
    path = f'{PATH_TRANSPOSED}/c)_transposed_octave/{dir}/{f}'
    note_items = convert_to_note_items(path)
    duration_dic = get_duration_count(check_duration(note_items))
    duration_count = {"name": f}
    #duration_count.update(get_durations_in_self_bins(duration_dic, space))
    duration_count.update(duration_dic)
    df_durations = df_durations.append(duration_count, ignore_index=True)
df_durations.fillna(0, inplace=True)
df_durations

100%|██████████| 803/803 [00:09<00:00, 86.07it/s]


Unnamed: 0,name,256,512,768,1024,1280,2816,3840,3328,4608,...,13653,9643,14336,9984,29781,4267,8960,23040,9557,427
0,002.mid,241.0,30.0,19.0,5.0,6.0,1.0,1.0,2.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,003.mid,301.0,76.0,5.0,6.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,004.mid,52.0,41.0,0.0,13.0,1.0,0.0,0.0,3.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,005.mid,2.0,116.0,11.0,23.0,2.0,1.0,0.0,0.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,006.mid,2.0,166.0,0.0,45.0,1.0,0.0,0.0,0.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
798,905.mid,6.0,58.0,10.0,2.0,2.0,2.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
799,906.mid,116.0,12.0,5.0,9.0,8.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
800,907.mid,84.0,36.0,40.0,4.0,0.0,0.0,0.0,4.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
801,908.mid,127.0,56.0,41.0,3.0,8.0,4.0,1.0,2.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
sorted([col for col in df_durations.columns if not type(col) == str])

[1,
 85,
 86,
 170,
 171,
 256,
 341,
 342,
 427,
 512,
 683,
 768,
 1024,
 1280,
 1365,
 1536,
 1707,
 1792,
 2048,
 2304,
 2389,
 2560,
 2731,
 2816,
 3072,
 3328,
 3413,
 3584,
 3755,
 3840,
 4096,
 4267,
 4352,
 4437,
 4608,
 4779,
 4864,
 4949,
 5120,
 5291,
 5376,
 5461,
 5632,
 5803,
 5888,
 6144,
 6400,
 6485,
 6656,
 6827,
 6912,
 6997,
 7168,
 7424,
 7509,
 7680,
 7851,
 7936,
 8192,
 8448,
 8533,
 8704,
 8875,
 8960,
 9216,
 9472,
 9557,
 9643,
 9728,
 9984,
 10240,
 10496,
 10752,
 11008,
 11264,
 11776,
 12032,
 12288,
 12629,
 13312,
 13653,
 14080,
 14165,
 14336,
 14848,
 15019,
 15360,
 16384,
 19968,
 20992,
 22187,
 23040,
 24320,
 28672,
 29781,
 33280,
 52480]

In [9]:
# Abweichungen längen von 48 bins
durations = sorted([col for col in df_durations.columns if not type(col) == str])
durations = [dur/(1024*4/16) for dur in durations]
durations = [dur - int(dur) for dur in durations]
set(durations)

{0.0, 0.00390625, 0.33203125, 0.3359375, 0.6640625, 0.66796875}

In [10]:
len([col for col in df_durations.columns if not type(col) == str and col > 4096])

66

In [11]:
space = [0, 85, 128, 256, 341, 512, 1024, 2048, 4096, 8192]

df_durations_bins = pd.DataFrame()
for f in tqdm(files):
    path = f'{PATH_TRANSPOSED}/c)_transposed_octave/{dir}/{f}'
    note_items = convert_to_note_items(path)
    duration_dic = get_duration_count(check_duration(note_items))
    duration_count = {"name": f}
    duration_count.update(get_durations_in_bins(duration_dic, space))
    df_durations_bins = df_durations_bins.append(duration_count, ignore_index=True)
df_durations_bins.fillna(0, inplace=True)
df_durations_bins

100%|██████████| 803/803 [00:06<00:00, 132.02it/s]


Unnamed: 0,name,0,85,128,256,341,512,1024,2048,4096,8192
0,002.mid,0,0,0,241,3,50,11,4,1,0
1,003.mid,0,0,0,301,23,85,6,7,0,0
2,004.mid,0,0,0,52,0,41,38,13,12,0
3,005.mid,0,9,7,2,140,141,38,25,4,1
4,006.mid,0,77,0,2,130,168,59,26,4,0
...,...,...,...,...,...,...,...,...,...,...,...
798,905.mid,0,0,0,6,20,88,10,10,0,0
799,906.mid,0,0,4,116,121,24,23,1,1,0
800,907.mid,0,0,0,84,0,92,18,14,2,0
801,908.mid,0,0,0,127,2,100,23,21,0,0


In [12]:
print("songs with notes shorter than 1/48: ", df_durations_bins[df_durations_bins[0] > 0].shape[0], 
      " - ", np.round(df_durations_bins[df_durations_bins[0] > 0].shape[0]/df_durations_bins.shape[0]*100, 2), "%")
print("sum of notes shorter than 1/48: ", df_durations_bins[0].sum())
print()
print("songs with notes between 1 and 2 bars: ", df_durations_bins[df_durations_bins[4096] > 0].shape[0], 
      " - ", np.round(df_durations_bins[df_durations_bins[4096] > 0].shape[0]/df_durations_bins.shape[0]*100, 2), "%")
print("sum of notes between 1 and 2 bars: ", df_durations_bins[4096].sum())
print()
print("songs with notes longer than 2 bars: ", df_durations_bins[df_durations_bins[8192] > 0].shape[0], 
      " - ", np.round(df_durations_bins[df_durations_bins[8192] > 0].shape[0]/df_durations_bins.shape[0]*100, 2), "%")
print("sum of notes longer than 2 bars: ", df_durations_bins[8192].sum())

songs with notes shorter than 1/48:  18  -  2.24 %
sum of notes shorter than 1/48:  70

songs with notes between 1 and 2 bars:  373  -  46.45 %
sum of notes between 1 and 2 bars:  1837

songs with notes longer than 2 bars:  43  -  5.35 %
sum of notes longer than 2 bars:  82


In [30]:
df_triole_seq = pd.DataFrame()
for f in tqdm(files):
    path = f'{PATH_TRANSPOSED}/c)_transposed_octave/{dir}/{f}'
    note_items = convert_to_note_items(path)
    sequences = get_triole_count(check_triole_seq(note_items, lower_bound=85, upper_bound=86))
    sequence_count = {"name": f}
    sequence_count.update(sequences)
    df_triole_seq = df_triole_seq.append(sequence_count, ignore_index=True)
df_triole_seq.fillna(0, inplace=True)
df_triole_seq

100%|██████████| 803/803 [00:06<00:00, 129.96it/s]


Unnamed: 0,name,0,1,2
0,002.mid,310,0.0,0.0
1,003.mid,422,0.0,0.0
2,004.mid,156,0.0,0.0
3,005.mid,349,9.0,0.0
4,006.mid,314,77.0,0.0
...,...,...,...,...
798,905.mid,134,0.0,0.0
799,906.mid,290,0.0,0.0
800,907.mid,210,0.0,0.0
801,908.mid,273,0.0,0.0


In [34]:
print("songs with single 1/16 triole: ", df_triole_seq[df_triole_seq[1] > 0].shape[0], 
      " - ", np.round(df_triole_seq[df_triole_seq[1] > 0].shape[0]/df_triole_seq.shape[0]*100, 2), "%")
print("sum of single 1/16 triole sequences: ", df_triole_seq[1].sum())
print()
print("songs with double 1/16 triole: ", df_triole_seq[df_triole_seq[2] > 0].shape[0], 
      " - ", np.round(df_triole_seq[df_triole_seq[2] > 0].shape[0]/df_triole_seq.shape[0]*100, 2), "%")
print("sum of double 1/16 triole sequences: ", df_triole_seq[2].sum())
print()

songs with single 1/16 triole:  237  -  29.51 %
sum of single 1/16 triole sequences:  2511.0

songs with double 1/16 triole:  6  -  0.75 %
sum of double 1/16 triole sequences:  26.0



In [35]:
df_triole_seq = pd.DataFrame()
for f in tqdm(files):
    path = f'{PATH_TRANSPOSED}/c)_transposed_octave/{dir}/{f}'
    note_items = convert_to_note_items(path)
    sequences = get_triole_count(check_triole_seq(note_items, lower_bound=170, upper_bound=171))
    sequence_count = {"name": f}
    sequence_count.update(sequences)
    df_triole_seq = df_triole_seq.append(sequence_count, ignore_index=True)
df_triole_seq.fillna(0, inplace=True)
df_triole_seq

100%|██████████| 803/803 [00:06<00:00, 127.50it/s]


Unnamed: 0,name,0,1
0,002.mid,310,0.0
1,003.mid,422,0.0
2,004.mid,156,0.0
3,005.mid,353,7.0
4,006.mid,468,0.0
...,...,...,...
798,905.mid,134,0.0
799,906.mid,282,4.0
800,907.mid,210,0.0
801,908.mid,273,0.0


In [36]:
print("songs with single 1/8 triole: ", df_triole_seq[df_triole_seq[1] > 0].shape[0], 
      " - ", np.round(df_triole_seq[df_triole_seq[1] > 0].shape[0]/df_triole_seq.shape[0]*100, 2), "%")
print("sum of single 1/8 triole sequences: ", df_triole_seq[1].sum())

songs with single 1/8 triole:  128  -  15.94 %
sum of single 1/8 triole sequences:  737.0


In [37]:
df_triole_seq = pd.DataFrame()
for f in tqdm(files):
    path = f'{PATH_TRANSPOSED}/c)_transposed_octave/{dir}/{f}'
    note_items = convert_to_note_items(path)
    sequences = get_triole_count(check_triole_seq(note_items, lower_bound=341, upper_bound=342))
    sequence_count = {"name": f}
    sequence_count.update(sequences)
    df_triole_seq = df_triole_seq.append(sequence_count, ignore_index=True)
df_triole_seq.fillna(0, inplace=True)
df_triole_seq

100%|██████████| 803/803 [00:06<00:00, 117.49it/s]


Unnamed: 0,name,0,3,1,2,4,5,6,7,10,9,17,19,8,14,11
0,002.mid,306,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,003.mid,376,0.0,23.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,004.mid,156,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,005.mid,93,0.0,128.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,006.mid,210,0.0,126.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
798,905.mid,101,2.0,8.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
799,906.mid,106,10.0,35.0,10.0,6.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
800,907.mid,210,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
801,908.mid,270,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [45]:
print("1/4 trioles:")
for i in sorted(df_triole_seq.columns[1:]):
    print(f"songs with {i} triole: ", df_triole_seq[df_triole_seq[i] > 0].shape[0], 
          " - ", np.round(df_triole_seq[df_triole_seq[i] > 0].shape[0]/df_triole_seq.shape[0]*100, 2), "%", 
          " - sum ", df_triole_seq[i].sum() 
    )

1/4 trioles:
songs with 0 triole:  803  -  100.0 %  - sum  192286
songs with 1 triole:  656  -  81.69 %  - sum  21295.0
songs with 2 triole:  410  -  51.06 %  - sum  6073.0
songs with 3 triole:  246  -  30.64 %  - sum  1687.0
songs with 4 triole:  163  -  20.3 %  - sum  934.0
songs with 5 triole:  96  -  11.96 %  - sum  479.0
songs with 6 triole:  55  -  6.85 %  - sum  215.0
songs with 7 triole:  23  -  2.86 %  - sum  99.0
songs with 8 triole:  17  -  2.12 %  - sum  51.0
songs with 9 triole:  8  -  1.0 %  - sum  22.0
songs with 10 triole:  7  -  0.87 %  - sum  17.0
songs with 11 triole:  4  -  0.5 %  - sum  12.0
songs with 14 triole:  2  -  0.25 %  - sum  4.0
songs with 17 triole:  1  -  0.12 %  - sum  3.0
songs with 19 triole:  1  -  0.12 %  - sum  3.0


# Time Shift Analysis

In [4]:
def compute_shifts(items, ticks_per_position=1024*4/POSITION_STEPS):
    # grid
    grids = np.arange(0, items[-1]["start"]+1, ticks_per_position, dtype=float)
    grids = grids.astype(int)
    # process
    shifts = []
    for item in items:
        index = np.argmin(abs(grids - item["start"]))
        shift = grids[index] - item["start"]
        shifts.append(shift)
    return shifts

def get_shifts_count(shifts):
    shifts_count = {}
    for shift in set(shifts):
        shifts_count[shift] = shifts.count(shift)
    return shifts_count

In [5]:
df = pd.DataFrame()
for f in tqdm(files):
    path = f'{PATH_TRANSPOSED}/c)_transposed_octave/{dir}/{f}'
    note_items = convert_to_note_items(path)
    shifts = compute_shifts(note_items, ticks_per_position=1024*4/POSITION_STEPS)
    shift_count = {"name": f}
    shift_count.update(get_shifts_count(shifts))
    df = df.append(shift_count, ignore_index=True)
df.fillna(0, inplace=True)
df["abs_85"] = df[-85]+df[85]
df["shift_count"] = df[-85]+df[85]+df[-171]
df

100%|██████████| 803/803 [00:08<00:00, 100.36it/s]


Unnamed: 0,name,0,-85,85,-171,abs_85,shift_count
0,002.mid,308,1.0,1.0,0.0,2.0,2.0
1,003.mid,422,0.0,0.0,0.0,0.0,0.0
2,004.mid,156,0.0,0.0,0.0,0.0,0.0
3,005.mid,146,99.0,121.0,1.0,220.0,221.0
4,006.mid,256,206.0,6.0,0.0,212.0,212.0
...,...,...,...,...,...,...,...
798,905.mid,86,45.0,3.0,0.0,48.0,48.0
799,906.mid,162,85.0,43.0,0.0,128.0,128.0
800,907.mid,210,0.0,0.0,0.0,0.0,0.0
801,908.mid,273,0.0,0.0,0.0,0.0,0.0


In [6]:
print("songs with 85 shift:", df[df["abs_85"]>0].shape[0])
print("max 85 shift:", df[df["abs_85"]>0]["abs_85"].max())
print("min 85 shift:", df[df["abs_85"]>0]["abs_85"].min())
print("mean 85 shift:", df[df["abs_85"]>0]["abs_85"].mean())
print()
print("songs with -171 shift:", df[df[-171]>0].shape[0])
print("max -171 shift:", df[df[-171]>0][-171].max())
print("min -171 shift:", df[df[-171]>0][-171].min())
print("mean -171 shift:", df[df[-171]>0][-171].mean())
print()
print("songs with shifts:", df[df["shift_count"]>0].shape[0])
print("max shift:", df[df["shift_count"]>0]["shift_count"].max())
print("min shift:", df[df["shift_count"]>0]["shift_count"].min())
print("mean shift:", df[df["shift_count"]>0]["shift_count"].mean())
print()
print("songs with no shifts:", df[df["shift_count"]==0].shape[0])

songs with 85 shift: 585
max 85 shift: 410.0
min 85 shift: 1.0
mean 85 shift: 117.07863247863249

songs with -171 shift: 93
max -171 shift: 1.0
min -171 shift: 1.0
mean -171 shift: 1.0

songs with shifts: 585
max shift: 410.0
min shift: 1.0
mean shift: 117.23760683760683

songs with no shifts: 218


In [7]:
df[df[-171]>0]

Unnamed: 0,name,0,-85,85,-171,abs_85,shift_count
3,005.mid,146,99.0,121.0,1.0,220.0,221.0
9,011.mid,188,79.0,159.0,1.0,238.0,239.0
10,012.mid,284,17.0,138.0,1.0,155.0,156.0
12,014.mid,228,0.0,46.0,1.0,46.0,47.0
37,043.mid,209,0.0,191.0,1.0,191.0,192.0
...,...,...,...,...,...,...,...
765,867.mid,107,131.0,162.0,1.0,293.0,294.0
766,868.mid,360,1.0,0.0,1.0,1.0,2.0
778,882.mid,223,0.0,172.0,1.0,172.0,173.0
782,887.mid,110,0.0,205.0,1.0,205.0,206.0


In [76]:
POSITION_STEPS_32 = 32

df_32 = pd.DataFrame()
for f in tqdm(files):
    path = f'{PATH_TRANSPOSED}/c)_transposed_octave/{dir}/{f}'
    note_items = convert_to_note_items(path)
    shifts = compute_shifts(note_items, ticks_per_position=1024*4/POSITION_STEPS_32)
    shift_count = {"name": f}
    shift_count.update(get_shifts_count(shifts))
    df_32 = df_32.append(shift_count, ignore_index=True)
df_32.fillna(0, inplace=True)
df_32["abs_43"] = df_32[-43]+df_32[43]
df_32["shift_count"] = df_32[-43]+df_32[43]+df_32[-85]
df_32

100%|██████████| 803/803 [00:08<00:00, 95.17it/s] 


Unnamed: 0,name,0,43,-43,-85,abs_43,shift_count
0,002.mid,308,1.0,1.0,0.0,2.0,2.0
1,003.mid,422,0.0,0.0,0.0,0.0,0.0
2,004.mid,156,0.0,0.0,0.0,0.0,0.0
3,005.mid,146,99.0,122.0,0.0,221.0,221.0
4,006.mid,256,205.0,6.0,1.0,211.0,212.0
...,...,...,...,...,...,...,...
798,905.mid,86,45.0,3.0,0.0,48.0,48.0
799,906.mid,162,85.0,43.0,0.0,128.0,128.0
800,907.mid,210,0.0,0.0,0.0,0.0,0.0
801,908.mid,273,0.0,0.0,0.0,0.0,0.0


In [77]:
print("songs with 43 shift:", df_32[df_32["abs_43"]>0].shape[0])
print("max 43 shift:", df_32[df_32["abs_43"]>0]["abs_43"].max())
print("min 43 shift:", df_32[df_32["abs_43"]>0]["abs_43"].min())
print("mean 43 shift:", df_32[df_32["abs_43"]>0]["abs_43"].mean())
print()
print("songs with -85 shift:", df_32[df_32[-85]>0].shape[0])
print("max -85 shift:", df_32[df_32[-85]>0][-85].max())
print("min -85 shift:", df_32[df_32[-85]>0][-85].min())
print("mean -85 shift:", df_32[df_32[-85]>0][-85].mean())
print()
print("songs with shifts:", df_32[df_32["shift_count"]>0].shape[0])
print("max shift:", df_32[df_32["shift_count"]>0]["shift_count"].max())
print("min shift:", df_32[df_32["shift_count"]>0]["shift_count"].min())
print("mean shift:", df_32[df_32["shift_count"]>0]["shift_count"].mean())
print()
print("songs with no shifts:", df_32[df_32["shift_count"]==0].shape[0])

songs with 43 shift: 583
max 43 shift: 409.0
min 43 shift: 1.0
mean 43 shift: 117.48885077186964

songs with -85 shift: 88
max -85 shift: 1.0
min -85 shift: 1.0
mean -85 shift: 1.0

songs with shifts: 585
max shift: 410.0
min shift: 1.0
mean shift: 117.23760683760683

songs with no shifts: 218


In [78]:
POSITION_STEPS_48 = 48

df_48 = pd.DataFrame()
for f in tqdm(files):
    path = f'{PATH_TRANSPOSED}/c)_transposed_octave/{dir}/{f}'
    note_items = convert_to_note_items(path)
    shifts = compute_shifts(note_items, ticks_per_position=1024*4/POSITION_STEPS_48)
    shift_count = {"name": f}
    shift_count.update(get_shifts_count(shifts))
    df_48 = df_48.append(shift_count, ignore_index=True)
df_48.fillna(0, inplace=True)
df_48

100%|██████████| 803/803 [00:08<00:00, 93.90it/s] 


Unnamed: 0,name,0,-1
0,002.mid,309,1.0
1,003.mid,422,0.0
2,004.mid,156,0.0
3,005.mid,245,122.0
4,006.mid,462,6.0
...,...,...,...
798,905.mid,131,3.0
799,906.mid,247,43.0
800,907.mid,210,0.0
801,908.mid,273,0.0


In [79]:
print("songs with -1 shift:", df_48[df_48[-1]>0].shape[0])
print("max -1 shift:", df_48[df_48[-1]>0][-1].max())
print("min -1 shift:", df_48[df_48[-1]>0][-1].min())
print("mean -1 shift:", df_48[df_48[-1]>0][-1].mean())
print()
print("songs with no shifts:", df_48[df_48[-1]==0].shape[0])

songs with -1 shift: 461
max -1 shift: 326.0
min -1 shift: 1.0
mean -1 shift: 77.8004338394794

songs with no shifts: 342


Other Code

In [80]:
"""
TICKS_PER_BEAT = 1024
note_length_to_ticks = {
    "1": TICKS_PER_BEAT*4,
    "1/2 ..": TICKS_PER_BEAT*2*1.75,
    "1/2 .": TICKS_PER_BEAT*2*1.5,
    "1/2": TICKS_PER_BEAT*2,
    "1/4 ..": TICKS_PER_BEAT*1.75,
    "1/4 .": TICKS_PER_BEAT*1.5,
    "1/4": TICKS_PER_BEAT,
    "1/8 ..": TICKS_PER_BEAT/2*1.75,
    "1/8 .": TICKS_PER_BEAT/2*1.5,
    "1/8": TICKS_PER_BEAT/2,
    "1/16 ..": TICKS_PER_BEAT/4*1.75,
    "1/16 .": TICKS_PER_BEAT/4*1.5,
    "1/16": TICKS_PER_BEAT/4,
    "1/32 ..": TICKS_PER_BEAT/8*1.75,
    "1/32 .": TICKS_PER_BEAT/8*1.5,
    "1/32": TICKS_PER_BEAT/8,
    "1/64 ..": TICKS_PER_BEAT/16*1.75,
    "1/64 .": TICKS_PER_BEAT/16*1.5,
    "1/64": TICKS_PER_BEAT/16,
}
note_length_to_ticks



TICKS_PER_BAR = 1024*4

for ticks in duration_dic:
    #print("now ticks is", ticks)
    #print()
    for i in range(0, 7):
        #print("now denominator is", 2**i)
        denominator = 2**i
        length = TICKS_PER_BAR/(denominator)
    
        if ticks % length == 0:
            print(ticks, round(length,0), f"{int(ticks/length)}/{denominator}")
            break
            #duration_dic[f"1/{length}"] = duration_dic.pop(ticks)
            #break
        elif ticks % length/3 == 0:
            print(ticks, round(length/3,0), f"{int(ticks/length)}/{denominator}")
            break
    #print(ticks, "here")
    

TICKS_PER_BAR = 1024*4
note_length_to_ticks = {}
for i in range(0, 7):
    # notes in log 2 space
    note_length_to_ticks[f"1/{2**i}"] = TICKS_PER_BAR/(2**i)
    # trioles of those notes
    note_length_to_ticks[f"1/{2**i*3}"] = TICKS_PER_BAR/(2**i*3)
update_dic = {}
for length in note_length_to_ticks:
    # dotted and two dotted versions of notes
    update_dic[f"{length} .."] = note_length_to_ticks[length]*1.75
    update_dic[f"{length} ."] = note_length_to_ticks[length]*1.5
note_length_to_ticks.update(update_dic)
note_length_to_ticks = {k:round(note_length_to_ticks[k]) for k in note_length_to_ticks}
note_length_to_ticks
    
    """
print("zwischensave")

zwischensave
