In [1]:
import pandas as pd
from fractions import Fraction
import cufflinks as cf
# Put cufflinks into offline mode for the `.iplot()` calls used throughout this notebook.
cf.go_offline()
# Make 'white' the default theme for cufflinks figures.
cf.set_config_file(theme='white')

# Loading the note lists for ~700 compositions

In [2]:
# Column types are declared up front so that a malformed file fails at load
# time: the listed columns are parsed as integers, the time signature stays a
# string (e.g. '12/8'), and onset/duration are parsed into exact Fractions.
dtypes = dict({col: int for col in ['id', 'mn', 'tpc', 'midi', 'keysig']}, timesig=str)
converters = {col: Fraction for col in ['onset', 'duration']}
# Parse the file exactly once. A second, untyped `read_csv` of the same file
# used to follow here; it threw away the typed frame and doubled the load time.
notes = pd.read_csv('note_list.tsv', sep='\t', dtype=dtypes, converters=converters)
print(notes.shape)
notes.head(5)

(1079574, 8)


Unnamed: 0,id,mn,onset,duration,tpc,midi,keysig,timesig
0,0,1,0,1/16,3,45,3,12/8
1,0,1,1/16,1/16,7,49,3,12/8
2,0,1,1/8,1/16,4,52,3,12/8
3,0,1,3/16,1/16,3,57,3,12/8
4,0,1,1/4,1/16,8,56,3,12/8


In [48]:
# Bar chart: how often each MIDI pitch occurs over all notes.
notes['midi'].value_counts().iplot(kind='bar')

In [50]:
# Bar chart: how often each tonal pitch class (tpc) occurs over all notes.
notes['tpc'].value_counts().iplot(kind='bar')

In [52]:
# Bar chart of MIDI pitch modulo 12. The modulo is computed with vectorized
# Series arithmetic instead of a per-element Python lambda; values are the same.
(notes['midi'] % 12).value_counts().iplot(kind='bar')

### How many notes in one piece?

In [3]:
# Histogram (50 bins) of the number of note rows per piece id.
notes.groupby('id').size().iplot(kind='hist', bins=50)

### Which tones?

Check Tonal Pitch Classes and MIDI Pitch

In [4]:
# Frequency of each tonal pitch class (tpc) over all notes, as a bar chart.
notes['tpc'].value_counts().iplot(kind='bar')

In [5]:
# Frequency of each MIDI pitch over all notes, as a bar chart.
notes['midi'].value_counts().iplot(kind='bar')

### What information about pitch do TPC and MIDI express? How do they relate to each other?

In [6]:
# Add a 'pc' column holding the MIDI pitch modulo 12.
# Vectorized Series arithmetic replaces the per-element Python lambda; the
# resulting values are identical but computed without a Python-level loop.
notes['pc'] = notes['midi'] % 12
notes['pc'].value_counts().iplot(kind='bar')

In [7]:
# Add an 'octave' column holding the MIDI pitch floor-divided by 12
# (vectorized instead of a per-element lambda; same values).
notes['octave'] = notes['midi'] // 12
# One line per octave, showing how many notes fall on each 'pc' value.
notes.groupby('octave')['pc'].value_counts().unstack().T.iplot()

### Tone counts per piece

In [8]:
def norm(df):
    """Divide every row of `df` by that row's sum.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose rows are to be rescaled.

    Returns
    -------
    pd.DataFrame
        Frame of the same shape in which each row has been divided by its
        own total (as computed by ``df.sum(axis=1)``).
    """
    row_totals = df.sum(axis=1)
    return df.div(row_totals, axis=0)

In [9]:
# Per piece: count the notes on each 'pc' value (pieces as rows, 'pc' as
# columns), then rescale every row with `norm` so it sums to 1.
pc_counts = norm(notes.groupby('id')['pc'].value_counts().unstack())
pc_counts.transpose().iplot()

### Tone durations per piece

In [10]:
# Per piece: total duration sounding on each 'pc' value, rescaled row-wise with
# `norm` and finally converted to float.
pc_durations = (
    notes.groupby(['id', 'pc'])['duration']
    .sum()
    .unstack()
    .pipe(norm)
    .astype(float)
)
pc_durations.transpose().iplot()

### Rotate rows to start with maximum

In [11]:
import numpy as np


def rotate_to_max(row):
    """Rotate `row` cyclically so that its largest entry comes first.

    Parameters
    ----------
    row : pd.Series
        The values to rotate.

    Returns
    -------
    pd.Series
        The rotated values on a fresh default (0..n-1) index.
    """
    # `idxmax()` yields an index *label*, whereas `np.roll` needs a positional
    # shift. Translate the label into its position so the rotation stays
    # correct even when the labels are not exactly 0..n-1.
    peak_position = row.index.get_loc(row.idxmax())
    return pd.Series(np.roll(row, -peak_position))

In [12]:
# Rotate each piece's count profile so that its most frequent entry sits at position 0.
rolled_count = pc_counts.apply(rotate_to_max, axis='columns')
rolled_count.transpose().iplot()

In [13]:
# Rotate each piece's duration profile so that its longest-sounding entry sits at position 0.
rolled_dur = pc_durations.apply(rotate_to_max, axis='columns')
rolled_dur.transpose().iplot()

In [14]:
# Ignore column 0 (the maximum after rotation) and find, per piece, which of
# the remaining columns 1..11 holds the largest value.
second_most = rolled_dur[list(range(1, 12))].idxmax(axis='columns')
second_most.value_counts()

5     324
7     252
4      28
3      26
8      21
9      12
2      12
10     10
1       3
dtype: int64

In [21]:
def fourth_to_root(row):
    """Rotate `row` cyclically by five positions, moving the entry at position 5 to position 0."""
    return pd.Series(np.roll(row, -5))


# Work on a copy so that `rolled_dur` itself stays untouched.
rolled_correct = rolled_dur.copy()
# Re-rotate only the pieces whose second-largest entry sits at position 5.
fourth_is_second = second_most == 5
rolled_correct.loc[fourth_is_second] = rolled_correct.loc[fourth_is_second].apply(fourth_to_root, axis=1)
rolled_correct.T.iplot()

In [41]:
# Label a piece 'minor' when column 3 outweighs column 4 of its corrected
# profile, and 'major' otherwise (including ties and missing values).
mode = (rolled_correct[3] > rolled_correct[4]).map({True: 'minor', False: 'major'})
mode.head(20)

id
0     major
1     major
2     major
3     major
4     major
5     major
6     major
7     major
8     minor
9     major
10    minor
11    minor
12    minor
13    minor
14    minor
15    minor
16    major
17    minor
18    minor
19    minor
dtype: object

### Ground truth keys

In [18]:
# Tab-separated key annotations; the first column of the file becomes the index.
keys = pd.read_csv(
    'keys.tsv',
    sep='\t',
    index_col=0,
)
keys.head()

Unnamed: 0_level_0,globalkey
id,Unnamed: 1_level_1
0,A
1,A
2,A
3,A
4,A


In [24]:
# An upper-case tonic letter marks a major key and a lower-case one a minor
# key (e.g. 'A' vs. 'a'). Only the FIRST character is inspected: calling
# `str.isupper()` on the whole label returns False as soon as any cased
# character is lower-case, so a major key whose accidental is written with a
# lower-case letter (e.g. 'Bb') would be mislabelled as minor.
# NOTE(review): confirm how accidentals are spelled in keys.tsv.
keys['mode'] = keys.globalkey.str[0].str.isupper().map({True: 'major', False: 'minor'})
keys.head(20)

Unnamed: 0_level_0,globalkey,mode
id,Unnamed: 1_level_1,Unnamed: 2_level_1
0,A,major
1,A,major
2,A,major
3,A,major
4,A,major
5,A,major
6,A,major
7,A,major
8,a,minor
9,A,major


In [43]:
# Attach the predicted mode to the annotations, matched on the piece index;
# pieces without a prediction get a missing value.
keys['predicted'] = mode.reindex(keys.index)

In [44]:
# Count how many pieces have a predicted mode equal to the annotated one (True) or not (False).
keys['mode'].eq(keys['predicted']).value_counts()

True     510
False    124
dtype: int64

### With TPCs

In [17]:
# Per piece: total duration on each tonal pitch class (pieces as rows, tpc as
# columns), converted to float. Combinations that never occur stay NaN.
tpc_durations = (
    notes.groupby(['id', 'tpc'])['duration']
    .sum()
    .unstack('tpc')
    .astype(float)
)
tpc_durations

tpc,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,...,9,10,11,12,13,14,15,16,17,18
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,,,,,,,,,,,...,3.87500,2.5000,1.8750,,,,,,,
1,,,,,,,,,,,...,5.50000,3.3125,1.0625,0.9375,,,,,,
2,,,,,,,,,,,...,3.12500,0.5000,1.8750,,,,,,,
3,,,,,,,,,,,...,1.87500,2.6250,,,,,,,,
4,,,,,,,,,,,...,1.50000,2.5000,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
692,,,,,,,,0.875000,1.250000,16.500000,...,20.00000,2.7500,0.6250,0.7500,,,,,,
693,,,,,,0.25000,3.812500,1.895833,12.260417,27.322917,...,1.71875,,,,,,,,,
694,,,,,,,0.500000,1.500000,33.000000,31.000000,...,0.37500,,,,,,,,,
695,,,0.25,0.40625,0.40625,11.96875,25.437500,30.531250,13.187500,9.531250,...,2.00000,0.6875,0.8750,1.2500,0.0625,,,,,
