In [1]:
from ms3 import Parse
from ms3.utils import transform, roman_numeral2tpc, fifths2pc, fifths2name, name2tpc
import pandas as pd
# Let wide label tables show up to 50 columns before pandas truncates them.
pd.set_option('display.max_columns', 50)

# Load expanded ABC labels

In [2]:
# Location of the harmony-label TSV files. NOTE: this is an absolute path on the
# original author's machine -- point it at your local copy before running.
# The variable is deliberately not called `dir`, which would shadow the builtin
# `dir()` for the rest of the kernel session.
harmonies_dir = '/home/hentsche/ABC/harmonies'
# Register every file whose name ends in 'tsv' under the key 'abc' and parse them.
p = Parse(harmonies_dir, key='abc', file_re='tsv$', index='fname')
p.parse_tsv()
p

70 files.
KEY -> EXTENSIONS
-----------------
abc -> {'.tsv': 70}

All 70 tabular files have been parsed, 70 of them as Annotations object(s).
KEY -> ANNOTATION LAYERS
------------------------
abc -> staff  voice  label_type
    -> 1      1      0 (dcml)        117
    ->               3 (dcml)      27908
    ->        2      3 (dcml)          2

In [3]:
# All parsed labels, ordered by index, without the last three columns.
labels = (
    p.get_labels()
    .sort_index()
    .iloc[:, :-3]
)
labels

Unnamed: 0_level_0,Unnamed: 1_level_0,mc,mn,mc_onset,mn_onset,timesig,staff,voice,label,globalkey,localkey,pedal,chord,special,numeral,form,figbass,changes,relativeroot,phraseend,chord_type,globalkey_is_minor,localkey_is_minor,chord_tones,added_tones,root,bass_note
fname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
n01op18-1_01,0,1,1,0,0,3/4,1,1,.F.I,F,I,,I,,I,,,,,,M,False,False,"(0, 4, 1)",(),0,0
n01op18-1_01,1,2,2,0,0,3/4,1,1,V,F,I,,V,,V,,,,,,M,False,False,"(1, 5, 2)",(),1,1
n01op18-1_01,2,3,3,0,0,3/4,1,1,I,F,I,,I,,I,,,,,,M,False,False,"(0, 4, 1)",(),0,0
n01op18-1_01,3,4,4,0,0,3/4,1,1,IV6,F,I,,IV6,,IV,,6,,,,M,False,False,"(3, 0, -1)",(),-1,3
n01op18-1_01,4,6,6,0,0,3/4,1,1,V65,F,I,,V65,,V,,65,,,,Mm7,False,False,"(5, 2, -1, 1)",(),1,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
n16op135_04,557,281,281,1/2,1/2,4/4,1,1,I,f,I,,I,,I,,,,,,M,True,False,"(0, 4, 1)",(),0,0
n16op135_04,558,281,281,3/4,3/4,4/4,1,1,I6,f,I,,I6,,I,,6,,,,M,True,False,"(4, 1, 0)",(),0,4
n16op135_04,559,282,282,0,0,4/4,1,1,V(64),f,I,,V(64),,V,,,64,,,M,True,False,"(1, 0, 4)",(),1,1
n16op135_04,560,282,282,1/2,1/2,4/4,1,1,V7,f,I,,V7,,V,,7,,,,Mm7,True,False,"(1, 5, 2, -1)",(),1,1


# Transform `bass_note` column

## Expressing all bass notes as scale degrees of the global tonic
Since all scale degrees are expressed as fifths-intervals, this is as easy as adding the local key expressed as fifths

In [4]:
# Convert each row's local key (a Roman numeral relative to the global key) to a
# number of fifths, then shift the bass notes by that amount so that they are
# expressed relative to the global tonic instead of the local one.
key_columns = ['localkey', 'globalkey_is_minor']
transpose_by = transform(labels, roman_numeral2tpc, key_columns)
bass = labels['bass_note'] + transpose_by
bass.head()

fname          
n01op18-1_01  0    0
              1    1
              2    0
              3    3
              4    5
dtype: Int64

## Adding bass note names to DataFrame

In [5]:
# Shift the global scale degrees by the global tonic (converted from its name by
# `name2tpc`) and translate the resulting fifths into note names.
# `.values` drops the index so the names are assigned by position.
transpose_by = transform(labels, name2tpc, ['globalkey'])
absolute_fifths = bass + transpose_by
labels['bass_name'] = fifths2name(absolute_fifths).values
labels.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,mc,mn,mc_onset,mn_onset,timesig,staff,voice,label,globalkey,localkey,pedal,chord,special,numeral,form,figbass,changes,relativeroot,phraseend,chord_type,globalkey_is_minor,localkey_is_minor,chord_tones,added_tones,root,bass_note,bass_name
fname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
n01op18-1_01,0,1,1,0,0,3/4,1,1,.F.I,F,I,,I,,I,,,,,,M,False,False,"(0, 4, 1)",(),0,0,F
n01op18-1_01,1,2,2,0,0,3/4,1,1,V,F,I,,V,,V,,,,,,M,False,False,"(1, 5, 2)",(),1,1,C
n01op18-1_01,2,3,3,0,0,3/4,1,1,I,F,I,,I,,I,,,,,,M,False,False,"(0, 4, 1)",(),0,0,F
n01op18-1_01,3,4,4,0,0,3/4,1,1,IV6,F,I,,IV6,,IV,,6.0,,,,M,False,False,"(3, 0, -1)",(),-1,3,D
n01op18-1_01,4,6,6,0,0,3/4,1,1,V65,F,I,,V65,,V,,65.0,,,,Mm7,False,False,"(5, 2, -1, 1)",(),1,5,E


## Calculating intervals between successive bass notes
Sloppy version: Include intervals across movement boundaries

### Bass progressions expressed in fifths

In [6]:
# Back-fill missing bass notes from the next available one, then take the
# difference between every bass note and its predecessor (in fifths).
# The first entry has no predecessor and therefore stays missing.
bass = bass.bfill()
previous_bass = bass.shift()
ivs = bass - previous_bass
ivs.value_counts()

 0     4809
-1     3918
-5     2903
-2     2746
 5     2537
 2     2490
 1     2061
 3     1576
-4     1269
 4     1055
-3      983
 7      400
 6      332
-7      233
-6      170
 8      147
 9       95
-8       76
-9       68
 10      52
-10      27
 12      27
-12      18
-11       8
-13       6
-14       5
 13       5
 14       4
 11       3
 16       1
 17       1
 15       1
dtype: Int64

### Bass progressions expressed in (enharmonic) semitones

In [7]:
# Convert the fifths intervals to pitch-class intervals and restore the original index.
pc_ivs = fifths2pc(ivs)
pc_ivs.index = ivs.index
# Values up to 6 are kept as they are; larger ones are folded to the negative
# range via `% -6` (e.g. 7 -> -5, 11 -> -1). Missing values become 0.
within_tritone = pc_ivs <= 6
folded = pc_ivs % -6
pc_ivs = pc_ivs.where(within_tritone, folded).fillna(0)
pc_ivs.value_counts()

 0    4855
 5    3927
 1    3303
-2    2803
-1    2771
 2    2521
-5    2074
-3    1645
-4    1416
 4    1132
 3    1078
 6     502
dtype: int64

# Chromatic bass progressions

In [8]:
def cnt(S, interval, k_min=1, df=True):
    """Collect runs of successive `interval` steps from a Series of intervals.

    `S` is traversed in order. Consecutive entries whose value is either
    `interval` or 0 are grouped into one run; a 0 extends the run without
    being counted. Any other value closes the current run, and the index of
    that entry becomes the first index of the next run.

    Parameters
    ----------
    S : pd.Series
        Interval values. Missing values should be filled beforehand; NaN is
        treated like any other non-matching value and breaks the run.
    interval : int
        The interval value to look for (e.g. -1 or 1).
    k_min : int, default 1
        Minimum number of `interval` occurrences a run needs in order to be
        included in the result.
    df : bool, default True
        If True, return a DataFrame with the columns 'ixs' and 'n';
        otherwise return a list of ``(ixs, n)`` tuples.

    Returns
    -------
    pd.DataFrame or list
        One row/tuple per qualifying run: 'ixs' is the list of index labels
        belonging to the run and 'n' the number of `interval` occurrences.
    """
    chunks = []
    current = []
    n = 0

    def flush():
        # Keep the run collected so far only if it is long enough.
        if n >= k_min:
            chunks.append((current, n))

    # `Series.items()` replaces `iteritems()`, which was removed in pandas 2.0.
    for i, iv in S.items():
        if iv in (0, interval):
            current.append(i)
            if iv == interval:
                n += 1
        else:
            flush()
            # The entry that broke the run is the starting point of the next one.
            current = [i]
            n = 0
    # A run that is still open when the Series ends must be stored as well.
    flush()

    if not df:
        return chunks
    # Build the frame once instead of appending row by row
    # (`DataFrame.append` was removed in pandas 2.0 and was quadratic anyway).
    return pd.DataFrame({
        'ixs': pd.Series([ixs for ixs, _ in chunks], dtype=object),
        'n': pd.Series([count for _, count in chunks], dtype='int64'),
    })

## Successive descending semitones

In [9]:
# Runs of successive descending semitones (-1) in the bass, then the
# distribution of how many steps each run contains.
desc = cnt(pc_ivs, interval=-1)
desc['n'].value_counts()

1    2233
2     114
3      62
4      13
5       9
7       3
6       1
Name: n, dtype: int64

### Storing those with three or more

In [10]:
# Flatten the index lists of all descending runs with at least three steps.
# A comprehension replaces `Series.sum()`, which concatenates the lists pairwise
# and evaluates to 0 (not a valid row key) when no run qualifies.
three_desc_ixs = [ix for ixs in desc.loc[desc.n > 2, 'ixs'] for ix in ixs]
three_desc = labels.loc[three_desc_ixs]
three_desc.to_csv('three_desc.tsv', sep='\t')
three_desc.head(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,mc,mn,mc_onset,mn_onset,timesig,staff,voice,label,globalkey,localkey,pedal,chord,special,numeral,form,figbass,changes,relativeroot,phraseend,chord_type,globalkey_is_minor,localkey_is_minor,chord_tones,added_tones,root,bass_note,bass_name
fname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
n02op18-2_02,160,84,83,0,0,3/4,1,1,V2/IV,C,I,,V2/IV,,V,,2.0,,IV,,Mm7,False,False,"(-2, 0, 4, 1)",(),0,-2,Bb
n02op18-2_02,161,85,84,0,0,3/4,1,1,IV6,C,I,,IV6,,IV,,6.0,,,,M,False,False,"(3, 0, -1)",(),-1,3,A
n02op18-2_02,162,85,84,1/4,1/4,3/4,1,1,ii43,C,I,,ii43,,ii,,43.0,,,,mm7,False,False,"(3, 0, 2, -1)",(),2,3,A
n02op18-2_02,163,85,84,1/2,1/2,3/4,1,1,.Ger6,C,I,,Ger6,Ger,vii,o,65.0,b3,V,,Ger,False,False,"(-4, 0, -3, 6)",(),6,-4,Ab
n02op18-2_02,164,86,85,0,0,3/4,1,1,V(64),C,I,,V(64),,V,,,64,,,M,False,False,"(1, 0, 4)",(),1,1,G
n02op18-2_02,165,86,85,1/2,1/2,3/4,1,1,V7,C,I,,V7,,V,,7.0,,,,Mm7,False,False,"(1, 5, 2, -1)",(),1,1,G
n02op18-2_03,81,47,46,0,0,3/4,1,1,I,G,IV,,I,,I,,,,,,M,False,False,"(0, 4, 1)",(),0,0,C
n02op18-2_03,82,48,47,0,0,3/4,1,1,V65,G,IV,,V65,,V,,65.0,,,,Mm7,False,False,"(5, 2, -1, 1)",(),1,5,B
n02op18-2_03,83,49,48,0,0,3/4,1,1,V2/IV,G,IV,,V2/IV,,V,,2.0,,IV,,Mm7,False,False,"(-2, 0, 4, 1)",(),0,-2,Bb
n02op18-2_03,84,50,49,0,0,3/4,1,1,IV6,G,IV,,IV6,,IV,,6.0,,,,M,False,False,"(3, 0, -1)",(),-1,3,A


### Storing those with four or more

In [11]:
# Flatten the index lists of all descending runs with at least four steps.
# A comprehension replaces `Series.sum()`, which concatenates the lists pairwise
# and evaluates to 0 (not a valid row key) when no run qualifies.
four_desc_ixs = [ix for ixs in desc.loc[desc.n > 3, 'ixs'] for ix in ixs]
four_desc = labels.loc[four_desc_ixs]
four_desc.to_csv('four_desc.tsv', sep='\t')
four_desc.head(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,mc,mn,mc_onset,mn_onset,timesig,staff,voice,label,globalkey,localkey,pedal,chord,special,numeral,form,figbass,changes,relativeroot,phraseend,chord_type,globalkey_is_minor,localkey_is_minor,chord_tones,added_tones,root,bass_note,bass_name
fname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
n02op18-2_03,81,47,46,0,0,3/4,1,1,I,G,IV,,I,,I,,,,,,M,False,False,"(0, 4, 1)",(),0,0,C
n02op18-2_03,82,48,47,0,0,3/4,1,1,V65,G,IV,,V65,,V,,65.0,,,,Mm7,False,False,"(5, 2, -1, 1)",(),1,5,B
n02op18-2_03,83,49,48,0,0,3/4,1,1,V2/IV,G,IV,,V2/IV,,V,,2.0,,IV,,Mm7,False,False,"(-2, 0, 4, 1)",(),0,-2,Bb
n02op18-2_03,84,50,49,0,0,3/4,1,1,IV6,G,IV,,IV6,,IV,,6.0,,,,M,False,False,"(3, 0, -1)",(),-1,3,A
n02op18-2_03,85,51,50,0,0,3/4,1,1,It6,G,IV,,It6,It,vii,o,6.0,b3,V,,It,False,False,"(-4, 0, 6)",(),6,-4,Ab
n02op18-2_03,86,52,51,0,0,3/4,1,1,V,G,IV,,V,,V,,,,,,M,False,False,"(1, 5, 2)",(),1,1,G
n02op18-2_04,192,160,160,3/8,3/8,2/4,1,1,V65,G,iv,,V65,,V,,65.0,,,,Mm7,False,True,"(5, 2, -1, 1)",(),1,5,B
n02op18-2_04,193,161,161,3/8,3/8,2/4,1,1,V2/IV,G,iv,,V2/IV,,V,,2.0,,IV,,Mm7,False,True,"(-2, 0, 4, 1)",(),0,-2,Bb
n02op18-2_04,194,162,162,0,0,2/4,1,1,IV6,G,iv,,IV6,,IV,,6.0,,,,M,False,True,"(3, 0, -1)",(),-1,3,A
n02op18-2_04,195,162,162,3/8,3/8,2/4,1,1,.Ger6,G,iv,,Ger6,Ger,vii,o,65.0,b3,V,,Ger,False,True,"(-4, 0, -3, 6)",(),6,-4,Ab


## Successive ascending semitones

In [12]:
# Runs of successive ascending semitones (+1) in the bass, then the
# distribution of how many steps each run contains.
asc = cnt(pc_ivs, interval=1)
asc['n'].value_counts()

1     2446
2      202
3       98
5       14
4        9
6        3
7        2
13       1
8        1
Name: n, dtype: int64

### Storing those with three or more

In [13]:
# Flatten the index lists of all ascending runs with at least three steps.
# A comprehension replaces `Series.sum()`, which concatenates the lists pairwise
# and evaluates to 0 (not a valid row key) when no run qualifies.
three_asc_ixs = [ix for ixs in asc.loc[asc.n > 2, 'ixs'] for ix in ixs]
three_asc = labels.loc[three_asc_ixs]
three_asc.to_csv('three_asc.tsv', sep='\t')
three_asc.head(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,mc,mn,mc_onset,mn_onset,timesig,staff,voice,label,globalkey,localkey,pedal,chord,special,numeral,form,figbass,changes,relativeroot,phraseend,chord_type,globalkey_is_minor,localkey_is_minor,chord_tones,added_tones,root,bass_note,bass_name
fname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
n01op18-1_01,167,136,136,1/4,1/4,3/4,1,1,@none,F,ii,,@none,,@none,,,,,,m,False,True,(),(),,,
n01op18-1_01,168,137,137,0,0,3/4,1,1,#viio7,F,ii,,#viio7,,#vii,o,7.0,,,,o7,False,True,"(5, 2, -1, -4)",(),5.0,5.0,F#
n01op18-1_01,169,137,137,1/4,1/4,3/4,1,1,VI6,F,ii,,VI6,,VI,,6.0,,,,M,False,True,"(0, -3, -4)",(),-4.0,0.0,G
n01op18-1_01,170,137,137,1/2,1/2,3/4,1,1,V65/ii,F,ii,,V65/ii,,V,,65.0,,ii,,Mm7,False,True,"(7, 4, 1, 3)",(),3.0,7.0,G#
n01op18-1_01,171,138,138,0,0,3/4,1,1,#viio65,F,ii,,#viio65,,#vii,o,65.0,,,,o7,False,True,"(2, -1, -4, 5)",(),5.0,2.0,A
n01op18-1_01,172,138,138,1/2,1/2,3/4,1,1,i6,F,ii,,i6,,i,,6.0,,,,m,False,True,"(-3, 1, 0)",(),0.0,-3.0,Bb
n01op18-1_01,181,143,143,0,0,3/4,1,1,#viio7,F,v,,#viio7,,#vii,o,7.0,,,,o7,False,True,"(5, 2, -1, -4)",(),5.0,5.0,B
n01op18-1_01,182,143,143,1/4,1/4,3/4,1,1,VI6,F,v,,VI6,,VI,,6.0,,,,M,False,True,"(0, -3, -4)",(),-4.0,0.0,C
n01op18-1_01,183,143,143,1/2,1/2,3/4,1,1,#viio/ii,F,v,,#viio/ii,,#vii,o,,,ii,,o,False,True,"(7, 4, 1)",(),7.0,7.0,C#
n01op18-1_01,184,144,144,0,0,3/4,1,1,V43,F,v,,V43,,V,,43.0,,,,Mm7,False,True,"(2, -1, 1, 5)",(),1.0,2.0,D


### Storing those with four or more

In [14]:
# Flatten the index lists of all ascending runs with at least four steps.
# A comprehension replaces `Series.sum()`, which concatenates the lists pairwise
# and evaluates to 0 (not a valid row key) when no run qualifies.
four_asc_ixs = [ix for ixs in asc.loc[asc.n > 3, 'ixs'] for ix in ixs]
four_asc = labels.loc[four_asc_ixs]
four_asc.to_csv('four_asc.tsv', sep='\t')
four_asc.head(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,mc,mn,mc_onset,mn_onset,timesig,staff,voice,label,globalkey,localkey,pedal,chord,special,numeral,form,figbass,changes,relativeroot,phraseend,chord_type,globalkey_is_minor,localkey_is_minor,chord_tones,added_tones,root,bass_note,bass_name
fname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
n01op18-1_01,167,136,136,1/4,1/4,3/4,1,1,@none,F,ii,,@none,,@none,,,,,,m,False,True,(),(),,,
n01op18-1_01,168,137,137,0,0,3/4,1,1,#viio7,F,ii,,#viio7,,#vii,o,7.0,,,,o7,False,True,"(5, 2, -1, -4)",(),5.0,5.0,F#
n01op18-1_01,169,137,137,1/4,1/4,3/4,1,1,VI6,F,ii,,VI6,,VI,,6.0,,,,M,False,True,"(0, -3, -4)",(),-4.0,0.0,G
n01op18-1_01,170,137,137,1/2,1/2,3/4,1,1,V65/ii,F,ii,,V65/ii,,V,,65.0,,ii,,Mm7,False,True,"(7, 4, 1, 3)",(),3.0,7.0,G#
n01op18-1_01,171,138,138,0,0,3/4,1,1,#viio65,F,ii,,#viio65,,#vii,o,65.0,,,,o7,False,True,"(2, -1, -4, 5)",(),5.0,2.0,A
n01op18-1_01,172,138,138,1/2,1/2,3/4,1,1,i6,F,ii,,i6,,i,,6.0,,,,m,False,True,"(-3, 1, 0)",(),0.0,-3.0,Bb
n02op18-2_03,59,38,37,0,0,3/4,1,1,I6,G,I,,I6,,I,,6.0,,,,M,False,False,"(4, 1, 0)",(),0.0,4.0,B
n02op18-2_03,60,38,37,3/8,3/8,3/4,1,1,I+6,G,I,,I+6,,I,+,6.0,,,,+,False,False,"(4, 8, 0)",(),0.0,4.0,B
n02op18-2_03,61,38,37,1/2,1/2,3/4,1,1,IV,G,I,,IV,,IV,,,,,,M,False,False,"(-1, 3, 0)",(),-1.0,-1.0,C
n02op18-2_03,62,38,37,5/8,5/8,3/4,1,1,V65/V,G,I,,V65/V,,V,,65.0,,V,,Mm7,False,False,"(6, 3, 0, 2)",(),2.0,6.0,C#
