#  Cadences in a Corpus



## A. Import Intervals and Other Code

* See the Corpus Methods notebook for details on the various options for local and remote files


In [1]:
import intervals
from intervals import * 
from intervals import main_objs
import pandas as pd
import re
import requests
import os
import glob as glob

def _ensure_folder(folder):
    """Create ``folder`` if it is missing and report what happened.

    Parameters
    ----------
    folder : str
        Path of the directory to check, relative to the working directory
        unless an absolute path is given.

    Returns
    -------
    bool
        True when the folder already existed before the call, False when it
        had to be created.
    """
    existed = os.path.isdir(folder)
    if existed:
        print(folder, "folder already exists.")
    else:
        # exist_ok=True keeps this safe if the folder appears between the
        # isdir() check above and the makedirs() call.
        os.makedirs(folder, exist_ok=True)
        print("created folder : ", folder)
    return existed


# Working folders used by this notebook.
# NOTE(review): judging by the names, MYDIR holds exported CSV results and
# MUSDIR holds local score files -- confirm against the cells that use them.
MYDIR = "saved_csv"
MUSDIR = "Music_Files"

# CHECK_FOLDER keeps its original meaning: whether the most recently checked
# folder was already present before this cell ran.
CHECK_FOLDER = _ensure_folder(MYDIR)
CHECK_FOLDER = _ensure_folder(MUSDIR)

saved_csv folder already exists.
Music_Files folder already exists.


### Full CRIM Corpus

* Here we omit various monophonic pieces and a few others for which there are errors.

```
piece_list = []
raw_prefix = "https://raw.githubusercontent.com/CRIM-Project/CRIM-online/master/crim/static/mei/MEI_4.0/"
URL = "https://api.github.com/repos/CRIM-Project/CRIM-online/git/trees/990f5eb3ff1e9623711514d6609da4076257816c"
piece_json = requests.get(URL).json()
```

* A list of files to exclude:

```
exclude_list = ['CRIM_Model_0003.mei', 'CRIM_Model_0004.mei', 'CRIM_Model_0005.mei', 'CRIM_Model_0006.mei', 'CRIM_Model_0007.mei','CRIM_Model_0022.mei', 'CRIM_Model_0028.mei', 'CRIM_Model_0035.mei', 'CRIM_Mass_0029_4.mei', 'CRIM_Mass_0049_2.mei','CRIM_Mass_0049_5.mei']
```

*  The following ensures that we don't try to analyze the 'Mass head only' files, which have no musical content:

```
pattern = 'CRIM_Mass_([0-9]{4}).mei'
```

* Now the request for all the files


```
for p in piece_json["tree"]:
    p_name = p["path"]
    if re.search(pattern, p_name):
        pass
    elif p_name in exclude_list:
        pass
    else:
        piece_list.append(raw_prefix + p["path"])
```

In [2]:
# Build the list of raw-file URLs for ALL pieces in the CRIM repository on GitHub.
# We leave out various monophonic pieces (which have no contrapuntal cadences)
# and a few pieces that throw errors for reasons we don't yet understand.
raw_prefix = "https://raw.githubusercontent.com/CRIM-Project/CRIM-online/master/crim/static/mei/MEI_4.0/"
# NOTE(review): the URL addresses one specific git tree by hash, so the listing
# is presumably a fixed snapshot of the MEI folder -- confirm it is still current.
URL = "https://api.github.com/repos/CRIM-Project/CRIM-online/git/trees/990f5eb3ff1e9623711514d6609da4076257816c"

# Fail here, with a clear HTTP error, rather than later with a KeyError on
# "tree" when the API answers with an error payload. The timeout (in seconds)
# stops the cell from hanging forever on a stalled connection.
response = requests.get(URL, timeout=30)
response.raise_for_status()
piece_json = response.json()

# Files to exclude (see the note above).
exclude_list = [
    'CRIM_Model_0003.mei',
    'CRIM_Model_0004.mei',
    'CRIM_Model_0005.mei',
    'CRIM_Model_0006.mei',
    'CRIM_Model_0007.mei',
    'CRIM_Model_0022.mei',
    'CRIM_Model_0028.mei',
    'CRIM_Model_0035.mei',
    'CRIM_Mass_0029_4.mei',
    'CRIM_Mass_0049_2.mei',
    'CRIM_Mass_0049_5.mei',
]

# Matches the 'Mass head only' files (CRIM_Mass_NNNN.mei, without a movement
# suffix), which have no musical content. The dot is escaped so that it only
# matches a literal "." before the extension.
pattern = r'CRIM_Mass_([0-9]{4})\.mei'

# Keep every entry of the tree that is neither a head-only file nor excluded.
corpus_list = []
for p in piece_json["tree"]:
    p_name = p["path"]
    if re.search(pattern, p_name) or p_name in exclude_list:
        continue
    corpus_list.append(raw_prefix + p_name)

In [3]:
# For quick tests: only the first five URLs of the full corpus list.
short_corpus = corpus_list[:5]

In [4]:
# Hand the list of URLs to CorpusBase, which imports each score
# (per the original note, this is where the music21 files are created).
corpus = CorpusBase(corpus_list)

Downloading remote score...
Successfully imported https://raw.githubusercontent.com/CRIM-Project/CRIM-online/master/crim/static/mei/MEI_4.0/CRIM_Mass_0001_1.mei
Downloading remote score...
Successfully imported https://raw.githubusercontent.com/CRIM-Project/CRIM-online/master/crim/static/mei/MEI_4.0/CRIM_Mass_0001_2.mei
Downloading remote score...
Successfully imported https://raw.githubusercontent.com/CRIM-Project/CRIM-online/master/crim/static/mei/MEI_4.0/CRIM_Mass_0001_3.mei
Downloading remote score...
Successfully imported https://raw.githubusercontent.com/CRIM-Project/CRIM-online/master/crim/static/mei/MEI_4.0/CRIM_Mass_0001_4.mei
Downloading remote score...
Successfully imported https://raw.githubusercontent.com/CRIM-Project/CRIM-online/master/crim/static/mei/MEI_4.0/CRIM_Mass_0001_5.mei


In [5]:
# Length of the desired ngrams.
n = 3

# The three ImportedPiece methods that corpus.batch applies to every piece.
func = ImportedPiece.durationalRatios
func2 = ImportedPiece.melodic
func3 = ImportedPiece.ngrams

# Step 1: durational ratios for each piece (with metadata), rounded to two
# decimals.
list_of_dur_rats = corpus.batch(func=func, metadata=True)
list_of_dur_rats_rounded = [ratios.round(2) for ratios in list_of_dur_rats]

# Step 2: melodic results for each piece.
# NOTE(review): kind='d' presumably selects diatonic intervals -- confirm in
# the intervals documentation.
list_of_mel = corpus.batch(func=func2, kwargs={'kind' : 'd'})

# Step 3: ngrams built from the melodic frames ('df') together with the rounded
# durational ratios ('other').
ngram_kwargs = {'df' : list_of_mel, 'other' : list_of_dur_rats_rounded, 'n' : n}
list_of_ngrs = corpus.batch(func=func3, kwargs=ngram_kwargs)

# Stack the per-piece results into one frame, keeping each piece's own index.
combined_df = pd.concat(list_of_ngrs, ignore_index=False)
combined_df

Unnamed: 0,1,2,3,4,Composer,Title
6.0,"1_0.33, -2_2.0, 2_1.0","1_0.33, 1_2.0, 1_1.0","1_0.33, -2_2.0, 2_1.0","1_0.33, -4_2.0, 4_1.0",Pierre Colin,Missa Confitemini: Kyrie
8.0,"-2_2.0, 2_1.0, 2_0.75","1_2.0, 1_1.0, 3_0.75","-2_2.0, 2_1.0, 2_0.75","-4_2.0, 4_1.0, -2_1.5",Pierre Colin,Missa Confitemini: Kyrie
12.0,"2_1.0, 2_0.75, -2_0.33","1_1.0, 3_0.75, -2_0.17","2_1.0, 2_0.75, -2_0.33","4_1.0, -2_1.5, -5_0.33",Pierre Colin,Missa Confitemini: Kyrie
16.0,"2_0.75, -2_0.33, 2_1.0","3_0.75, -2_0.17, -2_1.0","2_0.75, -2_0.33, -2_2.0","-2_1.5, -5_0.33, 4_2.0",Pierre Colin,Missa Confitemini: Kyrie
19.0,"-2_0.33, 2_1.0, -3_1.0","-2_0.17, -2_1.0, -2_4.0","-2_0.33, -2_2.0, -2_0.5",,Pierre Colin,Missa Confitemini: Kyrie
...,...,...,...,...,...,...
252.0,"-2_1.0, -2_2.0, -2_0.5",,"-2_1.0, -2_1.0, -2_2.0","2_2.0, 2_1.0, -4_2.0",Pierre Colin,Missa Confitemini: Agnus Dei
253.0,,"-2_0.33, -2_1.0, -2_1.0",,,Pierre Colin,Missa Confitemini: Agnus Dei
254.0,"-2_2.0, -2_0.5, 2_8.0","-2_1.0, -2_1.0, 2_4.0","-2_1.0, -2_2.0, -2_4.0","2_1.0, -4_2.0, 4_4.0",Pierre Colin,Missa Confitemini: Agnus Dei
255.0,,"-2_1.0, 2_4.0, 1_4.0",,,Pierre Colin,Missa Confitemini: Agnus Dei
