
# CRIM Intervals:  Melodic and Harmonic Corpus Search

### What You Can Do with this Notebook:

* Search A Corpus for Melodic and Harmonic nGrams

### A. Import Intervals and Other Code


In [3]:
# The star import stays first so that the explicit imports below take
# precedence over any same-named objects it brings into the namespace.
import intervals
from intervals import * 
from intervals import main_objs
import intervals.visualizations as viz

import glob as glob
import os
import re

import altair as alt
import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
from IPython.display import display
from ipywidgets import fixed, interact
from pandas.io.json import json_normalize
from pyvis.network import Network


# Working folders used by this notebook: one for exported CSV files and one
# for music files.
MYDIR = "saved_csv"
MUSDIR = "Music_Files"

# Create each folder only when it is missing, and report what happened.
for folder in (MYDIR, MUSDIR):
    CHECK_FOLDER = os.path.isdir(folder)
    if CHECK_FOLDER:
        print(folder, "folder already exists.")
    else:
        os.makedirs(folder)
        print("created folder : ", folder)

saved_csv folder already exists.
Music_Files folder already exists.


## B. Corpus Inventory

The **CorpusBase** class is a convenient way to find patterns in any given list of pieces.
    
See the **Corpus Methods** Notebook for details, and `print(CorpusBase.batch.__doc__)`

### B.1  Corpus Melodic Inventory

- Also see Corpus Methods Notebook for other ways to import local and remote files!

- NB: use `ImportedPiece`, not `piece`!
- NB:  `func1` and `func2` do **NOT** include the closing parentheses!

```
corpus = CorpusBase(['https://crimproject.org/mei/CRIM_Mass_0050_1.mei',
                     'https://crimproject.org/mei/CRIM_Mass_0050_2.mei'
                     ])
func1 = ImportedPiece.melodic
list_of_dfs = corpus.batch(func=func1, kwargs={'kind': 'd', 'end': False}, metadata=False)
func2 = ImportedPiece.ngrams
list_of_melodic_ngrams = corpus.batch(func=func2, kwargs={'n': 4, 'df': list_of_dfs}, metadata=True)
title_of_output = pd.concat(list_of_melodic_ngrams)
```



#### Import Corpus with URLs

In [6]:
# Remote MEI files that make up the corpus; edit this list to change the pieces.
piece_urls = [
    'https://crimproject.org/mei/CRIM_Mass_0001_1.mei',
    'https://crimproject.org/mei/CRIM_Mass_0001_2.mei',
]
corpus = CorpusBase(piece_urls)

Downloading remote score...
Successfully imported https://crimproject.org/mei/CRIM_Mass_0001_1.mei
Downloading remote score...
Successfully imported https://crimproject.org/mei/CRIM_Mass_0001_2.mei


### Corpus Results for Melodic Ngrams

In [7]:

# Step 1: diatonic melodic intervals for every piece (no metadata yet).
func1 = ImportedPiece.melodic
list_of_dfs = corpus.batch(
    func=func1,
    kwargs={'kind': 'd', 'end': False},
    metadata=False,
)

# Step 2: ngrams of length 5 built from the interval tables of step 1.
func2 = ImportedPiece.ngrams
list_of_melodic_ngrams = corpus.batch(
    func=func2,
    kwargs={'n': 5, 'df': list_of_dfs},
    metadata=False,
)

# Step 3: detailed index for the ngram tables; metadata is requested only here.
func3 = ImportedPiece.detailIndex
list_of_detail_index = corpus.batch(
    func=func3,
    kwargs={'offset': False, 'df': list_of_melodic_ngrams},
    metadata=True,
)

# Combine all pieces into one table, then move the metadata columns to the
# right-hand side (popping a column and assigning it again appends it last).
mel_corpus = pd.concat(list_of_detail_index)
for meta_column in ("Composer", "Title"):
    mel_corpus[meta_column] = mel_corpus.pop(meta_column)

# Show missing cells as '-'.
mel_corpus = mel_corpus.fillna('-')
mel_corpus

Unnamed: 0_level_0,Unnamed: 1_level_0,1,2,3,4,Composer,Title
Measure,Beat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1.0,"(1, -2, 2, 2, -2)","(1, 1, 1, 3, -2)","(1, -2, 2, 2, -2)","(1, -4, 4, -2, -5)",Pierre Colin,Missa Confitemini: Kyrie
1,4.0,"(-2, 2, 2, -2, 2)","(1, 1, 3, -2, -2)","(-2, 2, 2, -2, -2)","(-4, 4, -2, -5, 4)",Pierre Colin,Missa Confitemini: Kyrie
2,1.0,"(2, 2, -2, 2, -3)","(1, 3, -2, -2, -2)","(2, 2, -2, -2, -2)","(4, -2, -5, 4, -2)",Pierre Colin,Missa Confitemini: Kyrie
2,3.0,"(2, -2, 2, -3, 4)","(3, -2, -2, -2, 2)","(2, -2, -2, -2, -2)","(-2, -5, 4, -2, 2)",Pierre Colin,Missa Confitemini: Kyrie
3,1.0,"(-2, 2, -3, 4, -2)","(-2, -2, -2, 2, -2)","(-2, -2, -2, -2, 2)","(-5, 4, -2, 2, 2)",Pierre Colin,Missa Confitemini: Kyrie
...,...,...,...,...,...,...,...
78,1.5,-,"(2, -3, 2, -2, -2)",-,-,Pierre Colin,Missa Confitemini: Gloria
78,2.0,-,"(-3, 2, -2, -2, 2)",-,"(2, -5, 2, 2, -2)",Pierre Colin,Missa Confitemini: Gloria
78,2.5,"(-2, -2, -2, -2, 2)",-,-,-,Pierre Colin,Missa Confitemini: Gloria
78,3.0,-,"(2, -2, -2, 2, 1)",-,"(-5, 2, 2, -2, 4)",Pierre Colin,Missa Confitemini: Gloria


## Corpus Search for Melodic nGrams

Note that `mel_corpus` must match the name of the combined results created above.

In [9]:

def _convertTuple(tup):
    """Render an ngram tuple as a comma-separated string.

    Parameters
    ----------
    tup : object
        A single table cell.

    Returns
    -------
    str
        The elements of ``tup`` joined with ", " when ``tup`` is a tuple;
        an empty string for any other kind of value.
    """
    if not isinstance(tup, tuple):
        return ""
    # Pass every element through str() so that tuples holding numbers are
    # handled as well as tuples holding strings (str.join alone raises
    # TypeError on non-string items).
    return ', '.join(map(str, tup))

@interact
def mel_ngram_search(my_search="", df = fixed(mel_corpus)):
    """Filter the melodic ngram table by a search pattern and highlight hits.

    Parameters
    ----------
    my_search : str
        Text typed into the search box. It is used as a regular expression;
        if it is not a valid one (for example while a parenthesis is still
        unclosed) it is matched literally instead.
    df : DataFrame
        Table of ngram tuples with "Composer" and "Title" columns. It is
        wrapped in ``fixed`` so that it is passed through unchanged.

    Returns
    -------
    pandas Styler
        The rows in which at least one cell matches, with the index turned
        into columns, every value shown as text, and matching cells shaded.
    """
    # A partially typed query may not compile as a regular expression; fall
    # back to a literal match instead of raising on every keystroke.
    try:
        re.compile(my_search)
        pattern = my_search
    except re.error:
        pattern = re.escape(my_search)

    # Turn every tuple into a plain "a, b, c" string. Series.map is used per
    # column because DataFrame.applymap is deprecated in recent pandas.
    df_no_tuple = df.apply(lambda column: column.map(_convertTuple))

    # The conversion blanks out non-tuple cells, so restore the metadata
    # columns from the original table and place them first.
    df_no_tuple.pop("Composer")
    df_no_tuple.pop("Title")
    df_no_tuple.insert(0, "Composer", df["Composer"])
    df_no_tuple.insert(1, "Title", df["Title"])

    # Keep the rows where any cell matches the pattern.
    row_matches = df_no_tuple.apply(
        lambda row: row.astype(str).str.contains(pattern).any(), axis=1
    )
    filtered_ngrams = df_no_tuple[row_matches].copy()

    # Use the fully qualified option name: the bare 'max_columns' matches
    # more than one option in recent pandas and raises OptionError.
    pd.set_option('display.max_columns', None)

    as_text = (
        filtered_ngrams.fillna("-")
        .reset_index()
        .apply(lambda column: column.map(str))
    )
    styler = as_text.style
    # Styler.map replaces the deprecated Styler.applymap in recent pandas;
    # use whichever one this pandas version provides.
    highlight = getattr(styler, "map", None) or styler.applymap
    return highlight(lambda x: "background: #ccebc4" if re.search(pattern, x) else "")

interactive(children=(Text(value='', description='my_search'), Output()), _dom_classes=('widget-interact',))

## Corpus Harmonic nGrams


* Set the **kind** ("d" = diatonic, "c" = chromatic) via **kwargs** below.
* Set the length (**n**) of ngrams via **kwargs** below.


In [10]:
def _convertTuple(tup):
    """Render an ngram tuple as a comma-separated string.

    Parameters
    ----------
    tup : object
        A single table cell.

    Returns
    -------
    str
        The elements of ``tup`` joined with ", " when ``tup`` is a tuple;
        an empty string for any other kind of value.
    """
    if not isinstance(tup, tuple):
        return ""
    # Pass every element through str() so that tuples holding numbers are
    # handled as well as tuples holding strings (str.join alone raises
    # TypeError on non-string items).
    return ', '.join(map(str, tup))

# Build the harmonic pipeline the same way as the melodic one: the
# intermediate steps skip the metadata so that only interval columns are
# handed to the next step, and Composer/Title are attached once, at the end.

# Step 1: diatonic harmonic intervals for every piece.
func1 = ImportedPiece.harmonic
list_of_dfs = corpus.batch(func=func1, kwargs={'kind': 'd'}, metadata=False)

# Step 2: ngrams of length 4 built from the interval tables of step 1.
func2 = ImportedPiece.ngrams
list_of_harmonic_ngrams = corpus.batch(func=func2, kwargs={'n': 4, 'df': list_of_dfs}, metadata=False)

# Step 3: detailed index for the ngram tables, now with metadata.
func3 = ImportedPiece.detailIndex
list_of_detail_index = corpus.batch(func=func3, kwargs={'offset': False, 'df': list_of_harmonic_ngrams}, metadata=True)

# Convert the tuples of each piece to plain strings. The conversion blanks
# out non-tuple cells, so the metadata columns are copied back afterwards.
# Series.map is used per column because DataFrame.applymap is deprecated in
# recent pandas.
cleaned_list = []
for df in list_of_detail_index:
    df_no_tuple = df.apply(lambda column: column.map(_convertTuple))
    df_no_tuple["Composer"] = df["Composer"]
    df_no_tuple["Title"] = df["Title"]
    cleaned_list.append(df_no_tuple)

# Combine all pieces and order the columns in descending label order
# (a new frame is produced instead of sorting in place).
har_corpus = pd.concat(cleaned_list).sort_index(axis=1, ascending=False)

# Move the metadata columns to the front of the table.
composer_column = har_corpus.pop("Composer")
title_column = har_corpus.pop("Title")
har_corpus.insert(0, "Composer", composer_column)
har_corpus.insert(1, "Title", title_column)

har_corpus

Unnamed: 0_level_0,Unnamed: 1_level_0,Composer,Title,4_3,4_2,4_1,3_2,3_1,2_1
Measure,Beat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,1.0,Pierre Colin,Missa Confitemini: Kyrie,"3, 3, 5, 3","5, 5, 8, 5","8, 8, 10, 8","3, 3, 4, 3","6, 6, 6, 6","4, 4, 3, 4"
1,4.0,Pierre Colin,Missa Confitemini: Kyrie,"3, 5, 3, 5","5, 8, 5, 8","8, 10, 8, 10","3, 4, 3, 4","6, 6, 6, 6","4, 3, 4, 3"
2,1.0,Pierre Colin,Missa Confitemini: Kyrie,"5, 3, 5, 4","8, 5, 8, 7","10, 8, 10, 9","4, 3, 4, 4","6, 6, 6, 6","3, 4, 3, 3"
2,3.0,Pierre Colin,Missa Confitemini: Kyrie,"3, 5, 4, 3","5, 8, 7, 6","8, 10, 9, 10","3, 4, 4, 3","6, 6, 6, 8","4, 3, 3, 4"
3,1.0,Pierre Colin,Missa Confitemini: Kyrie,"5, 4, 3, 6","8, 7, 6, 5","10, 9, 10, 8","4, 4, 3, 3","6, 6, 8, 6","3, 3, 4, 6"
...,...,...,...,...,...,...,...,...,...
78,3.0,Pierre Colin,Missa Confitemini: Gloria,"5, 8, 6, 4","5, 10, 8, 6","10, 13, 12, 10","1, 3, 3, 3","6, 6, 7, 7","6, 4, 5, 5"
78,4.0,Pierre Colin,Missa Confitemini: Gloria,"8, 6, 4, 3","10, 8, 6, 8","13, 12, 10, 11","3, 3, 3, 4","6, 7, 7, 8","4, 5, 5, 4"
79,1.0,Pierre Colin,Missa Confitemini: Gloria,"6, 4, 3, 5","8, 6, 8, 5","12, 10, 11, 10","3, 3, 4, 4","7, 7, 8, 7","5, 5, 4, 3"
79,2.0,Pierre Colin,Missa Confitemini: Gloria,"4, 3, 5, 1",,"10, 11, 10, 8","3, 4, 4, 5","7, 8, 7, 6","5, 4, 3, 4"


## Corpus Search for Harmonic nGrams

* Note that the length of ngrams is set above via the code for **har_corpus**

In [11]:
def _convertTuple(tup):
    """Render an ngram tuple as a comma-separated string.

    Parameters
    ----------
    tup : object
        A single table cell.

    Returns
    -------
    str
        The elements of ``tup`` joined with ", " when ``tup`` is a tuple;
        an empty string for any other kind of value.
    """
    if not isinstance(tup, tuple):
        return ""
    # Pass every element through str() so that tuples holding numbers are
    # handled as well as tuples holding strings (str.join alone raises
    # TypeError on non-string items).
    return ', '.join(map(str, tup))

@interact
def har_ngram_search(my_search="", df = fixed(har_corpus)):
    """Filter the harmonic ngram table by a search pattern and highlight hits.

    Parameters
    ----------
    my_search : str
        Text typed into the search box. It is used as a regular expression;
        if it is not a valid one (for example while a parenthesis is still
        unclosed) it is matched literally instead.
    df : DataFrame
        Table to search, wrapped in ``fixed`` so that it is passed through
        unchanged. Its cells are expected to be plain strings already.

    Returns
    -------
    pandas Styler
        The rows in which at least one cell matches, with the index turned
        into columns, every value shown as text, and matching cells shaded.
    """
    # A partially typed query may not compile as a regular expression; fall
    # back to a literal match instead of raising on every keystroke.
    try:
        re.compile(my_search)
        pattern = my_search
    except re.error:
        pattern = re.escape(my_search)

    # Search the table that was passed in rather than the global variable,
    # so the `df` argument is actually honoured.
    candidates = df.copy()
    row_matches = candidates.apply(
        lambda row: row.astype(str).str.contains(pattern).any(), axis=1
    )
    filtered_ngrams = candidates[row_matches].copy()

    # Use the fully qualified option name: the bare 'max_columns' matches
    # more than one option in recent pandas and raises OptionError.
    pd.set_option('display.max_columns', None)

    as_text = (
        filtered_ngrams.fillna("-")
        .reset_index()
        .apply(lambda column: column.map(str))
    )
    styler = as_text.style
    # Styler.map replaces the deprecated Styler.applymap in recent pandas;
    # use whichever one this pandas version provides.
    highlight = getattr(styler, "map", None) or styler.applymap
    return highlight(lambda x: "background: #ccebc4" if re.search(pattern, x) else "")

interactive(children=(Text(value='', description='my_search'), Output()), _dom_classes=('widget-interact',))