# Various Routines to Harvest CRIM Metadata from Production Server

### Load the data from CRIM

In [1]:
import requests
import pandas as pd
import json
rel_data = requests.get('http://crimproject.org/data/relationships/').json()
obs_data = requests.get('https://crimproject.org/data/observations/').json()
people = requests.get('https://crimproject.org/data/people/').json()
pieces = requests.get('https://crimproject.org/data/pieces/').json()
voices = requests.get('https://crimproject.org/data/voices/').json()


# rels = pd.json_normalize(rel_data)
# rels.head(7)


In [2]:
print(pd.__version__)

1.2.4


In [3]:
obs_data

[{'url': 'https://crimproject.org/data/observations/1/',
  'id': 1,
  'observer': {'url': 'https://crimproject.org/data/people/CRIM_Person_1012/',
   'name': 'Ian Lorenz'},
  'piece': {'url': 'https://crimproject.org/data/pieces/CRIM_Model_0011/',
   'piece_id': 'CRIM_Model_0011',
   'full_title': 'Tota pulchra es',
   'mass': None},
  'ema': '1-6/1,1,1-2,1-2,2,2/@1,@1-3,@1-3+@1,@1-3+@1-3,@1-3,@1',
  'musical_type': 'fuga',
  'details': {'voices': [1, 2],
   'inverted': False,
   'periodic': False,
   'regularity': None,
   'retrograde': False,
   'sequential': False,
   'voice names': ['Superius', 'Contratenor'],
   'time intervals': 'B2',
   'entry intervals': '4-',
   'voice names reg': ['Superius', 'Contratenor']},
  'definition': {'url': 'https://crimproject.org/data/definition/15/',
   'id': 15,
   'observation_definition': [{'name': 'cantus firmus',
     'subtypes': [{'form': 'text', 'name': 'voice', 'validation': '\\d'},
      {'form': 'radio',
       'name': 'features',
      

Here is where to find the fields we need:

**Observer (which will be CRIM_Person_0000):  **

obs_pers_id = observation['observer']
rel_pers_id = relationship['observer']

**Observation_ID or Relationship_ID:**

obs_id = observation['id']
rel_id = relationship['id']

**EMA**

ema_data = observation['ema']

**PIECE**

obs_piece_id = observation['piece']

**VOICES** 

crim_voices https://crimproject.org/data/voices/

voice_name = voices['original_name']
voice_obj = voices['order']
piece_id_voices_file = voices[
These are dependent on the musical type, so we need to have searches dependent on other searches!  And then we might be able to translate the strings, like  'Superius\r\nContratenor', into sets of integers.  

Micah reports:  We want voices to be a JSON list: {'voices': [2, 4, 1, 2]}.

**MUSICAL TYPE**

This is a boolean, as well as a string in another field.  Since the "Musical Type" field can have more than one value, better to send the IP type values to comments?  Will that suppress the current values?  Or deal with these via Curate?

if observation['mt_cf'] is True
    new_json[{'musical_type': 'Cantus Firmus'}
    new_json.update({

**MODEL of SUBROUTINE**

for observation in observation_data:
    obs_pers_id = observation['observer']   
    obs_id = observation['id']
    obs_piece_id = observation['piece'] 
    obs_ema = observation['ema']
    if observation['mt_cf'] is True:
        cf_voice = observation['voice']
        cf_dur = observation['mt_cad_type']
        cf_mel = observation['mt_cad_type']
        cf_data = {'details': {'durations only': cf_dur, 'pitches only': cf_mel}}
        new_json.update(cf_data)
        new_json.update({'observer': obs_pers_id, 'id': obs_id, 'piece': obs_piece_id, 'ema': obs_ema})
    elif observation['mtsog'] is True:
    
    
if example_json['mt_cad'] is True:
    tone_val = example_json['mt_cad_tone']
    type_val = example_json['mt_cad_type']
    cad = {'cadence': {'tone': tone_val, 'type': type_val}}
    example_json.update(cad)

## Load the Voices Data

In [4]:
# Andy's method to load local file
# in this case 'CRIM_Voices_for_Migration' is a table that assembles voice names and object numbers, 
# with forms of voices found in CRIM Observation data, along with Regularized names
from pathlib import Path 
p = Path.cwd() / 'CRIM_Voices_for_Migration.csv'


In [5]:
# Another way to read the local file
v_data = pd.read_csv('CRIM_Voices_for_Migration.csv')
v_data.head(25) 

Unnamed: 0,work_title,piece.piece_id,piece__title,voice_id,order,original_name,regularized_name,CRIM_Obs_Name
0,Missa Confitemini,CRIM_Mass_0001_1,Kyrie,CRIM_Mass_0001_1(1),1,[Superius],Superius,Superius
1,Missa Confitemini,CRIM_Mass_0001_1,Kyrie,CRIM_Mass_0001_1(2),2,[Contratenor],Contratenor,Contratenor
2,Missa Confitemini,CRIM_Mass_0001_1,Kyrie,CRIM_Mass_0001_1(3),3,[Tenor],Tenor,Tenor
3,Missa Confitemini,CRIM_Mass_0001_1,Kyrie,CRIM_Mass_0001_1(4),4,[Bassus],Bassus,Bassus
4,Missa Confitemini,CRIM_Mass_0001_2,Gloria,CRIM_Mass_0001_2(1),1,[Superius],Superius,Superius
5,Missa Confitemini,CRIM_Mass_0001_2,Gloria,CRIM_Mass_0001_2(2),2,[Contratenor],Contratenor,Contratenor
6,Missa Confitemini,CRIM_Mass_0001_2,Gloria,CRIM_Mass_0001_2(3),3,[Tenor],Tenor,Tenor
7,Missa Confitemini,CRIM_Mass_0001_2,Gloria,CRIM_Mass_0001_2(4),4,[Bassus],Bassus,Bassus
8,Missa Confitemini,CRIM_Mass_0001_3,Credo,CRIM_Mass_0001_3(1),1,[Superius],Superius,Superius
9,Missa Confitemini,CRIM_Mass_0001_3,Credo,CRIM_Mass_0001_3(2),2,[Contratenor],Contratenor,Contratenor


In [6]:
sg_dur = True
sg_mel = True

sg_mel_dur = sg_dur == True | sg_mel == True
sg_mel_dur

# if cf_dur == True | cf_mel == True:
#         cf_mel_dur = True

True

## Load the Observation Migration Functions

In [7]:
# all functions for migration
# gets details for each musical type
# gets person id for each person
# migrates data to new format
# updates voice number and regularized names for voices
# functions for types with voice strings fixed and split and trailing/leading whitespace removed
# OK 11/2/21


# added logic for "both pitches and durations"  
def get_cf(observation):

    cf_voice = observation['mt_cf_voices'].rstrip().strip()
    cf_dur = observation['mt_cf_dur']
    cf_mel = observation['mt_cf_mel']
    cf_mel_dur =  cf_dur == True | cf_mel == True
    remarks = observation['remarks']
    cf_data = {'details': {'voice': cf_voice, 
                           'durations only': cf_dur, 
                           'pitches only': cf_mel,
                           'both pitches and durations': cf_mel_dur}}
    return cf_data

# 1/11/21:  changed sg_ostinato to ostinato, and sg_periodic to periodic
# Added logic for both pitches and durations
def get_sg(observation):
    
    sg_voice = observation['mt_sog_voices'].rstrip().strip()
    sg_dur = observation['mt_sog_dur']
    sg_mel = observation['mt_sog_mel']
    sg_mel_dur =  sg_dur == True | sg_mel == True
    sg_ostinato = observation['mt_sog_ostinato']
    sg_periodic = observation['mt_sog_periodic']
    remarks = observation['remarks']
    sg_data = {'details': {'voice': sg_voice, 
                           'durations only': sg_dur, 
                           'pitches only': sg_mel, 
                           'ostinato': sg_ostinato, 
                           'periodic': sg_periodic,
                           'both pitches and durations': sg_mel_dur}}
    return sg_data

# # Added logic for both pitches and durations

def get_csg(observation):
    csg_voice = observation['mt_csog_voices'].rstrip().strip()
    csg_dur = observation['mt_csog_dur']
    csg_mel = observation['mt_csog_mel']
    csg_mel_dur =  csg_dur == True | csg_mel == True
    remarks = observation['remarks']
    csg_data = {'details': {'voice': csg_voice, 
                           'durations only': csg_dur, 
                           'pitches only': csg_mel,
                           'both pitches and durations': csg_mel_dur}}
    return csg_data
    
def get_cd(observation):
    cd_voices = observation['mt_cd_voices'].replace('\r\n', '\n').split('\n')
    cd_voices = [v.strip() for v in cd_voices]
    remarks = observation['remarks']
    cd_data = {'details': {'voices': cd_voices}}
    return cd_data
    
# 1/11/21:  changed 'intervals' to 'entry intervals' to match def 11
# fg lacks 'flexed tonal' as seen in def 11.  But there are NO examples in CRIM data!

def get_fg(observation):
    
    fg_voices = observation['mt_fg_voices'].replace('\r\n', '\n').split('\n')
    fg_voices = [v.strip() for v in fg_voices]
    fg_int = observation['mt_fg_int']
    fg_tint = observation['mt_fg_tint']
    fg_periodic = observation['mt_fg_periodic']
    fg_strict = observation['mt_fg_strict']
    fg_flexed = observation['mt_fg_flexed']
    fg_sequential = observation['mt_fg_sequential']
    fg_inverted = observation['mt_fg_inverted']
    fg_retrograde = observation['mt_fg_retrograde']
    remarks = observation['remarks']
    fg_data = {'details': {'voices': fg_voices, 
                           'entry intervals': fg_int, 
                           'time intervals': fg_tint, 
                           'periodic': fg_periodic, 
                           'strict': fg_strict, 
                           'flexed': fg_flexed, 
                           'sequential': fg_sequential, 
                           'inverted': fg_inverted, 
                           'retrograde': fg_retrograde}}
        
    return fg_data

# 1/11/21:  removed underscores from time_intervals flexed_tonal, 
# invertible_counterpoint, added_entries
# added entry intervals to intervals

def get_pe(observation):
    pe_voices = observation['mt_pe_voices'].replace('\r\n', '\n').split('\n')
    pe_voices = [v.strip() for v in pe_voices]
    pe_int = observation['mt_pe_int']
    pe_tint = observation['mt_pe_tint']
    pe_strict = observation['mt_pe_strict']
    pe_flexed = observation['mt_pe_flexed']
    pe_flex_tonal = observation['mt_pe_flt']
    pe_sequential = observation['mt_pe_sequential']
    pe_added = observation['mt_pe_added']
    pe_invt = observation['mt_pe_invertible']
    remarks = observation['remarks']
    pe_data = {'details': {'voices': pe_voices, 
                           'entry intervals': pe_int, 
                           'time intervals': pe_tint, 
                           'strict': pe_strict, 
                           'flexed': pe_flexed, 
                           'flexed tonal': pe_flex_tonal, 
                           'sequential': pe_sequential, 
                           'invertible counterpoint': pe_invt, 
                           'added entries': pe_added}}
        
    return pe_data

# 1/11/21 removed underscores from time_intervals and flexed_tonal
# added entry intervals to intervals
# def 11 also contains int

def get_id(observation):
    
    id_voices = observation['mt_id_voices'].replace('\r\n', '\n').split('\n')
    id_voices = [v.strip() for v in id_voices]
    id_int = observation['mt_id_int']
    id_tint = observation['mt_id_tint']
    id_strict = observation['mt_id_strict']
    id_flexed = observation['mt_id_flexed']
    id_flex_tonal = observation['mt_id_flt']
    id_invt = observation['mt_id_invertible']
    remarks = observation['remarks']
    id_data = {'details': {'voices': id_voices, 
                           'entry intervals': id_int, 
                           'time intervals': id_tint, 
                           'strict': id_strict, 
                           'flexed': id_flexed, 
                           'flexed tonal': id_flex_tonal,
                           'invertible counterpoint': id_invt}}
    return id_data

#1/11/21;  added entry intervals to intervals
# removed underscore from time_intervals, flexed_tonal
def get_nid(observation):
    
    nid_voices = observation['mt_nid_voices'].replace('\r\n', '\n').split('\n')
    nid_voices = [v.strip() for v in nid_voices]
    nid_int = observation['mt_nid_int']
    nid_tint = observation['mt_nid_tint']
    nid_strict = observation['mt_nid_strict']
    nid_flexed = observation['mt_nid_flexed']
    nid_flex_tonal = observation['mt_nid_flt']
    nid_sequential = observation['mt_nid_sequential']
    nid_invt = observation['mt_nid_invertible']
    remarks = observation['remarks']
    nid_data = {'details': {'voices': nid_voices, 
                           'entry intervals': nid_int, 
                           'time intervals': nid_tint, 
                           'strict': nid_strict, 
                           'flexed': nid_flexed, 
                           'flexed tonal': nid_flex_tonal, 
                           'sequential': nid_sequential, 
                           'invertible counterpoint': nid_invt}}
    return nid_data   

# 1/11/21 removed underscore for hr_simple etc

def get_hr(observation):
    hr_voices = observation['mt_hr_voices'].replace('\r\n', '\n').split('\n')
    hr_voices = [v.strip() for v in hr_voices]
    hr_simple = observation['mt_hr_simple']
    hr_staggered = observation['mt_hr_staggered']
    hr_sequential = observation['mt_hr_sequential']
    hr_fauxbourdon= observation['mt_hr_fauxbourdon']
    remarks = observation['remarks']
    hr_data = {'details': {'voices': hr_voices, 
                           'hr simple': hr_simple, 
                           'hr staggered': hr_staggered,
                           'hr sequential': hr_sequential, 
                           'hr fauxbourdon': hr_fauxbourdon}}
    return hr_data    

# 1/11/21  removed underscore dovetail_voice

        
def get_cad(observation):
    cad_tone = observation['mt_cad_tone']
    cad_type = observation['mt_cad_type']
    cad_can = observation['mt_cad_cantizans'].rstrip().strip()
    cad_ten = observation['mt_cad_tenorizans'].rstrip().strip()
    cad_dove_v = observation['mt_cad_dtv'].rstrip().strip()
    cad_dove_i = observation['mt_cad_dti']
    remarks = observation['remarks']
    cad_data = {'details': {'tone': cad_tone, 
                            'type': cad_type, 
                            'voices': {'cantizans': cad_can, 
                            'tenorizans': cad_ten},
                            'dovetail voice': cad_dove_v}}
    return cad_data   

# Functions to look up voice number and regularized names.  OK 11/2/21

def look_up_voice_order(v_data, piece_id, obs_voice):
    mask = (v_data["piece.piece_id"] == piece_id) & (v_data["CRIM_Obs_Name"] == obs_voice)
    v_order = v_data[mask].iloc[0].order
    
    return int(v_order)
    
def look_up_voice_reg_name(v_data, piece_id, obs_voice):
    mask = (v_data["piece.piece_id"] == piece_id) & (v_data["CRIM_Obs_Name"] == obs_voice)
    v_reg_name = v_data[mask].iloc[0].regularized_name

    return (v_reg_name)

def get_person_id(people: list, name: str):
    obs_pers_id = 'unknown'
    for record in people:
            if record['name'] == name:
                obs_pers_id = (record['person_id'])          
    return obs_pers_id

def migrate_obs_data(observation: dict, people: list, v_data):
    
    #output = []
    migrated_json = {}
    m_type = ""
    details = {}
    
    obs_pers_name = observation['observer']['name']
    obs_pers_id = get_person_id(people, obs_pers_name)
    
    # here we get the 'name' as recorded in the CRIM Observation, then look up their CRIM ID
    
    obs_pers_name = observation['observer']['name']
    obs_pers_id = get_person_id(people, obs_pers_name)
    # and now the rest of the id for the piece and obs, plus ema
    obs_id = observation['id']
    obs_piece_id = observation['piece']['piece_id'] 
    obs_ema = observation['ema']
    remarks = observation['remarks']
    curated = observation['curated']
    created = observation['created']
    updated = observation['updated']
    # and how we test for each musical type and gather subfields
    
    # cantus firmus = data#9
    if observation['mt_cf'] is True:
        m_type = "cantus firmus"
        details = get_cf(observation)
        
    # soggetto data#94
    if observation['mt_sog'] is True:
        m_type = "soggetto"
        details = get_sg(observation)
        
    # counter soggetto data#53
    if observation['mt_csog'] is True:
        m_type = "counter soggetto"
        details = get_csg(observation)
       
    # contrapuntal duo data#88
    if observation['mt_cd'] is True:
        m_type = "contrapuntal duo"
        details = get_cd(observation)
        
    # fuga data #64
    if observation['mt_fg'] is True:
        m_type = "fuga"
        details = get_fg(observation)
        
    # periodic entry data #93
    if observation['mt_pe'] is True:
        m_type = "periodic entry"
        details = get_pe(observation)
           
    # imitative duo data #98
    if observation['mt_id'] is True:
        m_type = "imitative duo"
        details = get_id(observation)
               
    # non-imitative duo data #89
    if observation['mt_nid'] is True:
        m_type = "non-imitative duo"
        details = get_nid(observation)
      
    # homorhythm data #92
    if observation['mt_hr'] is True:
        m_type = "homorhythm"
        details = get_hr(observation)
         
    # cadence data #159
    # note that the details of the OLD cadence data don't fit the new type
    if observation['mt_cad'] is True:
        m_type = "cadence"
        details = get_cad(observation)
                   
    migrated_json.update({'observation_id': obs_id, 
                     'observer_id': obs_pers_id, 
                     'piece_id': obs_piece_id, 
                     'musical_type': m_type,
                     'ema': obs_ema,
                     'remarks': remarks,
                    'curated': curated,
                    'created': created,
                    'updated': updated})
    migrated_json.update(details) 
    
# now run voice order and name updates on the first stage of migrated data, then update

# 1/11/21:  removed underscores from voice_name_reg, can_name_reg, ten_name_reg, voice_order in all types

    v_order_list = []
    v_reg_name_list = []
    
    type_1_list = ["fuga", "contrapuntal duo", "periodic Entry", "imitative duo", "non-imitative duo", "homorhythm"]
    type_2_list = ["cantus firmus", "soggetto", "counter soggetto"]
    obs_id = migrated_json['observation_id']
    piece_id = migrated_json['piece_id']
    if migrated_json["musical_type"] in type_1_list:
        obs_voices = migrated_json['details']['voices']
        for obs_voice in obs_voices:
            v_reg_name_list.append(look_up_voice_reg_name(v_data, piece_id, obs_voice))
            v_order_list.append(look_up_voice_order(v_data, piece_id, obs_voice))
            migrated_json['details']['voice names reg']=v_reg_name_list
            migrated_json['details']['voice order']=v_order_list

    if migrated_json["musical_type"] in type_2_list:
        obs_voice = migrated_json['details']['voice']
        migrated_json['details']['voice name reg']=look_up_voice_reg_name(v_data, piece_id, obs_voice)
        migrated_json['details']['voice order']=look_up_voice_order(v_data, piece_id, obs_voice)
    
    if migrated_json["musical_type"] == "cadence":
        obs_voice = migrated_json['details']['voices']['cantizans']
        # the length check is needed for cadences with 'no voice for this role'
        if len(obs_voice) != 0:
            migrated_json['details']['voices']['can name reg']=look_up_voice_reg_name(v_data, piece_id, obs_voice)
            migrated_json['details']['voices']['can order']=look_up_voice_order(v_data, piece_id, obs_voice)
        obs_voice = migrated_json['details']['voices']['tenorizans']
        if len(obs_voice) != 0:
            migrated_json['details']['voices']['ten name reg']=look_up_voice_reg_name(v_data, piece_id, obs_voice)
            migrated_json['details']['voices']['ten order']=look_up_voice_order(v_data, piece_id, obs_voice)
        obs_voice = migrated_json['details']['dovetail voice']
        if len(obs_voice) != 0:
            migrated_json['details']['dov name reg']=look_up_voice_reg_name(v_data, piece_id, obs_voice)
            migrated_json['details']['dov order']=look_up_voice_order(v_data, piece_id, obs_voice)


    return migrated_json
   

## The following are just to Check Data and Test Lists

In [8]:
obs_data[10]

{'url': 'https://crimproject.org/data/observations/11/',
 'id': 11,
 'observer': {'url': 'https://crimproject.org/data/people/CRIM_Person_1003/',
  'name': 'David Fiala'},
 'piece': {'url': 'https://crimproject.org/data/pieces/CRIM_Model_0018/',
  'piece_id': 'CRIM_Model_0018',
  'full_title': 'Baisez moy',
  'mass': None},
 'ema': '1-38/3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6,3+6/@all+@1,@1+@1,@1+@1,@1+@1-3,@1-3+@1,@1+@1,@1+@all,@all+@1-4,@1-4+@1-3,@1-3+@1-4,@1-4+@1,@1+@1-4,@1-4+@1-3,@1-3+@1-4,@1-4+@1-3,@1-3+@1,@1+@1,@1+@1,@1+@1-3,@1-3+@1,@1+@1,@1+@all,@all+@1-4,@1-4+@1-3,@1-3+@1-4.5,@1-4.5+@1,@1+@1-4,@1-4+@1-3,@1-3+@1-4,@1-4.5+@1-3,@1-3+@1,@1+@1,@1+@1,@1+@1-3,@1-3+@1,@1+@1,@1+@1,@1+@1',
 'musical_type': 'fuga',
 'details': {'voices': [4, 3],
  'inverted': False,
  'periodic': False,
  'regularity': 'strict',
  'retrograde': False,
  'sequential': False,
  'voice names': ['Bassu

In [36]:
# test lists
test_list = [9, 94, 53, 88, 64, 93, 98, 89, 92, 159]


## Now Migrate the Observations--All of Them!

In [36]:
# This runs the curation routine for all obs 11/2/21

clean_obs = [migrate_obs_data(o, people, v_data) for o in obs_data]


In [37]:
clean_obs

[{'observation_id': 1,
  'observer_id': 'CRIM_Person_1012',
  'piece_id': 'CRIM_Model_0011',
  'musical_type': 'fuga',
  'ema': '1-6/1,1,1-2,1-2,2,2/@1,@1-3,@1-3+@1,@1-3+@1-3,@1-3,@1',
  'remarks': 'Makes up a longer ID',
  'curated': True,
  'created': '2017-06-19T19:22:54-04:00',
  'updated': '2020-11-27T15:11:10.011852-05:00',
  'details': {'voices': ['Superius', 'Contratenor'],
   'entry intervals': '4-',
   'time intervals': 'B2',
   'periodic': False,
   'strict': False,
   'flexed': False,
   'sequential': False,
   'inverted': False,
   'retrograde': False,
   'voice names reg': ['Superius', 'Contratenor'],
   'voice order': [1, 2]}},
 {'observation_id': 2,
  'observer_id': 'CRIM_Person_1012',
  'piece_id': 'CRIM_Mass_0009_3',
  'musical_type': 'fuga',
  'ema': '1-6/1,1,1+3,1+3,3,3/@1-4,@1-3,@1-3+@1-4,@1-3+@1-3,@1-3,@1',
  'remarks': 'Makes up longer ID',
  'curated': True,
  'created': '2017-06-19T19:22:54-04:00',
  'updated': '2020-07-15T11:37:01.876533-04:00',
  'details': {

### Export Migrated Observations as JSON

In [39]:
# Sends curated_obs to JSON

with open("cleaned_observations.json", "w") as f:
    json.dump(clean_obs, f)

In [None]:
# Same as list comprehension above
curated_obs_2 = []
for o in obs_data:
    curated_obs_2.append(migrate_obs_data(o, people))
# curated_obs   

### Various Filters and Tests to Check Pieces and Voices

In [None]:
mask = (v_data["piece.piece_id"] == "CRIM_Mass_0001_1") & (v_data["CRIM_Obs_Name"] == "Superius")
# mask

In [None]:
v_order = v_data[mask].order
v_reg_name = v_data[mask].regularized_name
v_reg_name

In [None]:
search = v_data[v_data['piece.piece_id'].str.contains("0015_2")]
search

In [None]:
v_data

### The Following is Used to Check if Any Observations with Voices Spelled Incorrectly

In [None]:
# Keep this!  
# Finds wrong voices and reports  WORKS 11/2/21

for obs in curated_obs:

    type_1_list = ["fuga", "contrapuntal duo", "periodic Entry", "imitative duo", "non-imitative duo", "homorhythm"]
    type_2_list = ["cantus firmus", "soggetto", "counter soggetto"]
    obs_id = obs['observation_id']
    piece_id = obs['piece_id']
    if obs["musical_type"] in type_1_list:
        search = v_data[v_data['piece.piece_id'].str.contains(piece_id)]
        voice_list = search["CRIM_Obs_Name"].to_list()
        obs_voices = obs['details']['voices']
        for obs_voice in obs_voices:
            if obs_voice in voice_list:
                pass
            else:
                missing = obs_voice
                print(obs_id, piece_id, missing)
    
            
    if obs["musical_type"] in type_2_list:
        search = v_data[v_data['piece.piece_id'].str.contains(piece_id)]
        voice_list = search["CRIM_Obs_Name"].to_list()
        obs_voice = obs['details']['voice']
        
        if obs_voice in voice_list:
            pass
        else:
            missing = obs_voice
            print(obs_id, piece_id, missing)


    if obs["musical_type"] == "cadence":
        search = v_data[v_data['piece.piece_id'].str.contains(piece_id)]
        voice_list = search["CRIM_Obs_Name"].to_list()
        obs_voice = obs['details']['voices']['cantizans']
        if len(obs_voice) != 0:
            if obs_voice in voice_list:
                pass
            else:
                missing = obs_voice
                print(obs_id, piece_id, missing)
        # the length check is needed for cadences with 'no voice for this role'
            
        obs_voice = obs['details']['voices']['tenorizans']
        if len(obs_voice) != 0:
            if obs_voice in voice_list:
                pass
            else:
                missing = obs_voice
                print(obs_id, piece_id, missing)
        obs_voice = obs['details']['dovetail_voice']
        if len(obs_voice) != 0:
            if obs_voice in voice_list:
                pass
            else:
                missing = obs_voice
                print(obs_id, piece_id, missing)
# test_obs

### More tests for Checking Voices and Voice Objects and Original Voice Names

In [None]:
search = v_data[v_data['piece.piece_id'].str.contains("05_3")]
# search
search.drop(columns=['work_title', 'voice_id', 'original_name'])



In [None]:
curated_obs['piece_id'].str.contains("Model_0013")


## Migrating Relationships

####  Format of new JSON for CRIM Django 2.0

mechanical transformation : 
    {'melodically inverted': 'boolean', 
    'metrically shifted': 'boolean', 
    'retrograde': 'boolean', 
    'sounding of different voices': 'boolean', 
    'systematic augmentation': 'boolean', 
    'systematic diminution': 'boolean', 
    'transposition': 
        {'radio': ['Transposed', 'Transposed Different Amounts']}}

new material : {}

non-mechanical transformation : 
    {'activity': 
        {'radio': ['Embellished', 'Reduced']}, 
     'extent': 
         {'radio': ['Amplified', 'Truncated']}, 
     'double or invertible counterpoint': 'boolean', 
     'melodically inverted': 'boolean', 
     'metrically shifted': 'boolean', 
     'new combination': 'boolean', 
     'new counter subject': 'boolean', 
     'old counter subject shifted metrically': 'boolean', 
     'old counter subject transposed': 'boolean', 
     'retrograde': 'boolean', 
     'sounding of different voices': 'boolean'}

omission : {}
quotation : 
    {'type': {'radio': ['Exact', 'Monnayage']}}

self : 
    {'self enchainment': 'boolean', 
    'self repetition': 'boolean', 
    'self return': 'boolean'}

#### Inspect an Old Relationship

In [8]:
test_rel = rel_data[2]
test_rel

{'url': 'http://crimproject.org/data/relationships/3/',
 'id': 3,
 'observer': {'url': 'http://crimproject.org/data/people/CRIM_Person_1010/',
  'name': 'Drew Ivarson'},
 'model_observation': {'url': 'http://crimproject.org/data/observations/5/',
  'id': 5,
  'piece': {'url': 'http://crimproject.org/data/pieces/CRIM_Model_0011/',
   'piece_id': 'CRIM_Model_0011',
   'full_title': 'Tota pulchra es'},
  'ema': '5-9,14-18/1,1,1-2,1-2,1-2,3,3,3-4,3-4,3-4/@3,@1-4,@1-4+@4,@1-4+@1-4,@1-3+@1-2,@3,@1-4,@1-4+@4,@1-4+@1-4,@1-3+@1-2'},
 'derivative_observation': {'url': 'http://crimproject.org/data/observations/6/',
  'id': 6,
  'piece': {'url': 'http://crimproject.org/data/pieces/CRIM_Mass_0009_5/',
   'piece_id': 'CRIM_Mass_0009_5',
   'full_title': 'Missa Tota pulchra es: Agnus Dei'},
  'ema': '9-12/3-4,2-4,2,2/@4+@1-4,@4+@1-4+@1-3,@1-3,@1'},
 'relationship_type': 'Non-mechanical transformation',
 'musical_type': 'Fuga',
 'rt_q': False,
 'rt_q_x': False,
 'rt_q_monnayage': False,
 'rt_tm': Fals

### Functions for Migrating Relationships

In [40]:
# getting details for types:
# not needed for omission or new material
def get_rel_person_id(people: list, name: str):
    rel_pers_id = 'unknown'
    for record in people:
            if record['name'] == name:
                rel_pers_id = (record['person_id'])          
    return rel_pers_id

#1/11/22 removed underscore from quotation type

def get_quotation(rel_data):
    if rel_data['rt_q_x'] == True:
        quotation_type = "exact"
    if rel_data['rt_q_monnayage'] == True:
        quotation_type = "monnayage"
    quotation_data = {'quotation type': quotation_type} 
    return quotation_data 

# what to do about new subtypes systematic augmentation and systematic diminution?
# what to do about the fact that 'transposed different amounts' does not yet exist in data?
# review radio buttons in new data for categories, etc

#1/11/22 removed underscore from all details

def get_mech_trans(rel_data):
    sound_diff_voices = rel_data['rt_tm_snd']
    mel_invert = rel_data['rt_tm_minv']
    retrograde = rel_data['rt_tm_retrograde']
    metric_shift = rel_data['rt_tm_ms']
    if (rel_data['rt_tm_transposed'] == False) and (rel_data['rt_tm_invertible'] == False):
        transposed = "untransposed"
    
    if (rel_data['rt_tm_transposed'] == True) and (rel_data['rt_tm_invertible'] == False):
        transposed = "transposed"
        
    if (rel_data['rt_tm_transposed'] == False) and (rel_data['rt_tm_invertible'] == True):
        transposed = "transposed invertible"
    
    mech_trans_data = {'details': 
                            {'melodically inverted': mel_invert, 
                             'metrically shifted': metric_shift, 
                             'retrograde': retrograde,
                            'sounding different voices': sound_diff_voices,
                            'transposed': transposed}}
    return mech_trans_data   

# review radio buttons in new data for categories, etc
# def get_non_mech_trans(rel_data):
#     activity = ""
#     extent = ""
#     embellished = rel_data['rt_tnm_embellished']
#     reduced = rel_data['rt_tnm_reduced']
#     amplified = rel_data['rt_tnm_amplified']
#     truncated = rel_data['rt_tnm_truncated']
#     new_counter_subject = rel_data['rt_tnm_ncs']
#     old_cs_shifted = rel_data['rt_tnm_ocs']
#     old_cs_transposed = rel_data['rt_tnm_ocst']
#     new_combination = rel_data['rt_tnm_nc']
#

# 1/11/22 removed underscores from details fields


def get_non_mech_trans(rel_data):
    activity = ""
    extent = ""
    non_mech_trans_data = ''
    embellished = rel_data['rt_tnm_embellished']
    reduced = rel_data['rt_tnm_reduced']
    amplified = rel_data['rt_tnm_amplified']
    truncated = rel_data['rt_tnm_truncated']
    new_counter_subject = rel_data['rt_tnm_ncs']
    old_cs_shifted = rel_data['rt_tnm_ocs']
    old_cs_transposed = rel_data['rt_tnm_ocst']
    new_combination = rel_data['rt_tnm_nc']

    if rel_data['rt_tnm_embellished'] == True:
        activity = 'embellished'
    if rel_data['rt_tnm_reduced'] == True:
        activity = 'reduced'
    if rel_data['rt_tnm_amplified'] == True:
        extent = 'amplified'
    if rel_data['rt_tnm_truncated'] == True:
        extent = 'truncated'
    if (activity == "") and (extent == ""):
        non_mech_trans_data = {'details': 
                            {'new countersubject': new_counter_subject,
                             'old cs shifted': old_cs_shifted,
                             'old cs transposed': old_cs_transposed,
                             'new combination': new_combination}}
    if (activity == "") and (extent != ""):
        non_mech_trans_data = {'details': 
                            {'extent': extent, 
                             'new countersubject': new_counter_subject,
                             'old cs shifted': old_cs_shifted,
                             'old cs transposed': old_cs_transposed,
                             'new combination': new_combination}}
                             
    if extent == "" and (activity != ''):
        non_mech_trans_data = {'details': 
                            {'activity': activity, 
                             'new counter subject': new_counter_subject,
                             'old cs shifted': old_cs_shifted,
                             'old cs transposed': old_cs_transposed,
                             'new combination': new_combination}}
    return non_mech_trans_data  

# Relationship Migration

def migrate_rel_data(rel_data: dict, people):
    
    migrated_rels = {}
    r_type = ""
    details = {}
    
    # here we get the 'name' as recorded in the CRIM Relationship, then look up their CRIM ID
    
    rel_pers_name = rel_data['observer']['name']
    rel_pers_id = get_rel_person_id(people, rel_pers_name)
    # and now the rest of the id for the piece and rel
    rel_id = rel_data['id']
    
    model_obs_id = rel_data['model_observation']['id']
    deriv_obs_id = rel_data['derivative_observation']['id']
    remarks = rel_data['remarks']
    curated = rel_data['curated']
    created = rel_data['created']
    updated = rel_data['updated']
    
    # and how we test for each musical type and gather subfields
    
    # new material = 
    if rel_data['rt_nm'] is True:
        r_type = "new material"
        
    # omission 
    if rel_data['rt_om'] is True:
        r_type = "omission"
        
    # quotation
    if rel_data['rt_q'] is True:
        r_type = "quotation"
        details = get_quotation(rel_data)
       
    # mechanical transformation
    if rel_data['rt_tm'] is True:
        r_type = "mechanical transformation"
        details = get_mech_trans(rel_data)
        
    # non-mechanical transformation
    if rel_data['rt_tnm'] is True:
        r_type = "non-mechanical transformation"
        details = get_non_mech_trans(rel_data)
    
    
                   
    migrated_rels.update({'relationship_id': rel_id, 
                     'observer_id': rel_pers_id, 
                     'relationship_type': r_type,
                      'model_observation_id': model_obs_id,
                      'derivative_observation_id': deriv_obs_id,
                      'curated': curated,
                      'created': created,
                      'updated': updated,
                     'remarks': remarks})
    # then add the various subtypes here via 'details' assembled for each type above
    migrated_rels.update(details) 
    
    return migrated_rels
    

## Migrate All Relationships

In [41]:
clean_rels = [migrate_rel_data(r, people) for r in rel_data]
clean_rels

[{'relationship_id': 1,
  'observer_id': 'CRIM_Person_1012',
  'relationship_type': 'quotation',
  'model_observation_id': 1,
  'derivative_observation_id': 2,
  'curated': True,
  'created': '2017-06-19T19:22:54-04:00',
  'updated': '2019-08-28T13:16:29.931000-04:00',
  'remarks': '',
  'quotation type': 'exact'},
 {'relationship_id': 2,
  'observer_id': 'CRIM_Person_1012',
  'relationship_type': 'non-mechanical transformation',
  'model_observation_id': 3,
  'derivative_observation_id': 4,
  'curated': True,
  'created': '2017-06-19T19:31:29-04:00',
  'updated': '2019-06-26T18:00:56.788000-04:00',
  'remarks': 'Shifted and transposed',
  'details': {'new countersubject': False,
   'old cs shifted': True,
   'old cs transposed': False,
   'new combination': False}},
 {'relationship_id': 3,
  'observer_id': 'CRIM_Person_1010',
  'relationship_type': 'non-mechanical transformation',
  'model_observation_id': 5,
  'derivative_observation_id': 6,
  'curated': True,
  'created': '2017-06-1

### Migrate just one Test Relationship

In [46]:
test_rel = rel_data[1]
clean_rel = migrate_rel_data(test_rel, people)
clean_rel


{'relationship_id': 2,
 'observer_id': 'CRIM_Person_1012',
 'relationship_type': 'Non-Mechanical Transformation',
 'model_observation_id': 3,
 'derivative_observation_id': 4,
 'curated': True,
 'created': '2017-06-19T19:31:29-04:00',
 'updated': '2019-06-26T18:00:56.788000-04:00',
 'remarks': 'Shifted and transposed',
 'details': {'activity': '',
  'extent': '',
  'new_counter_subject': False,
  'old_cs_shifted': True,
  'old_cs_transposed': False,
  'new_combination': False}}

### Export Migrated Relationships to JSON

In [42]:
# Sends curated_obs to JSON

with open("cleaned_relationships.json", "w") as f:
    json.dump(clean_rels, f)