# Various Routines to Harvest CRIM Metadata from Production Server

### Load the data from CRIM

In [1]:
import requests
import pandas as pd
import json
# old_rel_data = requests.get('http://crimproject.org/data/relationships-old/').json()
# old_obs_data = requests.get('https://crimproject.org/data/observations-old/').json()
rel_data = requests.get('http://crimproject.org/data/relationships/').json()
people = requests.get('https://crimproject.org/data/people/').json()
pieces = requests.get('https://crimproject.org/data/pieces/').json()
voices = requests.get('https://crimproject.org/data/voices/').json()


# rels = pd.json_normalize(rel_data)
# rels.head(7)


In [2]:
rel_data[0]

{'url': 'https://crimproject.org/data/relationships/1/',
 'id': 1,
 'observer': {'url': 'https://crimproject.org/data/people/CRIM_Person_1012/',
  'name': 'Ian Lorenz'},
 'model_observation': {'url': 'https://crimproject.org/data/observations/1/',
  'id': 1,
  'piece': {'url': 'https://crimproject.org/data/pieces/CRIM_Model_0011/',
   'piece_id': 'CRIM_Model_0011',
   'full_title': 'Tota pulchra es'},
  'ema': '1-6/1,1,1-2,1-2,2,2/@1,@1-3,@1-3+@1,@1-3+@1-3,@1-3,@1'},
 'derivative_observation': {'url': 'https://crimproject.org/data/observations/2/',
  'id': 2,
  'piece': {'url': 'https://crimproject.org/data/pieces/CRIM_Mass_0009_3/',
   'piece_id': 'CRIM_Mass_0009_3',
   'full_title': 'Missa Tota pulchra es: Credo'},
  'ema': '1-6/1,1,1+3,1+3,3,3/@1-4,@1-3,@1-3+@1-4,@1-3+@1-3,@1-3,@1'},
 'relationship_type': 'quotation',
 'musical_type': 'fuga',
 'definition': {'url': 'https://crimproject.org/data/definition/15/',
  'id': 15,
  'relationship_definition': [{'name': 'quotation',
    'sub

### Look at any of the data loaded

In [8]:
obs_data[1]

{'url': 'https://crimproject.org/data/observations-old/2/',
 'id': 2,
 'observer': {'url': 'https://crimproject.org/data/people/CRIM_Person_1012/',
  'name': 'Ian Lorenz'},
 'piece': {'url': 'https://crimproject.org/data/pieces/CRIM_Mass_0009_3/',
  'piece_id': 'CRIM_Mass_0009_3',
  'full_title': 'Missa Tota pulchra es: Credo',
  'mass': 'CRIM_Mass_0009'},
 'ema': '1-6/1,1,1+3,1+3,3,3/@1-4,@1-3,@1-3+@1-4,@1-3+@1-3,@1-3,@1',
 'musical_type': 'Fuga',
 'mt_cf': False,
 'mt_cf_voices': '',
 'mt_cf_dur': False,
 'mt_cf_mel': False,
 'mt_sog': False,
 'mt_sog_voices': '',
 'mt_sog_dur': False,
 'mt_sog_mel': False,
 'mt_sog_ostinato': False,
 'mt_sog_periodic': False,
 'mt_csog': False,
 'mt_csog_voices': '',
 'mt_csog_dur': False,
 'mt_csog_mel': False,
 'mt_cd': False,
 'mt_cd_voices': '',
 'mt_fg': True,
 'mt_fg_voices': '[Tenor 1]\r\n[Tenor 3]',
 'mt_fg_int': '4-',
 'mt_fg_tint': 'B2',
 'mt_fg_periodic': False,
 'mt_fg_strict': False,
 'mt_fg_flexed': False,
 'mt_fg_sequential': False,
 

### Which Fields do we Need?

Here is where to find the fields we need:

**Observer (which will be CRIM_Person_0000):  **

obs_pers_id = observation['observer']
rel_pers_id = relationship['observer']

**Observation_ID or Relationship_ID:**

obs_id = observation['id']
rel_id = relationship['id']

**EMA**

ema_data = observation['ema']

**PIECE**

obs_piece_id = observation['piece']

**VOICES** 

crim_voices https://crimproject.org/data/voices/

voice_name = voices['original_name']
voice_obj = voices['order']
piece_id_voices_file = voices[
These are dependent on the musical type, so we need to have searches dependent on other searches!  And then we might be able to translate the strings, like  'Superius\r\nContratenor', into sets of integers.  

Micah reports:  We want voices to be a JSON list: {'voices': [2, 4, 1, 2]}.

**MUSICAL TYPE**

This is a boolean, as well as a string in another field.  Since the "Musical Type" field can have more than one value, better to send the IP type values to comments?  Will that suppress the current values?  Or deal with these via Curate?

if observation['mt_cf'] is True
    new_json[{'musical_type': 'Cantus Firmus'}
    new_json.update({

**MODEL of SUBROUTINE**

for observation in observation_data:
    obs_pers_id = observation['observer']   
    obs_id = observation['id']
    obs_piece_id = observation['piece'] 
    obs_ema = observation['ema']
    if observation['mt_cf'] is True:
        cf_voice = observation['voice']
        cf_dur = observation['mt_cad_type']
        cf_mel = observation['mt_cad_type']
        cf_data = {'details': {'durations only': cf_dur, 'pitches only': cf_mel}}
        new_json.update(cf_data)
        new_json.update({'observer': obs_pers_id, 'id': obs_id, 'piece': obs_piece_id, 'ema': obs_ema})
    elif observation['mtsog'] is True:
    
    
if example_json['mt_cad'] is True:
    tone_val = example_json['mt_cad_tone']
    type_val = example_json['mt_cad_type']
    cad = {'cadence': {'tone': tone_val, 'type': type_val}}
    example_json.update(cad)

### Load the Voices Data

'CRIM_Voices_for_Migration' is a table that assembles voice names and object numbers, with forms of voices found in CRIM Observation data, along with Regularized names.

In [9]:

v_data = pd.read_csv('CRIM_Voices_for_Migration.csv')
v_data.head(10) 

Unnamed: 0,work_title,piece.piece_id,piece__title,voice_id,order,original_name,regularized_name,CRIM_Obs_Name
0,Missa Confitemini,CRIM_Mass_0001_1,Kyrie,CRIM_Mass_0001_1(1),1,[Superius],Superius,Superius
1,Missa Confitemini,CRIM_Mass_0001_1,Kyrie,CRIM_Mass_0001_1(2),2,[Contratenor],Contratenor,Contratenor
2,Missa Confitemini,CRIM_Mass_0001_1,Kyrie,CRIM_Mass_0001_1(3),3,[Tenor],Tenor,Tenor
3,Missa Confitemini,CRIM_Mass_0001_1,Kyrie,CRIM_Mass_0001_1(4),4,[Bassus],Bassus,Bassus
4,Missa Confitemini,CRIM_Mass_0001_2,Gloria,CRIM_Mass_0001_2(1),1,[Superius],Superius,Superius
5,Missa Confitemini,CRIM_Mass_0001_2,Gloria,CRIM_Mass_0001_2(2),2,[Contratenor],Contratenor,Contratenor
6,Missa Confitemini,CRIM_Mass_0001_2,Gloria,CRIM_Mass_0001_2(3),3,[Tenor],Tenor,Tenor
7,Missa Confitemini,CRIM_Mass_0001_2,Gloria,CRIM_Mass_0001_2(4),4,[Bassus],Bassus,Bassus
8,Missa Confitemini,CRIM_Mass_0001_3,Credo,CRIM_Mass_0001_3(1),1,[Superius],Superius,Superius
9,Missa Confitemini,CRIM_Mass_0001_3,Credo,CRIM_Mass_0001_3(2),2,[Contratenor],Contratenor,Contratenor


### Load the Observation Migration 

* all functions for migration
* gets details for each musical type
*  gets person id for each person
* migrates data to new format
* updates voice number and regularized names for voices
* functions for types with voice strings fixed and split and trailing/leading whitespace removed
*  OK 1/12/22

In [35]:



def get_cf(observation):

    features = None
    cf_voice = observation['mt_cf_voices'].rstrip().strip()
    if observation["mt_cf_dur"] and observation["mt_cf_mel"]:
        features = "both pitches and durations"
    elif observation["mt_cf_dur"]:
        features = "durations only"
    elif observation["mt_cf_mel"]:
        features = "pitches only"
    remarks = observation['remarks']
    cf_data = {'details': {'voice name': cf_voice, 
                           'features': features}}
    return cf_data

def get_sg(observation):
    
    features = None
    sg_voice = observation['mt_sog_voices'].rstrip().strip()
    if observation["mt_sog_dur"] and observation["mt_sog_mel"]:
        features = "both pitches and durations"
    elif observation["mt_sog_dur"]:
        features = "durations only"
    elif observation["mt_sog_mel"]:
        features = "pitches only"
    sg_ostinato = observation['mt_sog_ostinato']
    sg_periodic = observation['mt_sog_periodic']
    remarks = observation['remarks']
    sg_data = {'details': {'voice name': sg_voice, 
                           'features': features, 
                           'ostinato': sg_ostinato, 
                           'periodic': sg_periodic}}
    return sg_data


def get_csg(observation):
    features = None
    csg_voice = observation['mt_csog_voices'].rstrip().strip()
    if observation["mt_csog_dur"] and observation["mt_csog_mel"]:
        features = "both pitches and durations"
    elif observation["mt_csog_dur"]:
        features = "durations only"
    elif observation["mt_csog_mel"]:
        features = "pitches only"
    features = "pitches only"
    remarks = observation['remarks']
    csg_data = {'details': {'voice name': csg_voice, 
                           'features': features,}}
    return csg_data
    
def get_cd(observation):
    cd_voices = observation['mt_cd_voices'].replace('\r\n', '\n').split('\n')
    cd_voices = [v.strip() for v in cd_voices]
    remarks = observation['remarks']
    cd_data = {'details': {'voice names': cd_voices}}
    return cd_data
    
def get_fg(observation):
    
    regularity = None
    fg_voices = observation['mt_fg_voices'].replace('\r\n', '\n').split('\n')
    fg_voices = [v.strip() for v in fg_voices]
    fg_int = observation['mt_fg_int']
    fg_tint = observation['mt_fg_tint']
    fg_periodic = observation['mt_fg_periodic']
    fg_strict = observation['mt_fg_strict']
    fg_flexed = observation['mt_fg_flexed']
    if observation["mt_fg_strict"]:
        regularity = "strict"
    elif observation["mt_fg_flexed"]:
        regularity = "flexed"
    fg_sequential = observation['mt_fg_sequential']
    fg_inverted = observation['mt_fg_inverted']
    fg_retrograde = observation['mt_fg_retrograde']
    remarks = observation['remarks']
    fg_data = {'details': {'voice names': fg_voices, 
                           'entry intervals': fg_int, 
                           'time intervals': fg_tint, 
                           'periodic': fg_periodic, 
                           'regularity': regularity, 
                           'sequential': fg_sequential, 
                           'inverted': fg_inverted, 
                           'retrograde': fg_retrograde}}
        
    return fg_data


def get_pe(observation):
    regularity = None
    pe_voices = observation['mt_pe_voices'].replace('\r\n', '\n').split('\n')
    pe_voices = [v.strip() for v in pe_voices]
    pe_int = observation['mt_pe_int']
    pe_tint = observation['mt_pe_tint']
    if observation["mt_pe_strict"]:
        regularity = "strict"
    elif observation["mt_pe_flexed"]:
        regularity = "flexed"
    elif observation["mt_pe_flt"]:
        regularity = "flexed tonal"
    pe_sequential = observation['mt_pe_sequential']
    pe_added = observation['mt_pe_added']
    pe_invt = observation['mt_pe_invertible']
    remarks = observation['remarks']
    pe_data = {'details': {'voice names': pe_voices, 
                           'entry intervals': pe_int, 
                           'time intervals': pe_tint, 
                           'regularity': regularity,
                           'sequential': pe_sequential, 
                           'invertible counterpoint': pe_invt, 
                           'added entries': pe_added}}
        
    return pe_data


def get_id(observation):
    regularity = None
    id_voices = observation['mt_id_voices'].replace('\r\n', '\n').split('\n')
    id_voices = [v.strip() for v in id_voices]
    id_int = observation['mt_id_int']
    id_tint = observation['mt_id_tint']
    if observation["mt_id_strict"]:
        regularity = "strict"
    elif observation["mt_id_flexed"]:
        regularity = "flexed"
    elif observation["mt_id_flt"]:
        regularity = "flexed tonal"
    id_invt = observation['mt_id_invertible']
    remarks = observation['remarks']
    id_data = {'details': {'voice names': id_voices, 
                           'entry intervals': id_int, 
                           'time intervals': id_tint, 
                           'regularity': regularity,
                           'invertible counterpoint': id_invt,
                           'added entries': None}}
    return id_data

def get_nid(observation):
    regularity = None
    nid_voices = observation['mt_nid_voices'].replace('\r\n', '\n').split('\n')
    nid_voices = [v.strip() for v in nid_voices]
    nid_int = observation['mt_nid_int']
    nid_tint = observation['mt_nid_tint']

    if observation["mt_nid_strict"]:
        regularity = "strict"
    elif observation["mt_nid_flexed"]:
        regularity = "flexed"
    elif observation["mt_nid_flt"]:
        regularity = "flexed tonal"
    nid_sequential = observation['mt_nid_sequential']
    nid_invt = observation['mt_nid_invertible']
    remarks = observation['remarks']
    nid_data = {'details': {'voice names': nid_voices, 
                           'entry intervals': nid_int, 
                           'time intervals': nid_tint, 
                           'regularity': regularity,
                           'sequential': nid_sequential, 
                           'invertible counterpoint': nid_invt,
                           'added entries': None}}
    return nid_data   

def get_hr(observation):
    type = None
    hr_voices = observation['mt_hr_voices'].replace('\r\n', '\n').split('\n')
    hr_voices = [v.strip() for v in hr_voices]
    if observation["mt_hr_simple"]:
        type = "simple"
    elif observation["mt_hr_staggered"]:
        type = "staggered"
    elif observation["mt_hr_sequential"]:
        type = "sequential"
    elif observation["mt_hr_fauxbourdon"]:
        type = "fauxbourdon"
    remarks = observation['remarks']
    hr_data = {'details': {'voice names': hr_voices, 
                           'type': type,
                           'dialogue': None}}
    return hr_data    


        
def get_cad(observation):
    cad_dove_bool = None
    cad_tone = observation['mt_cad_tone']
    cad_type = observation['mt_cad_type']
    cad_can = observation['mt_cad_cantizans'].rstrip().strip()
    cad_ten = observation['mt_cad_tenorizans'].rstrip().strip()
    cad_dove_v = observation['mt_cad_dtv'].rstrip().strip()
    if not cad_dove_v:
        cad_dove_bool = False
    else: 
        cad_dove_bool = True  
    # cad_dove_i = observation['mt_cad_dti']
    remarks = observation['remarks']
    cad_data = {'details': {'tone': cad_tone, 
                            'type': cad_type, 
                            'cantizans name': cad_can, 
                            'tenorizans name': cad_ten,
                            'dovetail cadence':  cad_dove_bool,
                            'dovetail voice name': cad_dove_v,
                            'dovetail position': None,
                            'irregular cadence': None,
                            'irregular roles': None}}
    return cad_data   

def look_up_voice_order(v_data, piece_id, obs_voice):
    mask = (v_data["piece.piece_id"] == piece_id) & (v_data["CRIM_Obs_Name"] == obs_voice)
    v_order = v_data[mask].iloc[0].order
    
    return int(v_order)
    
def look_up_voice_reg_name(v_data, piece_id, obs_voice):
    mask = (v_data["piece.piece_id"] == piece_id) & (v_data["CRIM_Obs_Name"] == obs_voice)
    v_reg_name = v_data[mask].iloc[0].regularized_name

    return (v_reg_name)

def get_person_id(people: list, name: str):
    obs_pers_id = 'unknown'
    for record in people:
            if record['name'] == name:
                obs_pers_id = (record['person_id'])          
    return obs_pers_id

def migrate_obs_data(observation: dict, people: list, v_data):
    
    # adding fields = {}
    fields = {}
    migrated_json = {}
    m_type = ""
    details = {}
    

    # here we get the 'name' as recorded in the CRIM Observation, then look up their CRIM ID
    
    obs_pers_name = observation['observer']['name']
    obs_pers_id = get_person_id(people, obs_pers_name)
    # checks if the observer is "unknown".  If so:  replace with CRIM_Person_0000
    if obs_pers_id == "unknown":
        obs_pers_id = "CRIM_Person_0000"
    # and now the rest of the id for the piece and obs, plus ema
    obs_id = observation['id']
    obs_piece_id = observation['piece']['piece_id'] 
    obs_ema = observation['ema']
    remarks = observation['remarks']
    curated = observation['curated']
    created = observation['created']
    updated = observation['updated']
    # and how we test for each musical type and gather subfields
    
    # cantus firmus = data#9
    if observation['mt_cf'] is True:
        m_type = "cantus firmus"
        details = get_cf(observation)
        
    # soggetto data#94
    if observation['mt_sog'] is True:
        m_type = "soggetto"
        details = get_sg(observation)
        
    # counter soggetto data#53
    if observation['mt_csog'] is True:
        m_type = "counter soggetto"
        details = get_csg(observation)
       
    # contrapuntal duo data#88
    if observation['mt_cd'] is True:
        m_type = "contrapuntal duo"
        details = get_cd(observation)
        
    # fuga data #64
    if observation['mt_fg'] is True:
        m_type = "fuga"
        details = get_fg(observation)
        
    # periodic entry data #93
    if observation['mt_pe'] is True:
        m_type = "periodic entry"
        details = get_pe(observation)
           
    # imitative duo data #98
    if observation['mt_id'] is True:
        m_type = "imitative duo"
        details = get_id(observation)
               
    # non-imitative duo data #89
    if observation['mt_nid'] is True:
        m_type = "non-imitative duo"
        details = get_nid(observation)
      
    # homorhythm data #92
    if observation['mt_hr'] is True:
        m_type = "homorhythm"
        details = get_hr(observation)
         
    # cadence data #159
    # note that the details of the OLD cadence data don't fit the new type
    if observation['mt_cad'] is True:
        m_type = "cadence"
        details = get_cad(observation)
                   
    # nested version
    '''
    migrated_json.update({'pk': obs_id, 
                        'model': 'crim.cjobservation',
                        'fields': {'observer': obs_pers_id,
                        'piece': obs_piece_id, 
                        'musical_type': m_type,
                        'ema': obs_ema,
                        'details': details,
                        'remarks': remarks,
                        'curated': curated,
                        'created': created,
                        'updated': updated}
                        })
    '''
    
    # Corrected 1/22/22 as per MW, with 'pk' 'model' and 'fields'+'details'

    fields.update({'definition': 15,
                   'observer': obs_pers_id, 
                        'piece': obs_piece_id, 
                        'musical_type': m_type,
                        'ema': obs_ema,
                        'remarks': remarks,
                        'curated': curated,
                        'created': created,
                        'updated': updated})
    fields.update(details)
    migrated_json.update({'pk': obs_id, 
                            'model': 'crim.cjobservation',
                            'fields': fields})
                        
                        
    
    #OLD VERSION HERE 
    #migrated_json.update({'id': obs_id, 
                       # 'observer': obs_pers_id, 
                        #'piece': obs_piece_id, 
                       # 'musical_type': m_type,
                        #'ema': obs_ema,
                        #'remarks': remarks,
                        #'curated': curated,
                        #'created': created,
                        #'updated': updated})

    
    # migrated_json.update(details) 
    
# now run voice order and name updates on the first stage of migrated data, then update

# 1/11/21:  removed underscores from voice_name_reg, can_name_reg, ten_name_reg, voice_order in all types

    v_order_list = []
    v_reg_name_list = []
    
    type_1_list = ["fuga", "contrapuntal duo", "periodic entry", "imitative duo", "non-imitative duo", "homorhythm"]
    type_2_list = ["cantus firmus", "soggetto", "counter soggetto"]
    # replaced id with 'pk'
    obs_id = migrated_json['pk']
    # now add ['fields'] to these calls
    piece_id = migrated_json['fields']['piece']
    if migrated_json['fields']["musical_type"] in type_1_list:
        obs_voices = migrated_json['fields']['details']['voice names']
        for obs_voice in obs_voices:
            v_reg_name_list.append(look_up_voice_reg_name(v_data, piece_id, obs_voice))
            v_order_list.append(look_up_voice_order(v_data, piece_id, obs_voice))
            migrated_json['fields']['details']['voice names reg']=v_reg_name_list
            migrated_json['fields']['details']['voices']=v_order_list

    if migrated_json['fields']["musical_type"] in type_2_list:
        obs_voice = migrated_json['fields']['details']['voice name']
        migrated_json['fields']['details']['voice name reg']=look_up_voice_reg_name(v_data, piece_id, obs_voice)
        migrated_json['fields']['details']['voice']=look_up_voice_order(v_data, piece_id, obs_voice)

# this is Micah's NEW code for cadences

    if migrated_json['fields']["musical_type"] == "cadence":
        obs_can = migrated_json['fields']['details']['cantizans name']
        if not obs_can:
            migrated_json['fields']['details']['cantizans name reg'] = None
            migrated_json['fields']['details']['cantizans'] = None
        else:
            migrated_json['fields']['details']['cantizans name reg'] = look_up_voice_reg_name(v_data, piece_id, obs_can)
            migrated_json['fields']['details']['cantizans'] = look_up_voice_order(v_data, piece_id, obs_can)

        obs_ten = migrated_json['fields']['details']['tenorizans name']
        if not obs_ten:
            migrated_json['fields']['details']['tenorizans name reg'] = None
            migrated_json['fields']['details']['tenorizans'] = None
        else:
            migrated_json['fields']['details']['tenorizans name reg'] = look_up_voice_reg_name(v_data, piece_id, obs_ten)
            migrated_json['fields']['details']['tenorizans'] = look_up_voice_order(v_data, piece_id, obs_ten)
 
        obs_dovetail_voice = migrated_json['fields']['details']['dovetail voice name']
        if not obs_dovetail_voice:
            migrated_json['fields']['details']['dovetail voice name reg'] = None
            migrated_json['fields']['details']['dovetail cadence voice'] = None
        else:
            migrated_json['fields']['details']['dovetail voice name reg'] = look_up_voice_reg_name(v_data, piece_id, obs_dovetail_voice)
            migrated_json['fields']['details']['dovetail cadence voice'] = look_up_voice_order(v_data, piece_id, obs_dovetail_voice)


    return migrated_json


   

In [37]:
#  this tests on just ONE OBservation
sampleobs = obs_data[5192]
migrate_obs_data(sampleobs, people, v_data)


{'pk': 5199,
 'model': 'crim.cjobservation',
 'fields': {'definition': 15,
  'observer': 'CRIM_Person_0000',
  'piece': 'CRIM_Model_0009',
  'musical_type': 'soggetto',
  'ema': '3-5/1,1,1/@1-4,@1-3,@1',
  'remarks': '',
  'curated': True,
  'created': '2019-07-18T11:45:11.189000-04:00',
  'updated': '2019-07-18T11:45:11.189000-04:00',
  'details': {'voice name': 'Superius',
   'features': 'both pitches and durations',
   'ostinato': False,
   'periodic': False,
   'voice name reg': 'Superius',
   'voice': 1}}}

## The following are just to Check Data and Test Lists

In [13]:
obs_data[1369]

{'url': 'https://crimproject.org/data/observations-old/1370/',
 'id': 1370,
 'observer': {'url': 'https://crimproject.org/data/people/CRIM_Person_1015/',
  'name': 'Anne Nautré'},
 'piece': {'url': 'https://crimproject.org/data/pieces/CRIM_Mass_0020_4/',
  'piece_id': 'CRIM_Mass_0020_4',
  'full_title': 'Missa Susanne un jour: Sanctus',
  'mass': 'CRIM_Mass_0020'},
 'ema': '29-33,35-37,39-40/2,2-3,3+5,1+5,1,4,4-5,5,5,5/@2-4,@1+@2-4,@1+@2-4,@2-4+@1,@1,@2-4,@1+@2-4,@1,@1-4,@1',
 'musical_type': 'Fuga',
 'mt_cf': False,
 'mt_cf_voices': '',
 'mt_cf_dur': False,
 'mt_cf_mel': False,
 'mt_sog': False,
 'mt_sog_voices': '',
 'mt_sog_dur': False,
 'mt_sog_mel': False,
 'mt_sog_ostinato': False,
 'mt_sog_periodic': False,
 'mt_csog': False,
 'mt_csog_voices': '',
 'mt_csog_dur': False,
 'mt_csog_mel': False,
 'mt_cd': False,
 'mt_cd_voices': '',
 'mt_fg': True,
 'mt_fg_voices': 'Contratenor\nTenor\nBassus\n[Superius]\nTenorSecundus\nBassus\nBassus',
 'mt_fg_int': '1+8-5+8-5-2-',
 'mt_fg_tint':

### Now Migrate the Observations--All of Them!

In [41]:
# This runs the curation routine for all obs 11/2/21

clean_obs = [migrate_obs_data(o, people, v_data) for o in obs_data]


In [17]:
# test list represents one of each basic musical type
test_list = [9, 94, 53, 88, 64, 93, 98, 89, 92, 159, 5199]
for item in test_list:
    print(clean_obs[item])


{'pk': 10, 'model': 'crim.cjobservation', 'fields': {'definition': 15, 'observer': 'CRIM_Person_1015', 'piece': 'CRIM_Mass_0017_4', 'musical_type': 'cantus firmus', 'ema': '1-41/5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5/@1,@1,@1,@1-3,@1,@1,@all,@1-4,@1-3,@1-4.5,@1,@1-4,@1-3,@1-4.5,@1-3,@all,@all,@1-3,@1,@1,@1-3,@1,@1,@all,@1-4,@1-3,@1-4.5,@1,@1-4,@1-3,@1-4.5,@1-3,@all,@all,@1,@1,@1,@1-3,@1,@1,@1', 'remarks': 'La voix de ténor primus cite intégralement le bassus du modèle', 'curated': True, 'created': '2017-06-20T13:13:54-04:00', 'updated': '2019-06-26T17:55:17.039000-04:00', 'details': {'voice name': 'Tenor[Primus]', 'features': 'both pitches and durations', 'voice name reg': 'Tenor 1 [resolutio]', 'voice': 3}}}
{'pk': 95, 'model': 'crim.cjobservation', 'fields': {'definition': 15, 'observer': 'CRIM_Person_1016', 'piece': 'CRIM_Model_0017', 'musical_type': 'soggetto', 'ema': '99-107/1,1,1,1,1,1,1,1,1/@1,@1,@1,@1,@1,@1,@1,@1,@1', 'remarks': '', 'c

In [22]:
clean_obs[5192]

{'pk': 5199,
 'model': 'crim.cjobservation',
 'fields': {'definition': 15,
  'observer': 'unknown',
  'piece': 'CRIM_Model_0009',
  'musical_type': 'soggetto',
  'ema': '3-5/1,1,1/@1-4,@1-3,@1',
  'remarks': '',
  'curated': True,
  'created': '2019-07-18T11:45:11.189000-04:00',
  'updated': '2019-07-18T11:45:11.189000-04:00',
  'details': {'voice name': 'Superius',
   'features': 'both pitches and durations',
   'ostinato': False,
   'periodic': False,
   'voice name reg': 'Superius',
   'voice': 1}}}

In [29]:
clean_obs.columns()

AttributeError: 'list' object has no attribute 'columns'

### Export Migrated Observations as JSON

In [42]:
# Sends curated_obs to JSON

with open("cleaned_observations.json", "w") as f:
    json.dump(clean_obs, f)

In [None]:
# Same as list comprehension above
curated_obs_2 = []
for o in obs_data:
    curated_obs_2.append(migrate_obs_data(o, people))
# curated_obs   

### Various Filters and Tests to Check Pieces and Voices

In [None]:
mask = (v_data["piece.piece_id"] == "CRIM_Mass_0001_1") & (v_data["CRIM_Obs_Name"] == "Superius")
# mask

In [None]:
v_order = v_data[mask].order
v_reg_name = v_data[mask].regularized_name
v_reg_name

In [None]:
search = v_data[v_data['piece.piece_id'].str.contains("0015_2")]
search

In [None]:
v_data

### The Following is Used to Check if Any Observations with Voices Spelled Incorrectly

In [34]:
# Keep this!  
# Finds wrong voices and reports  WORKS 11/2/21

for obs in curated_obs:

    type_1_list = ["fuga", "contrapuntal duo", "periodic Entry", "imitative duo", "non-imitative duo", "homorhythm"]
    type_2_list = ["cantus firmus", "soggetto", "counter soggetto"]
    obs_id = obs['id']
    piece_id = obs['piece']
    if obs["musical_type"] in type_1_list:
        search = v_data[v_data['piece.piece_id'].str.contains(piece_id)]
        voice_list = search["CRIM_Obs_Name"].to_list()
        obs_voices = obs['details']['voices']
        for obs_voice in obs_voices:
            if obs_voice in voice_list:
                pass
            else:
                missing = obs_voice
                print(obs_id, piece_id, missing)
    
            
    if obs["musical_type"] in type_2_list:
        search = v_data[v_data['piece.piece_id'].str.contains(piece_id)]
        voice_list = search["CRIM_Obs_Name"].to_list()
        obs_voice = obs['details']['voice']
        
        if obs_voice in voice_list:
            pass
        else:
            missing = obs_voice
            print(obs_id, piece_id, missing)


    if obs["musical_type"] == "cadence":
        search = v_data[v_data['piece.piece_id'].str.contains(piece_id)]
        voice_list = search["CRIM_Obs_Name"].to_list()
        obs_voice = obs['details']['voices']['cantizans']
        if len(obs_voice) != 0:
            if obs_voice in voice_list:
                pass
            else:
                missing = obs_voice
                print(obs_id, piece_id, missing)
        # the length check is needed for cadences with 'no voice for this role'
            
        obs_voice = obs['details']['voices']['tenorizans']
        if len(obs_voice) != 0:
            if obs_voice in voice_list:
                pass
            else:
                missing = obs_voice
                print(obs_id, piece_id, missing)
        obs_voice = obs['details']['dovetail_voice']
        if len(obs_voice) != 0:
            if obs_voice in voice_list:
                pass
            else:
                missing = obs_voice
                print(obs_id, piece_id, missing)
# test_obs

NameError: name 'curated_obs' is not defined

### More tests for Checking Voices and Voice Objects and Original Voice Names

In [None]:
search = v_data[v_data['piece.piece_id'].str.contains("05_3")]
# search
search.drop(columns=['work_title', 'voice_id', 'original_name'])



In [None]:
curated_obs['piece_id'].str.contains("Model_0013")


## Migrating Relationships

####  Format of new JSON for CRIM Django 2.0 as of 1/12/21

[
  {
    "name": "quotation",
    "subtypes": [
      {
        "form": "radio",
        "name": "type",
        "options": [
          "exact",
          "monnayage"
        ]
      },
      {
        "form": "radio",
        "name": "self",
        "options": [
          "none",
          "enchainment",
          "repetition",
          "return"
        ]
      }
    ]
  },
  {
    "name": "mechanical transformation",
    "subtypes": [
      {
        "form": "boolean",
        "name": "sounding in different voices"
      },
      {
        "form": "boolean",
        "name": "melodically inverted"
      },
      {
        "form": "boolean",
        "name": "retrograde"
      },
      {
        "form": "boolean",
        "name": "metrically shifted"
      },
      {
        "form": "radio",
        "name": "transposition",
        "options": [
          "not transposed",
          "transposed",
          "transposed different amounts"
        ]
      },
      {
        "form": "boolean",
        "name": "double or invertible counterpoint"
      },
      {
        "form": "boolean",
        "name": "systematic diminution"
      },
      {
        "form": "boolean",
        "name": "systematic augmentation"
      },
      {
        "form": "radio",
        "name": "self",
        "options": [
          "none",
          "enchainment",
          "repetition",
          "return"
        ]
      }
    ]
  },
  {
    "name": "non-mechanical transformation",
    "subtypes": [
      {
        "form": "radio",
        "name": "extent",
        "options": [
          "amplified",
          "truncated"
        ]
      },
      {
        "form": "radio",
        "name": "activity",
        "options": [
          "embellished",
          "reduced"
        ]
      },
      {
        "form": "boolean",
        "name": "sounding in different voices"
      },
      {
        "form": "boolean",
        "name": "whole passage transposed"
      },
      {
        "form": "boolean",
        "name": "whole passage metrically shifted"
      },
      {
        "form": "boolean",
        "name": "melodically inverted"
      },
      {
        "form": "boolean",
        "name": "retrograde"
      },
      {
        "form": "boolean",
        "name": "new counter subject"
      },
      {
        "form": "boolean",
        "name": "old counter subject shifted metrically"
      },
      {
        "form": "boolean",
        "name": "old counter subject transposed"
      },
      {
        "form": "boolean",
        "name": "double or invertible counterpoint"
      },
      {
        "form": "boolean",
        "name": "new combination"
      },
      {
        "form": "radio",
        "name": "self",
        "options": [
          "none",
          "enchainment",
          "repetition",
          "return"
        ]
      }
    ]
  },
  {
    "form": "boolean",
    "name": "omission"
  },
  {
    "form": "boolean",
    "name": "new material"
  }
]


#### Inspect an Old Relationship

In [6]:
test_rel = rel_data[2]
test_rel

{'url': 'http://crimproject.org/data/relationships/3/',
 'id': 3,
 'observer': {'url': 'http://crimproject.org/data/people/CRIM_Person_1010/',
  'name': 'Drew Ivarson'},
 'model_observation': {'url': 'http://crimproject.org/data/observations/5/',
  'id': 5,
  'piece': {'url': 'http://crimproject.org/data/pieces/CRIM_Model_0011/',
   'piece_id': 'CRIM_Model_0011',
   'full_title': 'Tota pulchra es'},
  'ema': '5-9,14-18/1,1,1-2,1-2,1-2,3,3,3-4,3-4,3-4/@3,@1-4,@1-4+@4,@1-4+@1-4,@1-3+@1-2,@3,@1-4,@1-4+@4,@1-4+@1-4,@1-3+@1-2'},
 'derivative_observation': {'url': 'http://crimproject.org/data/observations/6/',
  'id': 6,
  'piece': {'url': 'http://crimproject.org/data/pieces/CRIM_Mass_0009_5/',
   'piece_id': 'CRIM_Mass_0009_5',
   'full_title': 'Missa Tota pulchra es: Agnus Dei'},
  'ema': '9-12/3-4,2-4,2,2/@4+@1-4,@4+@1-4+@1-3,@1-3,@1'},
 'relationship_type': 'Non-mechanical transformation',
 'musical_type': 'Fuga',
 'rt_q': False,
 'rt_q_x': False,
 'rt_q_monnayage': False,
 'rt_tm': Fals

### Functions for Migrating Relationships

* Correct as of 1/12/22

In [43]:
# getting details for types:
# not needed for omission or new material
def get_rel_person_id(people: list, name: str):
    rel_pers_id = None
    for record in people:
            if record['name'] == name:
                rel_pers_id = (record['person_id'])
                # check unknown
                if rel_pers_id == "unknown":
                    rel_pers_id = "CRIM_Person_0000"          
    return rel_pers_id

#1/11/22 removed underscore from quotation type.  Not revised by Micah

def get_quotation(rel_data):
    if rel_data['rt_q_x'] == True:
        quotation_type = "exact"
    if rel_data['rt_q_monnayage'] == True:
        quotation_type = "monnayage"
    quotation_data = {'details':
                      {'type': quotation_type,
                      'self': None}} 
    return quotation_data 

# Micah's New Code for Mech Trans

def get_mech_trans(rel_data):
    sound_diff_voices = rel_data['rt_tm_snd']
    mel_invert = rel_data['rt_tm_minv']
    retrograde = rel_data['rt_tm_retrograde']
    metric_shift = rel_data['rt_tm_ms']
    transposed = None
    invertible = rel_data['rt_tm_invertible']
    
    if rel_data['rt_tm_transposed'] is True:
        transposition = "transposed"
    elif rel_data['rt_tm_transposed'] is False:
        transposition = "not transposed"
    
    mech_trans_data = {'details': 
                            {'melodically inverted': mel_invert, 
                            'metrically shifted': metric_shift, 
                            'retrograde': retrograde,
                            'sounding in different voices': sound_diff_voices,
                            'transposition': transposition,
                            'double or invertible counterpoint': invertible,
                            'self': None,
                            'systematic diminution': None,
                            'systematic augmentation': None}}
    return mech_trans_data


# Micah's new code:

def get_non_mech_trans(rel_data):
    activity = None
    extent = None
    new_counter_subject = rel_data['rt_tnm_ncs']
    old_cs_shifted = rel_data['rt_tnm_ocs']
    old_cs_transposed = rel_data['rt_tnm_ocst']
    new_combination = rel_data['rt_tnm_nc']

    if rel_data['rt_tnm_embellished']:
        activity = 'embellished'
    elif rel_data['rt_tnm_reduced']:
        activity = 'reduced'

    if rel_data['rt_tnm_amplified']:
        extent = 'amplified'
    elif rel_data['rt_tnm_truncated']:
        extent = 'truncated'

    non_mech_trans_data = {'details':
                            {'activity': activity,
                             'extent': extent,
                             'new counter subject': new_counter_subject,
                             'sounding in different voices': None,
                             'whole passage transposed': None,
                             'whole passage metrically shifted': None,
                             'melodically inverted': None,
                             'retrograde': None,
                             'double or invertible counterpoint': None,
                             'old counter subject shifted metrically': old_cs_shifted,
                             'old counter subject transposed': old_cs_transposed,
                             'new combination': new_combination,
                             'double or invertible counterpoint': None,
                             'self': None}}
    return non_mech_trans_data  


# Relationship Migration

def migrate_rel_data(rel_data: dict, people):
    
    fields = {}
    migrated_rels = {}
    r_type = ""
    details = {}
    
    # here we get the 'name' as recorded in the CRIM Relationship, then look up their CRIM ID
    
    rel_pers_name = rel_data['observer']['name']
    rel_pers_id = get_rel_person_id(people, rel_pers_name)
    if rel_pers_id == "unknown":
        rel_pers_id = "CRIM_Person_0000"
    # and now the rest of the id for the piece and rel
    rel_id = rel_data['id']
    
    model_obs_id = rel_data['model_observation']['id']
    deriv_obs_id = rel_data['derivative_observation']['id']
    remarks = rel_data['remarks']
    curated = rel_data['curated']
    created = rel_data['created']
    updated = rel_data['updated']
    
    # and how we test for each musical type and gather subfields
    
    # new material = 
    if rel_data['rt_nm'] is True:
        r_type = "new material"
        
    # omission 
    if rel_data['rt_om'] is True:
        r_type = "omission"
        
    # quotation
    if rel_data['rt_q'] is True:
        r_type = "quotation"
        details = get_quotation(rel_data)
       
    # mechanical transformation
    if rel_data['rt_tm'] is True:
        r_type = "mechanical transformation"
        details = get_mech_trans(rel_data)
        
    # non-mechanical transformation
    if rel_data['rt_tnm'] is True:
        r_type = "non-mechanical transformation"
        details = get_non_mech_trans(rel_data)
    
    fields.update({'definition': 15,
                   'observer': rel_pers_id, 
                    'relationship_type': r_type,
                    'model_observation': model_obs_id,
                    'derivative_observation': deriv_obs_id,
                    'curated': curated,
                    'created': created,
                    'updated': updated,
                    'remarks': remarks})
    
    fields.update(details)
                   
    # changing ID to PK, adding model information, remove data now in 'fields'
    migrated_rels.update({'pk': rel_id,
                        'model': "crim.cjrelationship",
                        'fields': fields})
    '''
    this is the OLD version:
    migrated_rels.update({'pk': rel_id,
                        'model': "crim.cjrelationship",
                        'observer': rel_pers_id, 
                        'relationship_type': r_type,
                        'model_observation': model_obs_id,
                        'derivative_observation': deriv_obs_id,
                        'curated': curated,
                        'created': created,
                        'updated': updated,
                        'remarks': remarks})
    '''
    
    # then add the various subtypes here via 'details' assembled for each type above
    
    # removed 1/22/22 cf MW
    #migrated_rels.update(details) 
    
    return migrated_rels
    

## Migrate All Relationships

In [44]:
clean_rels = [migrate_rel_data(r, people) for r in rel_data]
# clean_rels

### Migrate just one Test Relationship

In [39]:
test_rel = rel_data[0]
clean_rel = migrate_rel_data(test_rel, people)
clean_rel


{'pk': 1,
 'model': 'crim.cjrelationship',
 'fields': {'definition': 15,
  'observer': 'CRIM_Person_1012',
  'relationship_type': 'quotation',
  'model_observation': 1,
  'derivative_observation': 2,
  'curated': True,
  'created': '2017-06-19T19:22:54-04:00',
  'updated': '2019-08-28T13:16:29.931000-04:00',
  'remarks': '',
  'details': {'type': 'exact', 'self': None}}}

### Export Migrated Relationships to JSON

In [45]:
# Sends curated_obs to JSON

with open("cleaned_relationships.json", "w") as f:
    json.dump(clean_rels, f)