# Run Experiment

This notebook runs a benchmark on a given system.  In order to run an experiment with a new system, you only need to do two things:
- Implement the `offline_processing()` and `online_processing()` functions in a Jupyter notebook for the new system.  You can use `02a_simpleOfflineDTW.ipynb` as a template.
- Import the notebook containing the system's implementation into this notebook as a Python module (aliased as `system` in the import cell below).

This notebook will run an entire benchmark using the specified system, and save the hypotheses to a specified output directory.

In [None]:
import os.path
import shutil

import import_ipynb # must be imported before any notebook is imported as a module
import numpy as np

import System_MATCH as system # replace this with new system

In [None]:
# user-specified settings
EXP_NAME = 'match'  # experiment name, e.g. simpleOfflineDTW_train
SCENARIOS_ROOT_DIR = 'scenarios'  # directory holding scenarios.summary and the per-scenario folders
hop_size = 512  # presumably the feature hop in samples -- confirm against the system notebook
sr = 22050  # presumably the audio sample rate in Hz -- confirm against the system notebook
dtw_steps = np.array([[1, 1],
                      [1, 2],
                      [2, 1]])  # one allowed DTW step per row
dtw_weights = np.array([2, 3, 3])  # presumably one weight per row of dtw_steps

In [None]:
# derived from the user-specified settings -- don't change
EXP_ROOT_DIR = 'experiments/{}'.format(EXP_NAME)  # everything produced by this experiment lives here
CACHE_ROOT_DIR = '{}/cache'.format(EXP_ROOT_DIR)  # output location of the offline processing stage
SCENARIOS_SUMMARY = '{}/scenarios.summary'.format(SCENARIOS_ROOT_DIR)  # index file listing every scenario

In [None]:
def parseScenariosSummary(infile):
    '''
    Parses the contents of the scenarios.summary file into a dictionary for easy lookup.

    Every non-blank line of the file must hold exactly 10 whitespace-separated fields, in this order:
    scenario id, piano file, orchestra file, full mix file, mStart, mEnd, pStart, pEnd, oStart, oEnd.
    Blank lines (e.g. a trailing empty line) are skipped.  If a scenario id appears more than once,
    the last occurrence wins.

    Inputs
    infile: The filepath to the scenarios.summary file

    Returns a nested dictionary whose key is the scenario id (e.g. s1, s2, etc).  The second (nested) key
    is one of the following:
      pfile: the piano filepath
      ofile: the orchestra filepath
      pofile: the full mix filepath
      mStart: the index of the measure where the query starts (count starts from 1)
      mEnd: the index of the measure where the query ends (inclusive)
      pStart: the timestamp in the original full piano recording where the query begins (sec)
      pEnd: presumably the timestamp in the piano recording where the query ends (sec)
      oStart: the ground truth timestamp in the orchestra recording corresponding to the beginning of the query (sec)
      oEnd: presumably the ground truth orchestra timestamp corresponding to the end of the query (sec)

    Raises
    AssertionError: if a non-blank line does not have exactly 10 fields
    ValueError: if a numeric field cannot be converted by int() / float()
    '''
    d = {}
    with open(infile, 'r') as f:
        for line_num, line in enumerate(f, start=1):
            parts = line.split()
            if not parts:
                # tolerate blank lines instead of failing the field-count check
                continue
            assert len(parts) == 10, \
                f'{infile}:{line_num}: expected 10 fields, found {len(parts)}'
            scenario_id, pfile, ofile, pofile = parts[:4]
            d[scenario_id] = {
                'pfile': pfile,
                'ofile': ofile,
                'pofile': pofile,
                'mStart': int(parts[4]),
                'mEnd': int(parts[5]),
                'pStart': float(parts[6]),
                'pEnd': float(parts[7]),
                'oStart': float(parts[8]),
                'oEnd': float(parts[9]),
            }

    return d

In [None]:
def getCacheDir(d, scenario_id):
    '''
    Builds the cache directory path for one scenario.

    The directory name combines the orchestra file's base name with the last underscore-separated
    piece of the full mix file's base name, so scenarios that use the same orchestra and full mix
    recordings resolve to the same cache directory.

    Inputs
    d: dictionary summarizing the information in the scenarios.summary file
    scenario_id: the identifier of the scenario of interest (e.g. s1)

    Returns the cache directory path as a string, located under CACHE_ROOT_DIR.
    '''

    def stem(path):
        # file name with leading directories and the extension removed
        filename = os.path.basename(path)
        return os.path.splitext(filename)[0]

    entry = d[scenario_id]
    orchestra_stem = stem(entry['ofile'])  # e.g. rach2_mov1_O1
    mix_suffix = stem(entry['pofile']).rsplit('_', 1)[-1]  # e.g. PO1 (from rach2_mov1_PO1)

    # e.g. <CACHE_ROOT_DIR>/rach2_mov1_O1_PO1
    return f'{CACHE_ROOT_DIR}/{orchestra_stem}_{mix_suffix}'

In [None]:
def runOfflineProcessing(bypass = None):
    '''
    Runs the offline processing component (only) for every scenario using the specified system.

    Without a bypass directory, the experiment directory must not exist yet: it is created together
    with its cache directory, and system.offline_processing() is called once per scenario listed in
    the scenarios.summary file.

    Inputs
    bypass: specify a directory to simply copy over, will bypass the offline processing stage.
      The directory is copied to CACHE_ROOT_DIR only if CACHE_ROOT_DIR does not exist yet.

    Raises
    AssertionError: if bypass is None and the experiment directory already exists
    OSError (or shutil.Error): if the bypass directory cannot be copied
    '''

    # bypass offline processing
    if bypass is not None:
        os.makedirs(EXP_ROOT_DIR, exist_ok=True)
        if not os.path.exists(CACHE_ROOT_DIR):
            # copytree avoids building a shell command from unquoted paths and
            # raises on failure instead of silently leaving the cache missing
            shutil.copytree(bypass, CACHE_ROOT_DIR)
        return

    # refuse to overwrite the results of an earlier run
    assert not os.path.exists(EXP_ROOT_DIR), \
        f'{EXP_ROOT_DIR} already exists; remove it or choose a different EXP_NAME'

    # parse before creating any directory, so that a bad summary file does not
    # leave behind an empty experiment directory that blocks the next attempt
    d = parseScenariosSummary(SCENARIOS_SUMMARY)

    # setup
    os.makedirs(EXP_ROOT_DIR)
    os.mkdir(CACHE_ROOT_DIR)

    # run offline processing component for each scenario, in summary-file order
    for scenario_id in d:
        scenario_dir = f'{SCENARIOS_ROOT_DIR}/{scenario_id}'
        cache_dir = getCacheDir(d, scenario_id)
        print(f'Running offline processing for {scenario_id}')
        system.offline_processing(scenario_dir, cache_dir, hop_size, dtw_steps, dtw_weights)

In [None]:
# Run the offline stage for every scenario.  No bypass directory is passed, so nothing is copied from an existing cache.
runOfflineProcessing()

In [None]:
def runOnlineProcessing():
    '''
    Runs the online processing component for every scenario using the specified system.

    The experiment directory and its cache directory must already exist.  For every scenario listed
    in the scenarios.summary file, system.online_processing() is called with the scenario directory,
    the output directory for the hypothesis file ({EXP_ROOT_DIR}/{scenario id}), the scenario's cache
    directory, and the value hop_size/sr.

    Raises
    AssertionError: if the experiment directory or the cache directory does not exist
    '''

    # verify
    assert os.path.exists(EXP_ROOT_DIR), \
        f'{EXP_ROOT_DIR} does not exist; run the offline processing stage first'
    assert os.path.exists(CACHE_ROOT_DIR), \
        f'{CACHE_ROOT_DIR} does not exist; run the offline processing stage first'
    d = parseScenariosSummary(SCENARIOS_SUMMARY)

    # same for every scenario; presumably the hop duration in seconds -- confirm against the system notebook
    hop_ratio = hop_size/sr

    # run online processing component for each scenario, in summary-file order
    for scenario_id in d:
        scenario_dir = f'{SCENARIOS_ROOT_DIR}/{scenario_id}'
        out_dir = f'{EXP_ROOT_DIR}/{scenario_id}' # where to save hypothesis file
        cache_dir = getCacheDir(d, scenario_id)
        print(f'Running online processing for {scenario_id}')
        system.online_processing(scenario_dir, out_dir, cache_dir, hop_ratio)

In [None]:
# Run the online stage for every scenario.  The call asserts that the experiment and cache directories already exist.
runOnlineProcessing()