### Load Dependencies

In [1]:
%matplotlib inline
%load_ext Cython

In [2]:
import numpy as np
import copy
from numpy.matlib import repmat
import matplotlib.pyplot as plt
from PIL import Image, ImageFilter, ImageChops
import cv2
from skimage import filters, measure
from skimage.measure import label, regionprops
from skimage.color import label2rgb
from sklearn.cluster import KMeans
import matplotlib.patches as mpatches
from scipy.signal import convolve2d
from scipy.spatial import KDTree
import seaborn as sns
import pickle
import librosa as lb
import time
import cProfile
import os
import os.path
import pyximport; pyximport.install()
import multiprocessing
from ExtractBootlegFeatures import *

The Cython extension is already loaded. To reload it, use:
  %reload_ext Cython


### Cython DTW

In [3]:
# Allowed DTW steps, flattened as (row, col) pairs: alignBootlegScores reads the
# even positions as row steps and the odd positions as column steps.
dtw_steps = [1,1,1,2,2,1] # dtw
# Step weights passed to the DTW as 'dw'; one entry per (row, col) pair above.
dtw_weights = [1,1,2]

In [4]:
%%cython
import numpy as np
cimport numpy as np
cimport cython

import sys
import time


# Paired numpy dtype objects (for building arrays at runtime) and C-level
# typedefs (for typed buffers and cdef variables) used by the DTW routines.
DTYPE_INT32 = np.int32
ctypedef np.int32_t DTYPE_INT32_t

DTYPE_FLOAT = np.float64
ctypedef np.float64_t DTYPE_FLOAT_t

# Sentinel for "no path reaches this cell" in the accumulated-cost matrix.
cdef DTYPE_FLOAT_t MAX_FLOAT = float('inf')

# careful, without bounds checking can mess up memory - also can't use negative indices I think (like x[-1])
@cython.boundscheck(False) # turn off bounds-checking for entire function
def DTW_Cost_To_AccumCostAndSteps(Cin, parameter):
    '''
    Compute the DTW accumulated-cost matrix and the best step taken into each cell.

    Inputs
        Cin: The cost matrix; anything np.array can convert to a 2-d float64 array
        parameter: dict of settings
            'dn': row offset of each allowed step (default [1, 1, 0])
            'dm': column offset of each allowed step (default [1, 0, 1])
            'dw': weight applied to the local cost for each step (default [1, 1, 1])
            'SubSequence': if truthy, a path may start at any column of the
                           first row; if falsy or missing, only at [0, 0]
    Returns
        [accumCost, steps] on success, both with the shape of the cost matrix.
        steps[r, c] is the index into dn/dm/dw of the cheapest step into
        (r, c); it stays 0 for cells that no step could improve.
        [-1, -1, -1] (after printing a message) if an input is invalid. Note
        that the failure value has three entries while success has two.
    '''

    # ---- Section for checking and catching errors in the inputs ----

    cdef np.ndarray[DTYPE_FLOAT_t, ndim=2] C
    try:
        C = np.array(Cin, dtype=DTYPE_FLOAT)
    except TypeError:
        print(bcolors.FAIL + "FAILURE: The type of the cost matrix is wrong - please pass in a 2-d numpy array" + bcolors.ENDC)
        return [-1, -1, -1]
    except ValueError:
        print(bcolors.FAIL + "FAILURE: The type of the elements in the cost matrix is wrong - please have each element be a float (perhaps you passed in a matrix of ints?)" + bcolors.ENDC)
        return [-1, -1, -1]

    # Bounds checking is off, so an empty matrix must be rejected before C[0, 0] is read.
    if C.shape[0] == 0 or C.shape[1] == 0:
        print(bcolors.FAIL + "FAILURE: The cost matrix is empty - please pass in a 2-d numpy array with at least one row and one column" + bcolors.ENDC)
        return [-1, -1, -1]

    cdef np.ndarray[np.uint32_t, ndim=1] dn
    cdef np.ndarray[np.uint32_t, ndim=1] dm
    cdef np.ndarray[DTYPE_FLOAT_t, ndim=1] dw
    # make sure dn, dm, and dw are setup
    # dn loading and exception handling
    if ('dn'  in parameter.keys()):
        try:

            dn = np.array(parameter['dn'], dtype=np.uint32)
        except TypeError:
            print(bcolors.FAIL + "FAILURE: The type of dn (row steps) is wrong - please pass in a 1-d numpy array that holds uint32s" + bcolors.ENDC)
            return [-1, -1, -1]
        except ValueError:
            print(bcolors.FAIL + "The type of the elements in dn (row steps) is wrong - please have each element be a uint32 (perhaps you passed a long?). You can specify this when making a numpy array like: np.array([1,2,3],dtype=np.uint32)" + bcolors.ENDC)
            return [-1, -1, -1]
    else:
        dn = np.array([1, 1, 0], dtype=np.uint32)
    # dm loading and exception handling
    if 'dm'  in parameter.keys():
        try:
            dm = np.array(parameter['dm'], dtype=np.uint32)
        except TypeError:
            print(bcolors.FAIL + "FAILURE: The type of dm (col steps) is wrong - please pass in a 1-d numpy array that holds uint32s" + bcolors.ENDC)
            return [-1, -1, -1]
        except ValueError:
            print(bcolors.FAIL + "FAILURE: The type of the elements in dm (col steps) is wrong - please have each element be a uint32 (perhaps you passed a long?). You can specify this when making a numpy array like: np.array([1,2,3],dtype=np.uint32)" + bcolors.ENDC)
            return [-1, -1, -1]
    else:
        print(bcolors.FAIL + "dm (col steps) was not passed in (gave default value [1,0,1]) " + bcolors.ENDC)
        dm = np.array([1, 0, 1], dtype=np.uint32)
    # dw loading and exception handling
    if 'dw'  in parameter.keys():
        try:
            dw = np.array(parameter['dw'], dtype=DTYPE_FLOAT)
        except TypeError:
            print(bcolors.FAIL + "FAILURE: The type of dw (step weights) is wrong - please pass in a 1-d numpy array that holds floats" + bcolors.ENDC)
            return [-1, -1, -1]
        except ValueError:
            print(bcolors.FAIL + "FAILURE:The type of the elements in dw (step weights) is wrong - please have each element be a float (perhaps you passed ints or a long?). You can specify this when making a numpy array like: np.array([1,2,3],dtype=np.float64)" + bcolors.ENDC)
            return [-1, -1, -1]
    else:
        dw = np.array([1, 1, 1], dtype=DTYPE_FLOAT)
        print(bcolors.FAIL + "dw (step weights) was not passed in (gave default value [1,1,1]) " + bcolors.ENDC)

    # The inner loop indexes dn, dm and dw with the same step index and bounds
    # checking is off, so mismatched lengths would read past the end of an array.
    if np.size(dw) == 0 or np.size(dn) != np.size(dw) or np.size(dm) != np.size(dw):
        print(bcolors.FAIL + "FAILURE: dn (row steps), dm (col steps) and dw (step weights) must be non-empty and have the same length" + bcolors.ENDC)
        return [-1, -1, -1]

    # ---- Section where types are given to the variables we're going to use ----

    # create matrices to store our results (D and E)
    cdef DTYPE_INT32_t numRows = C.shape[0] # only works with np arrays, use np.shape(x) will work on lists? want to force to use np though?
    cdef DTYPE_INT32_t numCols = C.shape[1]
    cdef DTYPE_INT32_t numDifSteps = np.size(dw)

    cdef unsigned int maxRowStep = max(dn)
    cdef unsigned int maxColStep = max(dm)

    cdef np.ndarray[np.uint32_t, ndim=2] steps = np.zeros((numRows,numCols), dtype=np.uint32)
    # accumCost is padded with maxRowStep rows / maxColStep columns of infinity so
    # that stepping back from the first rows/columns never produces a negative index.
    cdef np.ndarray[DTYPE_FLOAT_t, ndim=2] accumCost = np.ones((maxRowStep + numRows, maxColStep + numCols), dtype=DTYPE_FLOAT) * MAX_FLOAT

    cdef DTYPE_FLOAT_t bestCost
    cdef DTYPE_INT32_t bestCostIndex
    cdef DTYPE_FLOAT_t costForStep
    cdef unsigned int row, col
    cdef unsigned int stepIndex

    # ---- The start of the actual algorithm, now that all our variables are set up ----

    # initializing the cost matrix - depends on whether its subsequence DTW
    # essentially allow us to hop on the bottom anywhere (so could start partway through one of the signals)
    # A missing 'SubSequence' key means ordinary (non-subsequence) DTW, matching DTW_GetPath.
    if parameter.get('SubSequence', False):
        for col in range(numCols):
            accumCost[maxRowStep, col + maxColStep] = C[0, col]
    else:
        accumCost[maxRowStep, maxColStep] = C[0,0]

    # filling the accumulated cost matrix (row/col are indices into the padded matrix)
    for row in range(maxRowStep, numRows + maxRowStep, 1):
        for col in range(maxColStep, numCols + maxColStep, 1):
            bestCost = accumCost[<unsigned int>row, <unsigned int>col] # initialize with what's there - so if is an entry point, then can start low
            bestCostIndex = 0
            # go through each step, find the best one
            for stepIndex in range(numDifSteps):
                costForStep = accumCost[<unsigned int>((row - dn[(stepIndex)])), <unsigned int>((col - dm[(stepIndex)]))] + dw[stepIndex] * C[<unsigned int>(row - maxRowStep), <unsigned int>(col - maxColStep)]
                if costForStep < bestCost:
                    bestCost = costForStep
                    bestCostIndex = stepIndex
            # save the best cost and best cost index
            accumCost[row, col] = bestCost
            steps[<unsigned int>(row - maxRowStep), <unsigned int>(col - maxColStep)] = bestCostIndex

    # return the accumulated cost (padding stripped) along with the matrix of steps taken to achieve that cost
    return [accumCost[maxRowStep:, maxColStep:], steps]

@cython.boundscheck(False) # turn off bounds-checking for entire function
def DTW_GetPath(np.ndarray[DTYPE_FLOAT_t, ndim=2] accumCost, np.ndarray[np.uint32_t, ndim=2] stepsForCost, parameter):
    '''
    Backtrack through the step matrix to recover the lowest-cost warping path.

    Inputs
        accumCost: accumulated-cost matrix (2-d float64)
        stepsForCost: for each cell, the index of the step used to reach it (2-d uint32)
        parameter: dict; reads 'dn' (row steps, default [1, 1, 0]),
                   'dm' (col steps, default [1, 0, 1]) and
                   'SubSequence' (default 0). 'dw' is not used here.
    Returns
        [path, endCol, endCost]
        path: 2 x K uint32 array; row 0 holds row indices and row 1 holds
              column indices, ordered from the start of the path to its end
        endCol: column at which the backtrace started
        endCost: accumulated cost at that end cell
        If a cell with infinite cost is reached, 'A path is not possible' is
        printed and the partial path found so far is returned.
    '''

    cdef np.ndarray[unsigned int, ndim=1] dn
    cdef np.ndarray[unsigned int, ndim=1] dm
    cdef np.uint8_t subseq
    # make sure dn, dm, and dw are setup
    # The buffers are typed unsigned int, so both the supplied and the default
    # steps must be uint32 arrays; an int32 array raises a buffer dtype mismatch.
    if ('dn'  in parameter.keys()):
        dn = np.asarray(parameter['dn'], dtype=np.uint32)
    else:
        dn = np.array([1, 1, 0], dtype=np.uint32)
    if 'dm'  in parameter.keys():
        dm = np.asarray(parameter['dm'], dtype=np.uint32)
    else:
        dm = np.array([1, 0, 1], dtype=np.uint32)
    if 'SubSequence' in parameter.keys():
        subseq = parameter['SubSequence']
    else:
        subseq = 0

    cdef np.uint32_t numRows
    cdef np.uint32_t numCols
    cdef np.uint32_t curRow
    cdef np.uint32_t curCol
    cdef np.uint32_t endCol
    cdef DTYPE_FLOAT_t endCost

    numRows = accumCost.shape[0]
    numCols = accumCost.shape[1]

    # either start at the far corner (non sub-sequence)
    # or start at the lowest cost entry in the last row (sub-sequence)
    # where all of the signal along the row has been used, but only a 
    # sub-sequence of the signal along the columns has to be used
    curRow = numRows - 1
    if subseq:
        curCol = np.argmin(accumCost[numRows - 1, :])
    else:
        curCol = numCols - 1

    endCol = curCol
    endCost = accumCost[curRow, curCol]

    cdef np.uint32_t curRowStep
    cdef np.uint32_t curColStep
    cdef np.uint32_t curStepIndex


    cdef np.ndarray[np.uint32_t, ndim=2] path = np.zeros((2, numRows + numCols), dtype=np.uint32) # make as large as could need, then chop at the end
    path[0, 0] = curRow
    path[1, 0] = curCol

    cdef np.uint32_t stepsInPath = 1 # starts at one, we add in one before looping
    cdef np.uint32_t stepIndex = 0
    # you're done if you've made it to the bottom left (non sub-sequence)
    # or just the bottom (sub-sequence)
    cdef np.int8_t done = (subseq and curRow == 0) or (curRow == 0 and curCol == 0)
    while not done:
        if accumCost[curRow, curCol] == MAX_FLOAT:
            print('A path is not possible')
            break

        # find the step size
        curStepIndex = stepsForCost[curRow, curCol]
        curRowStep = dn[curStepIndex]
        curColStep = dm[curStepIndex]
        # backtrack by 1 step
        curRow = curRow - curRowStep
        curCol = curCol - curColStep
        # add your new location onto the path
        path[0, stepsInPath] = curRow
        path[1, stepsInPath] = curCol
        stepsInPath = stepsInPath + 1
        # check to see if you're done
        done = (subseq and curRow == 0) or (curRow == 0 and curCol == 0)

    # reverse the path (a matrix with two rows) and return it
    return [np.fliplr(path[:, 0:stepsInPath]), endCol, endCost]

class bcolors:
    """ANSI escape sequences for decorating text printed to a terminal."""

    # reset: ends any colour/style started by the codes below
    ENDC = '\033[0m'

    # text styles
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # bright foreground colours
    FAIL = '\033[91m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    OKBLUE = '\033[94m'
    HEADER = '\033[95m'

In [5]:
def alignBootlegScores(query, ref, numRefNotes, steps = [1,1,1,2,2,1], weights = [1,1,2], optimized=True):
    """Align a query bootleg score against a reference using subsequence DTW.

    Parameters
    ----------
    query, ref : array-like
        Bootleg score matrices. NOTE(review): presumably one column per event,
        since the query is summed along axis 0 to count notes -- confirm.
    numRefNotes :
        Passed through to normalizedCostMetric (used by the Cython branch only).
    steps : sequence of int
        Allowed DTW steps flattened as (row, col) pairs; the length must be even.
    weights : sequence of number
        One weight per (row, col) step pair.
    optimized : bool
        True uses the Cython DTW; False uses librosa.sequence.dtw.

    Returns
    -------
    D : accumulated-cost matrix
    wp : warping path. In the Cython branch the path is transposed and
        reversed before being returned -- presumably to match the ordering
        librosa uses; confirm.

    Raises
    ------
    AssertionError
        In the Cython branch, if `steps` has odd length or `weights` does not
        hold exactly one entry per step pair.
    """
    if optimized: # Cython implementation
        # set params
        assert len(steps) % 2 == 0, "The length of steps must be even."
        # The Cython kernel indexes dn, dm and dw with one shared step index and
        # has bounds checking disabled, so the three must have the same length.
        assert len(weights) == len(steps) // 2, "There must be exactly one weight per (row, col) step."
        dn = np.array(steps[::2], dtype=np.uint32)
        dm = np.array(steps[1::2], dtype=np.uint32)
        dw = weights
        subsequence = True
        parameter = {'dn': dn, 'dm': dm, 'dw': dw, 'SubSequence': subsequence}

        # Compute cost matrix
        #cost = costMetric(query.T, ref)
        numQueryNotes = np.sum(query, axis=0)
        cost = normalizedCostMetric(query, ref, numQueryNotes, numRefNotes)

        # DTW
        [D, s] = DTW_Cost_To_AccumCostAndSteps(cost, parameter)
        [wp, endCol, endCost] = DTW_GetPath(D, s, parameter)

        # Reformat the output
        wp = wp.T[::-1]
    else: # librosa implementation
        steps = np.array(steps).reshape((-1,2))
        D, wp = lb.sequence.dtw(query, ref, step_sizes_sigma = steps, weights_mul = weights, subseq = True, metric = costMetric)
    return D, wp

### Get Bootleg Score of Query Image

In [6]:
# Query image used by the single-query walkthrough in the cells below.
imagefile = 'data/queries/p111_q2.jpg'
# Open the image and convert it to PIL mode 'L' (single-channel grayscale).
pim1 = Image.open(imagefile).convert('L')
# Last expression of the cell: renders the image inline.
pim1

FileNotFoundError: [Errno 2] No such file or directory: 'data/queries/p111_q2.jpg'

In [None]:
# Extract the bootleg score of the query image (processQuery comes from ExtractBootlegFeatures).
bscore_query=processQuery(imagefile)
# NOTE(review): the list is presumably the staff-line row positions to draw -- confirm against visualizeLongBootlegScore.
visualizeLongBootlegScore(bscore_query, [13,15,17,19,21,35,37,39,41,43])

### Load Reverse Index

In [7]:
# Load the reverse index used by the single-query walkthrough below.
pickle_file = 'experiments/indices/SINGULAR.pkl'
# NOTE: pickle.load can execute arbitrary code; only load index files you trust.
with open(pickle_file, 'rb') as f:
    rindex = pickle.load(f)

FileNotFoundError: [Errno 2] No such file or directory: 'experiments/indices/SINGULAR.pkl'

### Generate Histograms

In [8]:
def bootlegHash(arr):
    """Pack one bootleg-score column into a single np.uint64 fingerprint.

    Every entry equal to 1 contributes a '1' bit and anything else a '0' bit,
    in order (first entry is the most significant bit); two zero bits are then
    appended at the low end before the bit string is read as a base-2 integer.
    """
    bits = "".join("1" if value == 1 else "0" for value in arr)
    return np.uint64(int(bits + "00", 2))

In [9]:
def showHistograms(arr,numBins):
    """Plot a histogram of raw counts for `arr` using `numBins` bins and show it.

    `density=False` replaces the old `normed=False` keyword, which current
    Matplotlib releases no longer accept.
    """
    plt.hist(arr, density=False, bins=numBins)
    plt.show()

In [10]:
def getOffsetDelta(bscore_query, rindex):
    """Collect, per piece, the offsets between reference and query positions.

    Each column of `bscore_query` is hashed with bootlegHash. Columns whose
    hash is 0 or is missing from `rindex` are skipped. For every piece listed
    under a matching hash, (reference position - query column index) is
    appended to that piece's list.

    Returns a dict mapping piece key -> list of offsets.
    """
    offsetDict = {}
    for queryPos, column in enumerate(bscore_query.T):
        fingerprint = bootlegHash(column)
        if fingerprint == 0 or fingerprint not in rindex:
            continue
        matches = rindex[fingerprint]
        for piece in matches:
            # plain Python lists on purpose (the original notes: don't use np.array)
            deltas = [refPos - queryPos for refPos in matches[piece]]
            if piece in offsetDict:
                offsetDict[piece].extend(deltas)
            else:
                offsetDict[piece] = deltas
    return offsetDict

In [11]:
def getOffsetDeltaNGram(bscore_query, rindex, N_Gram = 3):
    """Collect per-piece offsets using fixed-length n-grams of column hashes.

    For every query column index the tuple of `N_Gram` consecutive column
    hashes starting there is looked up in `rindex`; windows that would run past
    the last column are skipped. For every piece listed under a matching
    n-gram, (reference position - query column index) is appended to that
    piece's list.

    Unlike getOffsetDelta, n-grams made only of zero hashes are NOT skipped:
    the key is a tuple, so the original `hashkey == 0` test could never be true
    and has been removed.

    Returns a dict mapping piece key -> list of offsets.
    """
    offsetDict = {}
    numCols = len(bscore_query.T)
    # Hash every column once instead of once per n-gram it takes part in.
    colHashes = [bootlegHash(col) for col in bscore_query.T]
    for index in range(numCols):
        if index + N_Gram > numCols:
            # the window would run past the last column
            continue
        hashkey = tuple(colHashes[index + i] for i in range(N_Gram))
        if hashkey not in rindex:
            continue
        rindex_hash = rindex[hashkey]
        for key in rindex_hash:
            #DONT USE np.ARRAY
            offset = [i - index for i in rindex_hash[key]]
            if key in offsetDict:
                offsetDict[key].extend(offset)
            else:
                offsetDict[key]=offset
    return offsetDict

In [12]:
def getOffsetDeltaDynamicStaticN_GRAM(bscore_query, rindices, counts, threshold, Max_N = 4):
    """Collect per-piece offsets, growing each n-gram until it is selective enough.

    Starting at every query column, the n-gram length N grows from 1. The
    n-gram is looked up in rindices[N-1]; its match count is counts[N-1][ngram].
    - If the window runs past the last column, or the n-gram is not in the
      index, nothing is recorded for that starting column.
    - If the count is below `threshold`, or N has reached `Max_N`, the offsets
      (reference position - query column index) are recorded per piece.
    - Otherwise N is increased and the lookup repeated.

    Returns a dict mapping piece key -> list of offsets.
    """
    offsetDict = {}
    numCols = len(bscore_query.T)
    # Hash every column once; the n-grams below are built from these values.
    colHashes = [bootlegHash(col) for col in bscore_query.T]
    for index in range(numCols):
        N_Gram = 1
        while(True):
            if index + N_Gram > numCols:
                # the window would run past the last column
                break
            hashkey = tuple(colHashes[index + i] for i in range(N_Gram))
            rindex = rindices[N_Gram-1]
            if hashkey not in rindex:
                break
            numMatches = counts[N_Gram-1][hashkey]
            if numMatches < threshold or N_Gram == Max_N:
                rindex_hash = rindex[hashkey]
                for key in rindex_hash:
                    #DONT USE np.ARRAY
                    offset = [i - index for i in rindex_hash[key]]
                    if key in offsetDict:
                        offsetDict[key].extend(offset)
                    else:
                        offsetDict[key]=offset
                break
            N_Gram+=1
    return offsetDict

In [13]:
def getOffsetDeltaDynamicAdaptiveN_GRAM(bscore_query, rindices, counts, K_total, Max_N = 4):
    """Collect per-piece offsets with a match budget shared across the query.

    `K_total` is split evenly over the query columns (K_avg). For every
    starting column the n-gram length N grows from 1 until the n-gram's match
    count, counts[N-1][ngram], is below K_avg plus the budget carried over from
    earlier columns, or until N reaches `Max_N`. The offsets (reference
    position - query column index) of the accepted n-gram are recorded per
    piece and the unused (or overspent) budget is carried forward. Nothing is
    recorded for a starting column if the window runs past the last column or
    the n-gram is not in rindices[N-1].

    An empty query returns an empty dict (the budget cannot be split over zero
    columns).

    Returns a dict mapping piece key -> list of offsets.
    """
    offsetDict = {}
    numCols = len(bscore_query.T)
    if numCols == 0:
        # avoids dividing K_total by zero below
        return offsetDict
    # Hash every column once; the n-grams below are built from these values.
    colHashes = [bootlegHash(col) for col in bscore_query.T]
    # K_avg is the total allocation to each fingerprint
    K_avg = K_total / numCols
    K_carry = 0
    for index in range(numCols):
        N_Gram = 1
        while(True):
            if index + N_Gram > numCols:
                # the window would run past the last column
                break
            hashkey = tuple(colHashes[index + i] for i in range(N_Gram))
            rindex = rindices[N_Gram-1]
            if hashkey not in rindex:
                break
            numMatches = counts[N_Gram-1][hashkey]
            if numMatches < K_avg + K_carry or N_Gram == Max_N:
                K_carry += K_avg - numMatches
                #NOTE: We try to avoid negative K_carry, but because of the cap in Max_N, we need to be careful.
                rindex_hash = rindex[hashkey]
                for key in rindex_hash:
                    offset = [i - index for i in rindex_hash[key]]
                    if key in offsetDict:
                        offsetDict[key].extend(offset)
                    else:
                        offsetDict[key]=offset
                break
            N_Gram+=1
    return offsetDict

In [14]:
def rankHistograms(offsetDict, rindex, bin_size=10):
    """Score each piece by the fullest bin of its offset histogram.

    Parameters
    ----------
    offsetDict : dict
        Maps piece key -> list of offsets.
    rindex :
        Unused; kept so existing callers keep working.
    bin_size : number
        Width of each histogram bin. (Previously this argument was silently
        overwritten with 10.)

    Returns
    -------
    pieceScores : list of (piece key, score)
        Sorted by score, highest first. A piece is only scored when its offsets
        span more than `bin_size`; pieces with a narrower spread, or with no
        offsets at all, are left out.
    histograms : dict
        Always empty; nothing in this function fills it.
    """
    histograms = {}
    pieceScores = []
    for key in offsetDict:
        h = offsetDict[key]
        if not h:
            # no offsets recorded for this piece; max()/min() would raise
            continue
        maxh = max(h)
        minh = min(h)
        if(maxh > minh+bin_size):
            # +2 leaves room for the bin that holds maxh itself
            hist = [0] * (int((maxh-minh)/bin_size)+2)
            for i in h:
                hist[int((i-minh)/bin_size)]+=1
            score = max(hist)
            pieceScores.append((key, score))

    pieceScores = sorted(pieceScores, key = lambda x:x[1], reverse=True)
    return pieceScores, histograms



In [15]:
# Single-query walkthrough: gather per-piece offsets for the query loaded above ...
offsetDict = getOffsetDelta(bscore_query,rindex)
# ... and rank the pieces by their best histogram bin.
pieceScores, histograms = rankHistograms(offsetDict,rindex)

NameError: name 'bscore_query' is not defined

In [16]:
def displayHist(pieceScores, pieceNum):
    """Print the key of the `pieceNum`-th piece (1-based, ordered by piece number) and plot its histogram.

    Piece keys are ordered by the integer that follows their first character,
    so every key must look like a letter followed by digits. The histogram data
    is read from the notebook-level `histograms` dict.
    """
    byPieceNumber = sorted(pieceScores, key = lambda entry: int(entry[0][1:]))
    pieceKey = byPieceNumber[pieceNum-1][0]
    print(pieceKey)
    h, numBins = histograms[pieceKey]
    showHistograms(h, numBins)

In [17]:
# Print the five best-scoring pieces (or all of them if fewer were scored).
for i in pieceScores[:5]:
    print(i)
# NOTE: both calls below look pieces up in `histograms`, which must contain
# (offsets, numBins) entries for them.
h,numBins = histograms[pieceScores[0][0]]
showHistograms(h,numBins)

displayHist(pieceScores, 1)

NameError: name 'pieceScores' is not defined

In [18]:
# Align the query against the MIDI bootleg score of the top-ranked piece.
pieceStr = pieceScores[0][0]
midi_db_dir = 'experiments/train/db'
midi_bscore_pkl = '{}/{}.pkl'.format(midi_db_dir,pieceStr)
if not os.path.exists(midi_bscore_pkl):
    # fall back to a fixed piece when the top-ranked one has no bootleg score file
    midi_bscore_pkl = '{}/{}.pkl'.format(midi_db_dir,'p171')
# Load the file once and reuse the result instead of reading it a second time.
midi_d = loadMidiBootlegScore(midi_bscore_pkl)

bscore_midi, miditimes, num_notes, stafflines = midi_d
D, wp = alignBootlegScores(bscore_query, bscore_midi, num_notes, dtw_steps, dtw_weights)
matchSegmentTime, matchSegmentTick = getPredictedTimestamps(wp, miditimes)

NameError: name 'pieceScores' is not defined

In [19]:
def getRank(pieceScores,correctPiece):
    """Return the 1-based position of `correctPiece` in `pieceScores`.

    `pieceScores` is a sequence of (piece key, score) entries. If the piece is
    listed more than once the position of its last entry is returned; if it is
    not listed at all the result is 200.
    """
    rank = 200
    for position, entry in enumerate(pieceScores, start=1):
        if entry[0] == correctPiece:
            rank = position
    return rank

In [20]:
# The expected piece key is the part of the query file name before the first '_'.
getRank(pieceScores, os.path.basename(imagefile).split('_')[0])

NameError: name 'pieceScores' is not defined

### Run system on all files

In [21]:
def processSingleQuery(imagefile, rindex, counts = None, outfile = None, mode = "N_GRAM"):
    """Run one query image through the retrieval pipeline and rank the pieces.

    Parameters
    ----------
    imagefile : str
        Path of the query image.
    rindex :
        Reverse index. For the "Dynamic_*" modes it is passed on as `rindices`
        and indexed by n-gram length minus one.
    counts :
        Match counts, used only by the "Dynamic_*" modes.
    outfile : str or None
        Where to pickle the result. If the file already exists the query is
        skipped and None is returned.
    mode : str
        One of "NORMAL", "N_GRAM", "Dynamic_Static", "Dynamic_Adaptive".

    Returns
    -------
    list of (piece key, score) sorted best first, or None if skipped.

    Raises
    ------
    ValueError
        If `mode` is not one of the supported values. This is checked before
        the query image is processed.
    """
    if outfile is not None and os.path.exists(outfile):
        print("SKIPPED",outfile)
        return

    # Fail fast on a bad mode instead of hitting an unbound `offsetDict` later.
    validModes = ("NORMAL", "N_GRAM", "Dynamic_Static", "Dynamic_Adaptive")
    if mode not in validModes:
        raise ValueError("Unknown mode {!r}; expected one of {}".format(mode, validModes))

    profileStart = time.time()

    # Get Bootleg Score
    bscore_query=processQuery(imagefile)

    # Generate and rank histograms
    if mode == "NORMAL":
        offsetDict = getOffsetDelta(bscore_query, rindex)
    elif mode == "N_GRAM":
        offsetDict = getOffsetDeltaNGram(bscore_query, rindex, N_Gram = 1)
    elif mode == "Dynamic_Static":
        offsetDict = getOffsetDeltaDynamicStaticN_GRAM(bscore_query, rindices=rindex, counts=counts, threshold=1000, Max_N=4)
    else: # "Dynamic_Adaptive"
        offsetDict = getOffsetDeltaDynamicAdaptiveN_GRAM(bscore_query, rindices=rindex, counts=counts, K_total=50000, Max_N=4)
    pieceScores, histograms = rankHistograms(offsetDict, rindex)

    # Profile & save to file
    profileEnd = time.time()
    profileDur = profileEnd - profileStart
    print(profileDur)
    saveToFile(outfile, imagefile, pieceScores, profileDur)

    return pieceScores

In [22]:
def saveToFile(outfile, imagefile, pieceScores, profileDur):
    """Pickle (query name, pieceScores, profileDur) to `outfile`.

    The query name is the base name of `imagefile` without its extension.
    Nothing is written when `outfile` is empty or None.
    """
    if not outfile:
        return
    with open(outfile, 'wb') as f:
        queryName, _extension = os.path.splitext(os.path.basename(imagefile))
        record = (queryName, pieceScores, profileDur)
        pickle.dump(record, f)

In [23]:
# Load a reverse index from disk and report how long the load took.
st = time.time()
# presumably the 1-gram index, judging by the file name -- confirm
pickle_file = 'experiments/indices/N_GRAM_1_ALL.pkl'
# NOTE: pickle.load can execute arbitrary code; only load index files you trust.
with open(pickle_file, 'rb') as f:
    rindex1 = pickle.load(f)
print("LOADED pkl file")
# rindex_filter is the index handed to the query functions; unfiltered at this point.
rindex_filter = rindex1
print("Total Time: ",time.time()-st)

FileNotFoundError: [Errno 2] No such file or directory: 'experiments/indices/N_GRAM_1_ALL.pkl'

In [24]:
# Condense if using selectedfp
# When enabled, drops every fingerprint whose total number of stored positions
# (summed over all pieces) exceeds 10000 from a deep copy of the index.
condense = False
if condense :
    rindex_filter = copy.deepcopy(rindex1)
    for key in rindex1:
        s = 0
        for piece in rindex1[key].keys():
            s +=len(rindex1[key][piece])
        if s > 10000:
            rindex_filter.pop(key, None)
# NOTE: `st` is set in an earlier cell, so this is the time elapsed since that cell started.
print("Total Time: ",time.time()-st)

Total Time:  3.7014214992523193


In [25]:
# Load the reverse index and the match counts for every n-gram length 1..Max_N.
# After this cell rindex_filter[N-1] / counts[N-1] hold the data for N-grams.
# NOTE: pickle.load can execute arbitrary code; only load index files you trust.
st = time.time()
Max_N = 4
rindex_filter = []
fpMaps = []
counts = []
for i in range(1, Max_N+1):
    print("LOADING {}".format(i))
    pickle_file = 'experiments/indices/N_GRAM_{}_ALL.pkl'.format(i)
    count_file = 'experiments/indices/N_GRAM_{}_COUNT.pkl'.format(i)
    # The with-statement closes each file; no explicit flush()/close() is
    # needed (and flushing a read-only file does nothing).
    with open(pickle_file, 'rb') as f:
        rindex_filter.append(pickle.load(f))
    with open(count_file, 'rb') as f:
        counts.append(pickle.load(f))
print("LOADED pkl files")
print("Total Time: ",time.time()-st)

LOADING 1


FileNotFoundError: [Errno 2] No such file or directory: 'experiments/indices/N_GRAM_1_ALL.pkl'

In [34]:
# process single query
#counts = []
# Path is relative to the project root, like every other data path in this
# notebook, instead of pointing into one user's home directory.
query_file = 'data/queries/p2_q1.jpg'
processSingleQuery(query_file, rindex_filter, counts = counts, mode = "Dynamic_Static")

Processing /home/dyang/SheetMidiSearchRetrieval/data/queries/p2_q1.jpg
1.3250157833099365


[('dGouin,_PierreNocturnes,_Op.9_**86550', 54),
 ('dChopin,_Fr%C3%A9d%C3%A9ricNocturnes,_Op.9_**86550', 54),
 ('p2', 54),
 ('dGouin,_PierreNocturnes,_Op.9_**34915', 33),
 ('dChopin,_Fr%C3%A9d%C3%A9ricNocturnes,_Op.9_**34915', 33),
 ('dGouin,_PierreNocturnes,_Op.9_**80717', 32),
 ('dChopin,_Fr%C3%A9d%C3%A9ricNocturnes,_Op.9_**80717', 32),
 ('dGouin,_PierreNocturnes,_Op.9_**00470', 31),
 ('dChopin,_Fr%C3%A9d%C3%A9ricNocturnes,_Op.9_**00470', 31),
 ('dChopin,_Fr%C3%A9d%C3%A9ricNocturnes,_Op.9_**34916', 30),
 ('dGouin,_PierreNocturnes,_Op.9_**34916', 30),
 ('dGouin,_PierreNocturnes,_Op.9_**112335', 29),
 ('dChopin,_Fr%C3%A9d%C3%A9ricNocturnes,_Op.9_**112335', 29),
 ('dGouin,_PierreNocturnes,_Op.9_**113996', 23),
 ('dChopin,_Fr%C3%A9d%C3%A9ricNocturnes,_Op.9_**113996', 23),
 ('dStenhammar,_WilhelmPiano_Sonata,_Op.12_**400268', 10),
 ('dCramer,_HenriAm_Clavier,_Op.120_**522114', 9),
 ('dNeupert,_Edmund12_New_Octave_Studies_**309696', 8),
 ('dLebierre,_OlivierDie_Zauberin_von_der_Schelde,_Op.

In [38]:
def processQuery_wrapper(queryfile, rindex, counts,  outdir, mode):
    """Process one query and write its result to `<outdir>/<query name>.hyp`.

    Wrapper for running multiple jobs in parallel. The query name is the base
    name of `queryfile` without its extension (e.g. p1_q1). Returns whatever
    processSingleQuery returns.
    """
    basename = os.path.splitext(os.path.basename(queryfile))[0] # e.g. p1_q1
    hyp_outfile = "{}/{}.hyp".format(outdir, basename)
    return processSingleQuery(queryfile, rindex, counts,  hyp_outfile, mode)

In [None]:
# process all queries
#pickle_file = 'experiments/indices/**index.pkl'
pickle_file = 'experiments/indices/N_GRAM_1_ALL.pkl'
pickle_map = 'experiments/indices/N_GRAM_1_ALL_MAP.pkl'
query_list = 'cfg_files/query.test.list' # list of query images
outdir = 'experiments/lol/hyp' # where to save hypothesis output files
mode = "Dynamic_Static"

# prep output directory
if not os.path.isdir(outdir):
    os.makedirs(outdir)

# load reverse index. Recommend keeping load=False and loading it earlier.
# NOTE: pickle.load can execute arbitrary code; only load index files you trust.
load = False
if load:
    print("LOADING RINDEX")
    with open(pickle_file, 'rb') as f:
        rindex1 = pickle.load(f)
    with open(pickle_map, 'rb') as f:
        fpMap = pickle.load(f)
    condense = False
    rindex_filter = rindex1
    if condense :
        # drop fingerprints whose stored positions, summed over all pieces, exceed 200000
        rindex_filter = copy.deepcopy(rindex1)
        for key in rindex1:
            s = 0
            for piece in rindex1[key].keys():
                s +=len(rindex1[key][piece])
            if s > 200000:
                rindex_filter.pop(key, None)
if mode == "N_GRAM":
    counts = []

print("STARTING PROCESSING")

# one (query image path, output directory) pair per non-blank line of the query list
inputs = []
with open(query_list, 'r') as f:
    for line in f:
        if line.strip():
            inputs.append((line.rstrip(), outdir))

multiprocess = False
if multiprocess:
    # number of cores to use
    n_cores = 25 #multiprocessing.cpu_count()
    # starmap unpacks each tuple into processQuery_wrapper's five arguments.
    # NOTE: the index and counts are sent to the workers with every job, which
    # is expensive for a large index.
    jobs = [(queryfile, rindex_filter, counts, hypdir, mode) for queryfile, hypdir in inputs]
    # process queries in parallel; the with-block shuts the pool down afterwards
    with multiprocessing.Pool(processes=n_cores) as pool:
        outputs = list(pool.starmap(processQuery_wrapper, jobs))
else:
    for i in inputs:
        processQuery_wrapper(i[0],rindex_filter, counts,i[1], mode = mode)


STARTING PROCESSING
Processing data/queries/p2_q1.jpg
1.1773545742034912
Processing data/queries/p2_q2.jpg
1.108424186706543
Processing data/queries/p2_q3.jpg
1.115931749343872
Processing data/queries/p2_q4.jpg
1.0421009063720703
Processing data/queries/p2_q5.jpg
1.1063766479492188
Processing data/queries/p2_q6.jpg
1.0875537395477295
Processing data/queries/p2_q7.jpg
1.0651390552520752
Processing data/queries/p2_q8.jpg
1.197953701019287
Processing data/queries/p2_q9.jpg
1.1133928298950195
Processing data/queries/p2_q10.jpg
1.07210111618042
Processing data/queries/p3_q1.jpg
1.1783912181854248
Processing data/queries/p3_q2.jpg
1.5363390445709229
Processing data/queries/p3_q3.jpg
1.3707642555236816
Processing data/queries/p3_q4.jpg
1.3239538669586182
Processing data/queries/p3_q5.jpg
1.1494011878967285
Processing data/queries/p3_q6.jpg
1.3979241847991943
Processing data/queries/p3_q7.jpg
1.417079210281372
Processing data/queries/p3_q8.jpg
1.2414305210113525
Processing data/queries/p3_q9.j

1.0249416828155518
Processing data/queries/p20_q5.jpg
1.069657802581787
Processing data/queries/p20_q6.jpg
1.0845026969909668
Processing data/queries/p20_q7.jpg
1.0512032508850098
Processing data/queries/p20_q8.jpg
0.9783282279968262
Processing data/queries/p20_q9.jpg
1.0152530670166016
Processing data/queries/p20_q10.jpg
1.0580902099609375
Processing data/queries/p22_q1.jpg
1.1892509460449219
Processing data/queries/p22_q2.jpg
1.0979809761047363
Processing data/queries/p22_q3.jpg
1.0960845947265625
Processing data/queries/p22_q4.jpg
1.2783503532409668
Processing data/queries/p22_q5.jpg
1.199873924255371
Processing data/queries/p22_q6.jpg
1.3957412242889404
Processing data/queries/p22_q7.jpg
1.1799733638763428
Processing data/queries/p22_q8.jpg
1.1292059421539307
Processing data/queries/p22_q9.jpg
1.1506874561309814
Processing data/queries/p22_q10.jpg
1.118518352508545
Processing data/queries/p23_q1.jpg
1.1785292625427246
Processing data/queries/p23_q2.jpg
1.221818208694458
Processing 