In [1]:
# System modules
from queue import Queue
from threading import Thread
from collections import Counter
import time
import random
import itertools

#import curses
#stdscr = curses.initscr()
#stdscr.clear()

In [2]:
# ---------------------------------------------------------------------------
# Simulation-wide constants
# ---------------------------------------------------------------------------
INVALID_ID = -1     # sentinel: stop / empty / not available

NDATACENTERS = 3

# Seconds a job sleeps between two checks for its input dataset.
WAITTIME_FOR_CHECKING_INPUTDATASET = 5.1

DATASET_SIZE_MIN, DATASET_SIZE_MAX = 100, 1000

NUMBER_OF_FILES = 10000

# ---------------------------------------------------------------------------
# Parameters for each datacenter, addressed by index 0, 1, ...
# ---------------------------------------------------------------------------
# Seconds slept per job on each datacenter (i.e. 1/performance).
vecWaitAfterJob = [5, 6, 3]

# vecVecWaitTimes[src][dst]: seconds a transfer from storage element `src`
# to storage element `dst` takes; both indices are 0-based.
vecVecWaitTimes = [
    [ 0, 10,  5],   # row 0 (originally annotated: down-link metric to data center 1)
    [10,  0,  5],   # row 1 (... to data center 2)
    [ 5, 15,  0],   # row 2 (... to data center 3)
]

In [None]:
# Permanent storage element of each data center
# (storFiles[0] belongs to datacenter 1, storFiles[1] to datacenter 2, ...).
# Every element maps file id -> 1; an earlier variant of this cell used
# collections.Counter objects instead of plain dicts.
storFiles = [
    dict.fromkeys(range(1, 4), 1),      # storage element 1: files 1..3
    dict.fromkeys(range(4, 10), 1),     # storage element 2: files 4..9
    dict.fromkeys(range(10, 12), 1),    # storage element 3: files 10 and 11
]

# Input caches, one per data center. Files are placed here temporarily while a
# job waits for them, i.e. the input dataset is transferred to the storage
# element of the data center where the job executes.
# This could be used for locking the files / duplicating etc.
storFilesForJobs = [{} for _ in range(3)]   # no input datasets yet

jobsLocal = [0] * 3     # no job running
filesLocal = [0] * 3    # no job running

# The single job queue: jobs are put() by the producer and taken with get() by
# the worker thread of each data center; join() would block until every item
# has been taken and marked done.
qJobs = Queue()

# File pipelines from each storage element to the others (1:n).
# Entries are pairs, e.g. qTransfers[0].put([2, 0])   # (fileid, destinationStorage)
qTransfers = [Queue() for _ in range(3)]

In [None]:
# give the status for each datacenter in order to print pretty
#def updateAndPrintStatus(jobIds,files=["","",""]):
def updateAndPrintStatus(threadnr, nJobExecuted, jobid):
    """Record which job a data center is running and print the status row.

    threadnr     -- index of the data center whose slot in the module-level
                    list ``jobsLocal`` is overwritten.
    nJobExecuted -- running job count of that data center; kept for interface
                    compatibility, it is currently not part of the output.
    jobid        -- id of the job that is stored in ``jobsLocal[threadnr]``.

    Side effects: mutates ``jobsLocal`` in place and prints one line showing
    the job ids of the first three data centers.
    """
    # Item assignment mutates the shared list without rebinding the name, so
    # no `global` declaration is required.
    jobsLocal[threadnr] = jobid

    # The per-column template strings that used to be assembled here were
    # never printed and have been dropped; only the combined row is emitted.
    print( " job%05d       job%05d      job%05d " % \
           (int(jobsLocal[0]),int(jobsLocal[1]),int(jobsLocal[2])) )


# generate random data input set for job
# return: file list [fileX, fileY, ...] for [dataCenter0,dataCenter1,...]
# a value -1 means don't take a file from there
def getInputDataSet(jobid): #jobid not used now
    """Draw a random input dataset for a job.

    jobid -- only used in the log line; it does not influence the selection.

    Returns a list of NDATACENTERS distinct file ids sampled without
    replacement from range(NUMBER_OF_FILES). Position i of the list is the
    file requested from data center i. Consumers of the list skip entries
    equal to INVALID_ID ("take no file from there"); this generator itself
    never produces such entries.
    """
    # One file per data center. The sample size follows NDATACENTERS instead of
    # a literal 3, and the former unused random "size" draw is gone so the
    # function consumes random numbers only for the sample it returns.
    inputDataSet = random.sample(range(NUMBER_OF_FILES), NDATACENTERS)

    # Format the whole message first: a multi-argument print emits its pieces
    # in separate writes, which let other threads' output land in between.
    print("inputDataSet generated for jobid %s is %s" % (jobid, inputDataSet))

    return inputDataSet

def areFilesInStorage(fileset, storageid, printIfSuccess=True):
    '''
    Tell whether every valid file of a dataset is present in an input cache.

    fileset        = [file1, file2, ..]; entries equal to INVALID_ID are ignored
    storageid      == data center id, indicates the i in storFilesForJobs[i]
    printIfSuccess = when true, print a "found" line once all files are present

    Returns False as soon as one valid file is missing, True otherwise.
    '''
    for f in fileset:
        if f == INVALID_ID:
            continue
        if f not in storFilesForJobs[storageid]:
            return False

    if printIfSuccess:
        # Column prefixes of the three-column console layout. Any other
        # storage id gets a generic prefix; previously the message variable
        # stayed unassigned for such ids and the print raised
        # UnboundLocalError.
        prefixes = {
            0: " |found in=",
            1: " |                     |found ",
            2: " |                     |                    |found ",
        }
        prefix = prefixes.get(storageid, " |found in storage %s: " % (storageid,))
        print( prefix + str(fileset) )
    return True

def deleteFilesInStorage(fileset, storageid):
    '''
    Remove a dataset from the input cache of one data center.

    fileset   = [file1, file2, ..]; INVALID_ID entries are skipped
    storageid == data center id, i.e. the index into storFilesForJobs

    A file that is not in the cache is reported on stdout and otherwise ignored.
    '''
    for fileid in (entry for entry in fileset if entry != INVALID_ID):
        if fileid in storFilesForJobs[storageid]:
            storFilesForJobs[storageid].pop(fileid)
            continue
        # nothing to remove for this id -> report it
        print("deleteFilesInStorage", fileid, "at", storageid, "not available. cannot delete!")

##################### THREAD ############################
# One thread over-all which produces jobs
def threadProdJob(threadnr, q):
    """Endless job producer: keeps the job queue topped up.

    threadnr -- not used by the body.
    q        -- job queue; needs qsize() and put().

    Job ids are consecutive integers starting at 0. While fewer than 10 jobs
    are queued, the next id is put immediately; otherwise the producer sleeps
    one second before looking again. The function never returns.
    """
    print("threadProdJob: Kicking off jobs ...")
    nextJobId = 0

    while True:
        if q.qsize() >= 10:
            # enough work queued for now -> back off
            time.sleep(1)
            continue
        q.put(nextJobId)
        nextJobId += 1
# Start the single job producer. The daemon flag is passed to the constructor
# because Thread.setDaemon() is deprecated; a daemon thread does not keep the
# interpreter alive on its own.
thread = Thread(target=threadProdJob, args=(-1, qJobs), daemon=True)
thread.start()


##################### THREAD ############################
# One job thread per data center.
# Each thread takes jobs from the shared queue (waiting until one arrives) and processes them.
def threadExeJob(computingElement, q):
    """Worker loop of one data center: fetch a job, stage its input, run it.

    computingElement -- index of this data center; it doubles as the index of
                        its input cache (computing element == storage element).
    q                -- job queue; needs get() and task_done().

    The loop never returns. For every job it
      1. obtains the input files from getInputDataSet(),
      2. enqueues one transfer [fileid, computingElement] per valid file on
         qTransfers[i], where i is the file's position in the dataset,
      3. sleeps WAITTIME_FOR_CHECKING_INPUTDATASET seconds, then re-checks with
         areFilesInStorage() (sleeping again after every miss) until the
         whole dataset is present,
      4. deletes the dataset from the input cache, reports the status, sleeps
         vecWaitAfterJob[computingElement] seconds to emulate the execution
         and finally marks the job as done on the queue.
    """
    jobOrdinal = 1      # 1-based count handed to the status printer

    while True:
        jobid = q.get()
        inputFiles = getInputDataSet(jobid)

        # request every valid file from the storage element it is listed for
        for srcStorage, fileIn in enumerate(inputFiles):
            if fileIn != INVALID_ID:
                qTransfers[srcStorage].put([fileIn, computingElement])   # (fileid, destinationStorage)

        # poll for the input dataset; the first check also happens only after a wait
        time.sleep(WAITTIME_FOR_CHECKING_INPUTDATASET)
        while not areFilesInStorage(inputFiles, computingElement):
            time.sleep(WAITTIME_FOR_CHECKING_INPUTDATASET)

        # the dataset is consumed: drop it from the input cache
        deleteFilesInStorage(inputFiles, computingElement)

        updateAndPrintStatus(computingElement, jobOrdinal, jobid)
        jobOrdinal += 1

        # emulate the execution time of the job on this data center
        time.sleep( vecWaitAfterJob[computingElement] )
        q.task_done( )

# Set up one job-executing thread per data center.
print( " |d                 Starting job queues ...                       |" )

# Start-up banners for the three console columns. Data centers beyond the
# third get a generic banner; the former if/elif chain left the message
# unassigned (or stale from the previous iteration) for them.
startedBanners = (
    " |   thread started.   |                    |                    |",
    " |                     |  thread started.   |                    |",
    " |                     |                    |  thread started.   |",
)
for threadnr in range(0, NDATACENTERS):
    if threadnr < len(startedBanners):
        string = startedBanners[threadnr]
    else:
        string = " |   thread %d started." % threadnr
    print( string )
    # daemon=True replaces the deprecated Thread.setDaemon(True).
    thread = Thread(target=threadExeJob, args=(threadnr, qJobs), daemon=True)
    thread.start()


##################### THREAD ############################
# One outgoing file-transfer queue per datacenter;
# the queue id equals the datacenter id (0, 1, ...)
def threadProcessTransferQueue(queueNr):
    '''
    Endless consumer of the outgoing transfer queue of one storage element.

    queueNr selects the source storage element and, with it, the queue
    qTransfers[queueNr] as well as the row vecVecWaitTimes[queueNr] holding
    the delay towards every destination.

    Each queue entry is a pair (fileid, destinationStorage). The transfer is
    emulated by sleeping vecVecWaitTimes[queueNr][destinationStorage] seconds;
    afterwards the file is registered in the destination's input cache
    storFilesForJobs[destinationStorage], which is where jobs look for their
    input dataset. If the file was already registered there, one line naming
    this queue is printed before the entry is written again.
    '''
    print( "threadProcessTransferQueue(", queueNr, ") started." )

    while True:
        transfer = qTransfers[queueNr].get()        # (fileid, destinationStorage)
        fileid, dst = transfer

        # emulate the link delay [src][dst]
        time.sleep(vecVecWaitTimes[queueNr][dst])

        destCache = storFilesForJobs[dst]
        alreadyCached = destCache.get(fileid)
        if alreadyCached:
            print( "threadProcessTransferQueue(" +str(queueNr) +")" )
        destCache[fileid] = 1      # add file to job input cache

        #for key,val in storFilesForJobs[dst].iteritems():   # iteritems()/ items() in 3.0
        #    print "storFilesForJobs comprises", key, val
        #print "storFilesForJobs0 comprises", storFilesForJobs[0]
        #print "storFilesForJobs1 comprises", storFilesForJobs[1]
        #print "storFilesForJobs2 comprises", storFilesForJobs[2]

# Set up one transfer thread per datacenter = output pipe of files.
for threadnr in range(0, NDATACENTERS):
    # daemon=True replaces the deprecated Thread.setDaemon(True).
    thread = Thread(target=threadProcessTransferQueue, args=(threadnr,), daemon=True)
    thread.start()
#qTransfers[0].put([2, 0])               # (fileid, destinationStorage)



# Now main thread ...
#print "*** Main thread stalled"
#q.join()    # wait for all other threads to complete

#raw_input()
#exit()


# Park the main thread: all worker threads in this file are started as daemon
# threads, so this loop is what keeps a script run alive while they work.
# It wakes up every 0.1 s and ends silently on a keyboard interrupt.
try:
    while True:
        time.sleep(.1)
except KeyboardInterrupt:  # Ctrl+C (presumably also the notebook's "interrupt kernel" -- confirm)
    # Deliberately swallowed: interrupting is the intended way to stop.
    pass

'''
import os
os.system('pause') #NO! ?!
'''

threadProdJob: Kicking off jobs ...
 |d                 Starting job queues ...                       |
 |   thread started.   |                    |                    |
inputDataSet generated for jobid 0  |                     |  thread started.   |                    |is
inputDataSet generated for jobidinputDataSet generated for jobid  |                     |                    |  thread started.   |  threadProcessTransferQueue(threadProcessTransferQueue(threadProcessTransferQueue([540, 1588, 5084]
12   
2  01  isis ) started.) started.  ) started.

[5276, 2541, 9605][604, 6013, 8389]


 |found in=[540, 1588, 5084]
 job00000       job00000      job00000 
inputDataSet generated for jobid 3 is [359, 8483, 1850]
 |                     |found [5276, 2541, 9605]
 job00000       job00001      job00000 
 |                     |                    |found [604, 6013, 8389]
 job00000       job00001      job00002 
inputDataSet generated for jobid 4 is [4465, 8269, 1554]
 |found in=[359, 8483, 

inputDataSet generated for jobid 53 is [941, 3619, 4012]
inputDataSet generated for jobid 54 is [9729, 6396, 4437]
 |                     |found [8548, 5166, 9196]
 job00050       job00052      job00051 
 |                     |                    |found [941, 3619, 4012]
 job00050       job00052      job00053 
 |found in=[9729, 6396, 4437]
 job00054       job00052      job00053 
inputDataSet generated for jobid 55 is [3724, 5758, 6068]
inputDataSet generated for jobid 56 is [3864, 4001, 737]
inputDataSet generated for jobid 57 is [8523, 5425, 9415]
 |                     |found [3724, 5758, 6068]
 job00054       job00055      job00053 
 |                     |                    |found [3864, 4001, 737]
 job00054       job00055      job00056 
inputDataSet generated for jobid 58 is [6672, 737, 5601]
inputDataSet generated for jobid 59 is [5917, 8057, 4726]
 |                     |                    |found [6672, 737, 5601]
 job00054       job00055      job00058 
 |found in=[8523, 5425

 |                     |                    |found [6931, 4105, 9544]
 job00104       job00106      job00107 
inputDataSet generated for jobid 109 is [4251, 7214, 7338]
inputDataSet generated for jobid 110 is [6889, 3368, 5817]
 |found in=[2399, 6430, 9216]
 job00108       job00106      job00107 
inputDataSet generated for jobid 111 is [9109, 6831, 8779]
 |                     |found [4251, 7214, 7338]
 job00108       job00109      job00107 
 |                     |                    |found [6889, 3368, 5817]
 job00108       job00109      job00110 
inputDataSet generated for jobid 112 is [6545, 261, 8344]
inputDataSet generated for jobid 113 is [6906, 895, 4778]
 |found in=[9109, 6831, 8779]
 job00111       job00109      job00110 
 |                     |                    |found [6545, 261, 8344]
 job00111       job00109      job00112 
inputDataSet generated for jobid 114 is [2166, 8987, 3977]
inputDataSet generated for jobid 115 is [5073, 6172, 9191]
 |                     |found [

 |                     |                    |found [5850, 7674, 4546]
 job00161       job00160      job00162 
inputDataSet generated for jobid 164 is [9356, 5245, 4946]
inputDataSet generated for jobid 165 is [185, 986, 6996]
 |                     |found [4263, 328, 8246]
 job00161       job00163      job00162 
 |                     |                    |found [9356, 5245, 4946]
 job00161       job00163      job00164 
inputDataSet generated for jobid 166 is [608, 5146, 5400]
inputDataSet generated for jobid 167 is [4199, 532, 4113]
 |found in=[185, 986, 6996]
 job00165       job00163      job00164 
inputDataSet generated for jobid 168 is [7287, 3220, 8575]
 |                     |                    |found [608, 5146, 5400]
 job00165       job00163      job00166 
inputDataSet generated for jobid 169 is [3766, 8077, 456]
 |                     |found [4199, 532, 4113]
 job00165       job00167      job00166 
inputDataSet generated for jobid 170 is [9634, 3992, 1870]
 |found in=[7287, 3

inputDataSet generated for jobid 218 is [7365, 6221, 957]
inputDataSet generated for jobid 219 is [3311, 3360, 4876]
 |                     |found [2335, 8852, 5605]
 job00215       job00217      job00216 
inputDataSet generated for jobid 220 is [1222, 3882, 2754]
 |                     |                    |found [7365, 6221, 957]
 job00215       job00217      job00218 
 |found in=[3311, 3360, 4876]
 job00219       job00217      job00218 
inputDataSet generated for jobid 221 is [8598, 9790, 1749]
inputDataSet generated for jobid 222 is [1422, 9440, 894]
 |                     |found [1222, 3882, 2754]
 job00219       job00220      job00218 
 |                     |                    |found [8598, 9790, 1749]
 job00219       job00220      job00221 
inputDataSet generated for jobid 223 is [5670, 6849, 2753]
 |found in=[1422, 9440, 894]
 job00222       job00220      job00221 
inputDataSet generated for jobid 224 is [9382, 9036, 1304]
inputDataSet generated for jobid 225 is [4857, 9891, 