In [2]:
import pandas as pd
import numpy as np
import os
from numpy import random as nprandom
import random
import time
from multiprocessing import Pool
import sys
from queue import Queue
from threading import Thread
import multiprocessing

# Directory holding the feature files, relative to this notebook.
featuresPath = os.path.join('..', 'features')
# Number of earlier draws that form one sequence example.
lookback = 10
# Column names of the twelve winning numbers: WinNo1 ... WinNo12.
winNoCols = ['WinNo' + str(slot) for slot in range(1, 13)]

# Load the feature table and give it a fresh 0..n-1 positional index.
df = pd.read_csv(os.path.join(featuresPath, 'features.csv')).reset_index(drop=True)
df.head()

Unnamed: 0,id,WinNo1,WinNo2,WinNo3,WinNo4,WinNo5,WinNo6,WinNo7,WinNo8,WinNo9,...,e_sinId%43,e_cosId%43,sq_sinId%43,sq_cosId%43,sinId%47,cosId%47,e_sinId%47,e_cosId%47,sq_sinId%47,sq_cosId%47
0,1,2,5,7,8,22,33,38,44,46,...,1.156735,2.689468,0.0212,0.9788,0.133287,0.991077,1.142578,2.694136,0.017765,0.982235
1,2,1,9,11,14,33,37,41,42,43,...,1.333889,2.605437,0.083001,0.916999,0.264195,0.964469,1.302383,2.623395,0.069799,0.930201
2,3,1,2,5,8,13,16,19,21,34,...,1.52876,2.47304,0.180164,0.819836,0.390389,0.92065,1.477556,2.510922,0.152404,0.847596
3,4,1,3,6,12,17,24,25,26,40,...,1.73632,2.302505,0.304448,0.695552,0.509617,0.860402,1.664653,2.36411,0.259709,0.740291
4,5,6,7,8,21,30,32,35,37,39,...,1.949005,2.105962,0.445314,0.554686,0.61975,0.784799,1.858463,2.191967,0.38409,0.61591


# Prepare Dictionaries

In [3]:
# Per-draw lookup tables consumed by batchPrep; all three are keyed by draw id.
contFDict = {}          # draw id -> continuous features (row columns 13 onward)
seqFDict = {}           # draw id -> winning numbers of the `lookback` draws before it
winNumbersTDict = {}    # draw id -> the draw's own winning numbers (row columns 1-12)

start = time.time()
print("Creating dictionaries for batchPrep")
data = df.values
# Row position of every draw id (column 0). A draw id is not a row position:
# the head() output shows id 1 sitting at row 0, so slicing rows by id would
# end the history window on the draw's own row.
rowOfDraw = {int(rowId): pos for pos, rowId in enumerate(data[:, 0])}

def getPastWinningNumbers(drawId):
    """
    Returns the winning numbers of the `lookback` draws that precede drawId
    Args:
      drawId: id of the draw (value in column 0 of data) whose history is wanted
    Returns: slice of data with `lookback` rows (oldest first) and columns 1-12;
             the row of drawId itself is excluded
    Raises: KeyError if drawId is not in data, ValueError if fewer than
            `lookback` rows precede it
    """
    pos = rowOfDraw[drawId]
    if pos < lookback:
        raise ValueError("draw " + str(drawId) + " has fewer than "
                         + str(lookback) + " previous draws")
    # Window ends at pos (exclusive) so the target draw never leaks into its own input.
    return data[pos-lookback:pos, 1:13]

# The first `lookback` rows have no complete history, so they are skipped.
for example in data[lookback:]:
    drawId = int(example[0])
    winNumbers = example[1:13]
    seqFDict[drawId] = getPastWinningNumbers(drawId)       # previous 'lookback' draws' winning numbers
    contFDict[drawId] = example[13:]                       # continuous features
    winNumbersTDict[drawId] = winNumbers                   # the targets (12 numbers)

end = time.time()
elapsed = round(end-start,3)
print("Elapsed time: "+str(elapsed)+" seconds!")

Creating dictionaries for batchPrep
Elapsed time: 1.721 seconds!


# Batch Prep

In [4]:
batchSize = 64                      # examples per batch
batchesNo = 100                     # batches drawn in the timing run below
evalInterval = batchesNo // 5       # "Eval time!" is reported every evalInterval batches
# A sorted list instead of a set: random.sample() needs a sequence (passing a
# set was deprecated in Python 3.9 and raises TypeError from 3.11), and a list
# gives the population a well-defined order.
drawIds = sorted(winNumbersTDict)

def batchPrep():
    """
    Draws one random batch of examples from the prepared dictionaries
    Returns: tuple (contFeatures, seqFeatures, targets) of arrays with batchSize
             rows each; row k of every array comes from the same draw
    Raises: ValueError if batchSize exceeds the number of available draw ids
    """
    # random.sample() requires a sequence; handing it a set raises TypeError on
    # Python 3.11+, so a set population is materialised first.
    population = drawIds if isinstance(drawIds, (list, tuple)) else tuple(drawIds)
    batchDrawIds = random.sample(population, batchSize)   # distinct ids, no replacement
    contFeatures = np.array([contFDict[x] for x in batchDrawIds])
    seqFeatures = np.array([seqFDict[x] for x in batchDrawIds])
    targets = np.array([winNumbersTDict[x] for x in batchDrawIds])
    return contFeatures, seqFeatures, targets
    
# Time batchesNo sequential calls to batchPrep and report the throughput.
start = time.time()
print("Running a batchprep simulation of size "+str(batchesNo))
for i in range(batchesNo):
    contFeatures, seqFeatures, targets = batchPrep()
    # i>0 skips the trivial match at the very first batch
    if i>0 and i%evalInterval==0:
        print("Eval time!")

end = time.time()
elapsed = round(end-start,3)
print("Elapsed time: "+str(elapsed)+" seconds!")
# elapsed is rounded to milliseconds, so a very fast run can come out as 0.0;
# skip the speed line then instead of dividing by zero.
if elapsed>0:
    print("Batch speed: "+str(batchesNo/elapsed)+" batches/sec")

Running a batchprep simulation of size 100
Eval time!
Eval time!
Eval time!
Eval time!
Elapsed time: 0.907 seconds!
Batch speed: 110.25358324145535 batches/sec


In [5]:
# Shapes of the most recent batch, one per line:
#   contFeatures -> batchsize, features
#   seqFeatures  -> batchsize, timesteps (lookback), features
#   targets      -> batchsize, targetsize
print(contFeatures.shape, seqFeatures.shape, targets.shape, sep="\n")

(64, 142)
(64, 10, 12)
(64, 12)


# Multithreaded batchPrep

In [8]:
def batchPrep():
    """
    Prepares batchesNo batches and puts each one in the queue for the other thread to process
    Reads the module-level q (the batch queue) and batchesNo; q.put blocks while
    a bounded queue is full, so production is paced by the consumer
    """
    # random.sample() requires a sequence (a set raises TypeError on Python 3.11+);
    # convert once here rather than once per batch.
    population = drawIds if isinstance(drawIds, (list, tuple)) else tuple(drawIds)
    for i in range(batchesNo):
        batchDrawIds = random.sample(population, batchSize)             # prepares batch
        contFeatures = np.array([contFDict[x] for x in batchDrawIds])
        seqFeatures = np.array([seqFDict[x] for x in batchDrawIds])
        targets = np.array([winNumbersTDict[x] for x in batchDrawIds])
        q.put((contFeatures, seqFeatures, targets))                     # put it in the queue
    
def trainBatch(BatchTuple):
    """
    Placeholder for a single training step: the model call and the loss
    printout are not implemented yet, so the batch is currently ignored
    Args:
      BatchTuple: the required input for the model corresponding to one batch
    Returns: Success Code (always 0)
    """
    # TF model train
    # print loss
    successCode = 0
    return successCode

def runEvaluation():
    """
    Placeholder for the periodic evaluation on the eval set (meant to print the
    eval loss); the training loop calls it at fixed batch intervals
    Returns: Success Code (always 0)
    """
    successCode = 0
    return successCode

def trainModel(queue, batchesNo, evalEvery=500):
    """
    Spawns a thread to produce batches and main thread feeds them to the model
    Args:
      queue: queue the batches are read from; in OOP design, it will be self.queue
      batchesNo: number of batches to consume; in OOP design, it will be self.batchesNo
      evalEvery: evaluation runs whenever the batch index is a positive multiple
                 of this value (default 500, the previously hard-coded interval);
                 0 or None disables evaluation
    Note: the producer thread runs batchPrep, which takes no arguments and works
          on the module-level q and batchesNo, so those must match the values
          passed here or the consumer will wait on the wrong queue
    """
    t = Thread(target=batchPrep)
    t.daemon = True                                # producer must not keep the process alive
    t.start()
    for i in range(batchesNo):
        batchtuple = queue.get()                   # blocks until a batch is available
        trainBatch(batchtuple)
        if evalEvery and i>0 and i%evalEvery==0:
            print("Running Evaluation!")
            runEvaluation()
        queue.task_done()
    try:
        queue.join()                               # wait until every queued batch is marked done
    except KeyboardInterrupt:
        sys.exit(1)

simNo = 1                            # how many full training runs are timed
start = time.time()
for i in range(simNo):
    batchesNo = 1000                 # batches per run (module-level, also read by the producer)
    q = Queue(maxsize=5)             # fresh bounded batch queue for this run
    trainModel(queue=q, batchesNo=batchesNo)

end = time.time()
elapsed = round(end - start, 3)
print("Elapsed time: ", elapsed, " seconds!", sep="")
# No speed line when the rounded duration is zero.
if elapsed > 0:
    print("Batch speed: ", simNo * batchesNo / elapsed, " batches/sec", sep="")

Running Evaluation!
Elapsed time: 9.071 seconds!
Batch speed: 110.24142872891633 batches/sec
