In [33]:
import gzip
import struct

class TAQTradesReader(object):
    
    '''
    This reader reads an entire compressed binary TAQ trades file into memory,
    uncompresses it, and gives its clients access to the contents of the file
    via a set of get methods.

    Layout of the uncompressed file as decoded here (every value is
    big-endian and 4 bytes wide):
      - header   : two ints; the first is returned by getSecsFromEpocToMidn(),
                   the second is the number of records N
      - N ints   : milliseconds from midnight, one per trade
      - N ints   : trade sizes
      - N floats : trade prices
    '''

    # Size in bytes of the two-int header that precedes the data columns.
    _HEADER_BYTES = 8

    # Size in bytes of every value stored in a column ('i' and 'f' alike).
    _VALUE_BYTES = 4

    @staticmethod
    def _unpackColumn( buf, offset, count, code ):
        '''
        Decode one column: `count` big-endian values of struct type `code`
        ('i' or 'f') starting at byte `offset` of `buf`. Returns a tuple.
        Raises struct.error if `buf` is too short to hold the column.
        '''
        return struct.unpack_from( ">%d%s" % ( count, code ), buf, offset )

    def __init__(self, filePathName ):
        '''
        Do all of the heavy lifting here and give users getters for the results.

        filePathName -- path of the gzip-compressed trades file to load.

        Raises struct.error when the file content is shorter than its header
        says it should be; errors from gzip.open / read propagate unchanged.
        '''
        self.filePathName = filePathName
        with gzip.open( filePathName, 'rb') as f:
            file_content = f.read()

        # The file is fully in memory now, so decoding happens after the
        # handle has been released.
        self._header = struct.unpack_from( ">2i", file_content, 0 )
        nRecs = self._header[ 1 ]
        columnBytes = self._VALUE_BYTES * nRecs

        # Columns are stored back to back right after the header.
        offset = self._HEADER_BYTES
        self._ts = self._unpackColumn( file_content, offset, nRecs, "i" )
        offset += columnBytes
        self._s = self._unpackColumn( file_content, offset, nRecs, "i" )
        offset += columnBytes
        self._p = self._unpackColumn( file_content, offset, nRecs, "f" )

    def getN(self):
        '''Return the number of trade records, taken from the file header.'''
        return self._header[1]
    
    def getSecsFromEpocToMidn(self):
        '''Return the first header field (seconds from epoch to midnight).'''
        return self._header[0]
    
    def getPrice( self, index ):
        '''Return the price of the trade at position `index`.'''
        return self._p[ index ]
    
    def getMillisFromMidn( self, index ):
        '''Return the milliseconds-from-midnight stamp of the trade at `index`.'''
        return self._ts[ index ]
    
    def getTimestamp(self, index ):
        '''Alias of getMillisFromMidn(), kept for compatibility.'''
        return self.getMillisFromMidn( index ) # Compatibility 
    
    def getSize( self, index ):
        '''Return the size of the trade at position `index`.'''
        return self._s[ index ]
    
    def rewrite( self, filePathName, tickerId ):
        '''
        Write every trade to a new gzip-compressed file at `filePathName`
        as fixed-width big-endian ">QHIf" records:

          Q -- header seconds * 1000 plus the trade's millis from midnight
          H -- tickerId (must fit in an unsigned 16-bit value)
          I -- trade size
          f -- trade price

        Raises struct.error if a value does not fit its field. The output
        file is closed even when that happens.
        '''
        s = struct.Struct( ">QHIf" ) 
        baseTS = self.getSecsFromEpocToMidn() * 1000
        # The context manager guarantees the gzip stream is closed (and its
        # trailer written) even if pack() or write() raises part-way through.
        with gzip.open( filePathName, "wb" ) as out:
            for i in range( self.getN() ):
                ts = baseTS + self.getMillisFromMidn( i )
                out.write( s.pack( ts, tickerId, self.getSize(i), self.getPrice(i) ) )
    

class TAQQuotesReader(object):
    '''
    Loads a whole gzip-compressed binary TAQ quotes file into memory,
    decodes it, and exposes the decoded values through get methods.

    As decoded here, the file starts with a header of two big-endian ints
    (the value returned by getSecsFromEpocToMidn(), then the record count N)
    followed by five columns of N big-endian 4-byte values each: millis from
    midnight, bid size, bid price, ask size, ask price.
    '''


    def __init__(self, filePathName ):
        '''
        Read and decode the file at `filePathName`; all the work happens
        here, the getters only index into the decoded tuples.
        '''
        self._filePathName = filePathName
        with gzip.open( self._filePathName, 'rb') as f:
            raw = f.read()

        self._header = struct.unpack_from( ">2i", raw[ 0:8 ] )
        count = self._header[ 1 ]
        width = 4 * count   # bytes occupied by one column

        def column( position, code ):
            # Columns sit back to back after the 8-byte header, so the
            # position-th one starts `position` column widths past it.
            begin = 8 + position * width
            return struct.unpack_from( ">%d%s" % ( count, code ), raw[ begin:begin + width ] )

        self._ts = column( 0, "i" )   # millis from midnight
        self._bs = column( 1, "i" )   # bid size
        self._bp = column( 2, "f" )   # bid price
        self._as = column( 3, "i" )   # ask size
        self._ap = column( 4, "f" )   # ask price

    def getN(self):
        '''Number of quote records, as stated in the file header.'''
        count = self._header[ 1 ]
        return count
    
    def getSecsFromEpocToMidn(self):
        '''First header field (seconds from epoch to midnight).'''
        secs = self._header[ 0 ]
        return secs
    
    def getMillisFromMidn( self, index ):
        '''Milliseconds-from-midnight stamp of the quote at `index`.'''
        return self._ts[ index ]

    def getAskSize( self, index ):
        '''Ask size of the quote at `index`.'''
        return self._as[ index ]
    
    def getAskPrice( self, index ):
        '''Ask price of the quote at `index`.'''
        return self._ap[ index ]

    def getBidSize( self, index ):
        '''Bid size of the quote at `index`.'''
        return self._bs[ index ]
    
    def getBidPrice( self, index ):
        '''Bid price of the quote at `index`.'''
        return self._bp[ index ]

In [31]:
import unittest



class Test_TAQTradesReader(unittest.TestCase):
    '''Checks TAQTradesReader against known values from the IBM trades file.'''

    def test1(self):
        # Load the trades file and collect the header values plus the
        # fields of the very first trade.
        trades = TAQTradesReader('IBM_trades.binRT' )

        firstTrade = 0
        observed = [
            trades.getN(),
            trades.getSecsFromEpocToMidn(),
            trades.getMillisFromMidn( firstTrade ),
            trades.getSize( firstTrade ),
            trades.getPrice( firstTrade ),
        ]
        expected = '[25367, 1190260800, 34210000, 76600, 116.2699966430664]'

        # Compared in string form, so the float price is matched on its
        # exact repr.
        self.assertEqual( expected, str( observed ) )


# Run the test cases defined so far from inside the notebook.
if __name__ == "__main__":
    unittest.main(
        exit=False,                       # do not call sys.exit() when the run finishes
        argv=['first-arg-is-ignored'],    # explicit argv, so sys.argv is not parsed
    )

.
----------------------------------------------------------------------
Ran 1 test in 0.007s

OK


In [34]:
import unittest

class Test_TAQQuotesReader(unittest.TestCase):
    '''Checks TAQQuotesReader against known values from the IBM quotes file.'''

    def test1(self):
        # Load the quotes file and collect the header values plus both
        # sides of the very first quote.
        quotes = TAQQuotesReader('IBM_quotes.binRQ' )

        firstQuote = 0
        observed = [
            quotes.getN(),
            quotes.getSecsFromEpocToMidn(),
            quotes.getMillisFromMidn( firstQuote ),
            quotes.getAskSize( firstQuote ),
            quotes.getAskPrice( firstQuote ),
            quotes.getBidSize( firstQuote ),
            quotes.getBidPrice( firstQuote ),
        ]
        expected = '[70166, 1190260800, 34210000, 1, 116.19999694824219, 38, 116.19999694824219]'

        # Compared in string form, so the float prices are matched on
        # their exact repr.
        self.assertEqual( expected, str( observed ) )


# Run the test cases defined so far from inside the notebook.
if __name__ == "__main__":
    unittest.main(
        exit=False,                       # do not call sys.exit() when the run finishes
        argv=['first-arg-is-ignored'],    # explicit argv, so sys.argv is not parsed
    )

..
----------------------------------------------------------------------
Ran 2 tests in 0.027s

OK
