In [None]:
%%writefile genindex.py
# -*- coding: utf-8 -*-
import os
import struct
import mmap
import sys
import yaml
import argparse
import multiprocessing
import numpy as np
from bitstring import BitStream, ConstBitStream,Bits
from bitarray import bitarray as BitArray
from itertools import chain
import csv

def sec2time(sec, n_msec=3):
    ''' Convert seconds to '[D days, ]HH:MM:SS[.FFF]'.

    sec    -- a number of seconds, or any object with ``__len__`` (list,
              array, ...) whose elements are converted one by one.
    n_msec -- number of decimals printed for the seconds field; 0 or less
              prints whole seconds only.

    Returns one string for a scalar and a list of strings for a sequence.
    The seconds field is rounded by the format specifier, so a value just
    below a minute boundary can be printed as 60.
    '''
    if hasattr(sec,'__len__'):
        # Forward n_msec: the recursive call used to fall back to the
        # default of 3 decimals whatever the caller asked for.
        return [sec2time(s, n_msec) for s in sec]
    m, s = divmod(sec, 60)
    h, m = divmod(m, 60)
    d, h = divmod(h, 24)
    if n_msec > 0:
        # field width = 2 integer digits + '.' + n_msec decimals
        pattern = '%%02d:%%02d:%%0%d.%df' % (n_msec+3, n_msec)
    else:
        pattern = r'%02d:%02d:%02d'
    if d == 0:
        return pattern % (h, m, s)
    return ('%d days, ' + pattern) % (d, h, m, s)

def readheader(BS,hprint=None):
    b = BitStream('0x'+''.join(x.encode('hex') for x in struct.unpack('>72s',BS)))
    #magic = b.read(32).hex# 32 magic word
    header = {}
    header["recordlength"] = b.read(16).uint   # 16 0..65535
    header["hdrlen"] = b.read(8).uint    # 8 0..255
    header["blocksize"] = b.read(8).uint   # 8 0..25
    header["samplerate"] = b.read(16).uint   # 16 0..65535
    header["efegain"] = b.read(10).uint   # 10 0..650
    header["qu"] = b.read(3).uint    # 16 0..7 (0=>1bits,1=>2bits,2=>4bits,4=>8bits,5=>16bits, 3,6,7 spare)
    header["msg"] = b.read(3).uint    # 3 6
    header["frameid"] = b.read(32).uint  # 32 0..4294967295
    header["version"] = b.read(7).uint    # 7 0..127
    header["timetag_samps"] = b.read(25).uint   # 25 0..17499999
    header["offsetfreq"] = b.read(32).int   # 32 0..4294967295
    header["timetag_secs"] = b.read(17).uint   # 17 0..86399
    header["subc"] = b.read(4).uint    # 4 0..16
    header["digitalgain"] = b.read(11).uint   # 11 0..2047
    header["subchan0_offset"] = b.read(32).int    # 32 -2147483647..+2147483647
    header["subchan1_offset"] = b.read(32).int    # 32 -2147483647..+2147483647
    header["subchan2_offset"] = b.read(32).int    # 32 -2147483647..+2147483647
    header["subchan3_offset"] = b.read(32).int    # 32 -2147483647..+2147483647
    header["sweeprate"] = b.read(32).int    # 32 -2147483647..+2147483647
    header["path_delay"] = b.read(32).int    # 32 0..2^32-1
    header["gdspid"] = b.read(8).uint    # 1 0..1
    header["hs"] = b.read(1).uint    # 1 0..1
    header["semr"] = b.read(12).int    # 12
    header["sweepchange"] = b.read(11).uint   # 11 0..2047
    header["ncov"] = b.read(1).uint    # 1 0..1
    header["ncoreset_c"] = b.read(11).int    # 11 -1024..+1024
    header["ncoreset_t"] = b.read(20).uint   # 20 0..863999
    b.read(128).uint    # 128 Empty
    
    if hprint:
        #print header
        print yaml.dump(header, default_flow_style=False)
    
    return header

def readblocks(filename):
    with open(filename, 'rb') as f:
        fsize=os.path.getsize(filename)
        # memory-map the file, size 0 means whole file
        mm = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ)

        while True:
            # Magic Word
            byte=mm.read(4)
            if not byte:
                break
            # Read Header (check cuantization and save timestamp)
            header = readheader(mm.read(72),hprint=None)
            ## Calculate UTC
            utctime=np.float64(header["timetag_secs"])+np.float64(header["timetag_samps"])*np.float64(1./17500000)-np.float64(header["path_delay"])*np.float64(1./35000000)
            sr=header["samplerate"]
            ttime=sec2time(utctime,6)
            print format(utctime, '.10f'),header["timetag_secs"],header["timetag_samps"],header["path_delay"],',',ttime,',',sr,',',filename,',','0',',',fsize
            mm.read(1392)
            break
        
        while True:
            # Magic Word
            byte=mm.read(4)
            if not byte:
                break
            # Read Header (check cuantization and save timestamp)
            header = readheader(mm.read(72),hprint=None)
            ## Calculate UTC
            utctime=np.float64(header["timetag_secs"])+np.float64(header["timetag_samps"])*np.float64(1./17500000)-np.float64(header["path_delay"])*np.float64(1./35000000)
            sr=header["samplerate"]
            ttime=sec2time(utctime,6)
            print format(utctime, '.10f'),header["timetag_secs"],header["timetag_samps"],header["path_delay"],',',ttime,',',sr,',',filename,',','0',',',fsize
            mm.read(1392)
            break
            


            
def main(argus):    
    filename=argus.inputfile  #your filename goes here.

    output=argus.outputpath
    fsize=os.path.getsize(filename) #size of file (in bytes)  
    if fsize == 0:
        print ",,",filename
        sys.exit(1)
#     print filename, fsize
    head, tail = os.path.split(filename)
    start='12:10:00'
    end='12:10:55'
    readblocks(filename)
        
if __name__ == '__main__':
    ## CMD arguments:  genindex.py -i <datafile> [-o <outputpath>]
    parser = argparse.ArgumentParser()
    # The input file is mandatory.  It used to default to the string 'False',
    # which only postponed the failure to os.path.getsize('False') in main().
    parser.add_argument('-i', '--inputfile', required=True)
    parser.add_argument('-o', '--outputpath', default='False')
    #parser.add_argument('-n', '--nblocks', default='None')

    argus = parser.parse_args(sys.argv[1:])
    main(argus)

In [None]:
%%writefile searchvaliddata.py
import pandas as pd
pd.set_option('display.width',256)
import numpy as np
from datetime import datetime

def find(indexfile,start,end):

    aa = pd.read_csv(indexfile)
    print aa
    
    df = aa[(aa['start_seconds'] >= start) & (aa['start_seconds'] <= end)]
    print df
    return df

def sec2time(sec, n_msec=3):
    ''' Convert seconds to '[D days, ]HH:MM:SS[.FFF]'.

    sec    -- a number of seconds, or any object with ``__len__`` (list,
              array, ...) whose elements are converted one by one.
    n_msec -- number of decimals printed for the seconds field; 0 or less
              prints whole seconds only.

    Returns one string for a scalar and a list of strings for a sequence.
    '''
    if hasattr(sec,'__len__'):
        # Pass n_msec along; the recursion previously ignored it and always
        # produced the default 3 decimals for sequence input.
        return [sec2time(s, n_msec) for s in sec]
    m, s = divmod(sec, 60)
    h, m = divmod(m, 60)
    d, h = divmod(h, 24)
    if n_msec > 0:
        # field width = 2 integer digits + '.' + n_msec decimals
        pattern = '%%02d:%%02d:%%0%d.%df' % (n_msec+3, n_msec)
    else:
        pattern = r'%02d:%02d:%02d'
    if d == 0:
        return pattern % (h, m, s)
    return ('%d days, ' + pattern) % (d, h, m, s)

def time2sec(time, n_msec=3):
    ''' Convert an 'HH:MM:SS.FFF' string to whole seconds since 00:00:00.

    The text must match '%H:%M:%S.%f' (a fractional part is required by the
    format) but the fraction is not included in the result.  ``n_msec`` is
    accepted and not used.
    '''
    from datetime import datetime as clock
    parsed = clock.strptime(time,'%H:%M:%S.%f')
    return parsed.second + parsed.minute*60 + parsed.hour*3600

def filterdata(indexfile,start,end):
    ### Search files corresponding to the valid survey time
    start=time2sec(start)
    end=time2sec(end)
    
    df=find(indexfile,start,end)
    
    ### Separate the Polarization file list
    
    E1=df[df['file'].str.contains('_E1_', case=True, flags=0, na=np.nan, regex=True)]
    E2=df[df['file'].str.contains('_E2_', case=True, flags=0, na=np.nan, regex=True)]
    
    print E1.head(1)
    print E1.tail(1)
    
    print E2.head(1)
    print E2.tail(1)
    
    ### If there is some seconds in the previous file 
    
    if E1.iloc[0]['file'] > start:
        filename=E1.iloc[0]['file']
        numberfile=int(filename.split('_')[-1])-1
        if not numberfile == 0:
            print '_'.join(filename.split('_')[:-1]+['{0:04}'.format(numberfile)])
        else:
            "First survey file"
    
    if E1.iloc[0]['file'] > start:
        filename=E2.iloc[0]['file']
        numberfile=int(filename.split('_')[-1])-1
        if not numberfile == 0:
            print '_'.join(filename.split('_')[:-1]+['{0:04}'.format(numberfile)])
        else:
            "First survey file"
 

In [None]:
# -*- coding: utf-8 -*-
import os
import struct
import mmap
import sys
import yaml
import argparse
import multiprocessing
import numpy as np
from bitstring import BitStream, ConstBitStream,Bits
from bitarray import bitarray as BitArray
from itertools import chain
import csv

def sec2time(sec, n_msec=3):
    ''' Convert seconds to '[D days, ]HH:MM:SS[.FFF]'.

    sec    -- a number of seconds, or any object with ``__len__`` (list,
              array, ...) whose elements are converted one by one.
    n_msec -- number of decimals printed for the seconds field; 0 or less
              prints whole seconds only.

    Returns one string for a scalar and a list of strings for a sequence.
    '''
    if hasattr(sec,'__len__'):
        # n_msec must be forwarded, otherwise every element is formatted
        # with the default precision instead of the requested one.
        return [sec2time(s, n_msec) for s in sec]
    m, s = divmod(sec, 60)
    h, m = divmod(m, 60)
    d, h = divmod(h, 24)
    if n_msec > 0:
        # field width = 2 integer digits + '.' + n_msec decimals
        pattern = '%%02d:%%02d:%%0%d.%df' % (n_msec+3, n_msec)
    else:
        pattern = r'%02d:%02d:%02d'
    if d == 0:
        return pattern % (h, m, s)
    return ('%d days, ' + pattern) % (d, h, m, s)

def time2sec(time, n_msec=3):
    ''' Convert an 'HH:MM:SS.FFF' string to whole seconds since 00:00:00.

    The text is parsed with '%H:%M:%S.%f'; the fractional part has to be
    present but is dropped from the result.  ``n_msec`` is not used.
    '''
    from datetime import datetime as clock
    stamp = clock.strptime(time,'%H:%M:%S.%f')
    hours_in_seconds = stamp.hour*3600
    minutes_in_seconds = stamp.minute*60
    return stamp.second + minutes_in_seconds + hours_in_seconds

def readheader(BS,hprint=None):
    b = BitStream('0x'+''.join(x.encode('hex') for x in struct.unpack('>72s',BS)))
    #magic = b.read(32).hex# 32 magic word
    header = {}
    header["recordlength"] = b.read(16).uint   # 16 0..65535
    header["hdrlen"] = b.read(8).uint    # 8 0..255
    header["blocksize"] = b.read(8).uint   # 8 0..25
    header["samplerate"] = b.read(16).uint   # 16 0..65535
    header["efegain"] = b.read(10).uint   # 10 0..650
    header["qu"] = b.read(3).uint    # 16 0..7 (0=>1bits,1=>2bits,2=>4bits,4=>8bits,5=>16bits, 3,6,7 spare)
    header["msg"] = b.read(3).uint    # 3 6
    header["frameid"] = b.read(32).uint  # 32 0..4294967295
    header["version"] = b.read(7).uint    # 7 0..127
    header["timetag_samps"] = b.read(25).uint   # 25 0..17499999
    header["offsetfreq"] = b.read(32).int   # 32 0..4294967295
    header["timetag_secs"] = b.read(17).uint   # 17 0..86399
    header["subc"] = b.read(4).uint    # 4 0..16
    header["digitalgain"] = b.read(11).uint   # 11 0..2047
    header["subchan0_offset"] = b.read(32).int    # 32 -2147483647..+2147483647
    header["subchan1_offset"] = b.read(32).int    # 32 -2147483647..+2147483647
    header["subchan2_offset"] = b.read(32).int    # 32 -2147483647..+2147483647
    header["subchan3_offset"] = b.read(32).int    # 32 -2147483647..+2147483647
    header["sweeprate"] = b.read(32).int    # 32 -2147483647..+2147483647
    header["path_delay"] = b.read(32).int    # 32 0..2^32-1
    header["gdspid"] = b.read(8).uint    # 1 0..1
    header["hs"] = b.read(1).uint    # 1 0..1
    header["semr"] = b.read(12).int    # 12
    header["sweepchange"] = b.read(11).uint   # 11 0..2047
    header["ncov"] = b.read(1).uint    # 1 0..1
    header["ncoreset_c"] = b.read(11).int    # 11 -1024..+1024
    header["ncoreset_t"] = b.read(20).uint   # 20 0..863999
    b.read(128).uint    # 128 Empty
    
    if hprint:
        #print header
        print yaml.dump(header, default_flow_style=False)
    
    return header

def readblocks(filename,start,end):
    with open(filename, 'rb') as f:
        # memory-map the file, size 0 means whole file
        mm = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ)

        mm.seek(start)
        position=mm.tell()
        print position
            
        while True:
            # Magic Word
            byte=mm.read(4)
            if not byte:
                break
            # Read Header (check cuantization and save timestamp)
            header = readheader(mm.read(72),hprint=None)
            ## Calculate UTC
            X=np.float64(header["timetag_samps"])*np.float64(1./17500000)
            Y=np.float64(header["path_delay"])*np.float64(1./35000000)
            utctime=np.float64(header["timetag_secs"])+X-Y
            ttime=sec2time(utctime,6)
            print utctime,',',ttime,',',filename
            mm.read(1392)
            if end == mm.tell():
                return [utctime,ttime,filename]
            
def findposition(Tstart,Tend,filename,first=None,last=None):
    
    if first and last:
        sys.exti(0)
        
    fsize=os.path.getsize(filename) #size of file (in bytes)  
    
    if fsize == 0:
        print ",,",filename
        sys.exit(1)
    
    print fsize
    start=0
    end=1468
    ti=readblocks(filename,start,end)
    print ti
    
    start=fsize-1468
    end=fsize
    tf=readblocks(filename,start,end)
    print tf
    
    deltaT=(tf[0]-ti[0])/(fsize/1468)
    print deltaT

    if first:
        ## start - Tstart
        diffT=(tf[0] - Tstart)
        print diffT
        nblocks=int(diffT/deltaT)
        start=fsize-(nblocks+2)*1468
        end=start+1468
        tt=readblocks(filename,start,end)
        tt.extend([start,])
    
    if last:
        ## Tend-end
        diffT=(Tend-ti[0]) 
        print diffT
        nblocks=int(diffT/deltaT)
        start=nblocks*1468
        end=(nblocks+2)*1468
        tt=readblocks(filename,start,end)
        tt.extend([0,end])
    
    print tt
        

# Run findposition twice for the window 12:30:00 - 12:30:55:
# once with first=True on file ..._0002 and once with last=True on file ..._0003.
for first, last, filename in (
        (True, None, '/media/bgx/CONAE/ESU1/MG11_NET4_2016_088_DD_E1_122807_0002'),
        # alternative: '/media/bgx/CONAE/ESU1/MG11_NET4_2016_088_DD_E1_195710_0003'
        (None, True, '/media/bgx/CONAE/ESU1/MG11_NET4_2016_088_DD_E1_122807_0003'),
        # alternative: '/media/bgx/CONAE/ESU1/MG11_NET4_2016_088_DD_E1_195710_0004'
):
    start='12:30:00.0'
    end='12:30:55.0'

    start=time2sec(start)
    end=time2sec(end)
    findposition(start,end,filename,first,last)

In [2]:
%%writefile prepare.py

#Input for this software are a configuration file and a survey schedule in csv format.
import os
import glob
import pandas as pd
pd.set_option('display.width',256)
import numpy as np
import datetime
import mmap
import sys

def find(indexfile,start,end):
    print indexfile,start,end

    aa = pd.read_csv(indexfile)
    
    #print aa.head()
    
    df = aa[(aa.index >= start) & (aa.index <= end)]

    #print df.head()
    return df

def sec2time(sec, n_msec=3):
    ''' Convert seconds to '[D days, ]HH:MM:SS[.FFF]'.

    sec    -- a number of seconds, or any object with ``__len__`` (list,
              array, ...) whose elements are converted one by one.
    n_msec -- number of decimals printed for the seconds field; 0 or less
              prints whole seconds only.

    Returns one string for a scalar and a list of strings for a sequence.
    '''
    if hasattr(sec,'__len__'):
        # Keep the caller's precision for every element; the recursive call
        # previously reverted to the default of 3 decimals.
        return [sec2time(s, n_msec) for s in sec]
    m, s = divmod(sec, 60)
    h, m = divmod(m, 60)
    d, h = divmod(h, 24)
    if n_msec > 0:
        # field width = 2 integer digits + '.' + n_msec decimals
        pattern = '%%02d:%%02d:%%0%d.%df' % (n_msec+3, n_msec)
    else:
        pattern = r'%02d:%02d:%02d'
    if d == 0:
        return pattern % (h, m, s)
    return ('%d days, ' + pattern) % (d, h, m, s)

def time2sec(time, n_msec=3):
    ''' Convert an 'HH:MM:SS.FFF' string to whole seconds since 00:00:00.

    The text is parsed with '%H:%M:%S.%f'; the fractional part has to be
    present but does not contribute to the result.  ``n_msec`` is not used.
    '''
    from datetime import datetime as clock
    parts = clock.strptime(time,'%H:%M:%S.%f')
    return parts.second + 60*parts.minute + 3600*parts.hour

def findposition(Tstart,Tend,filename,first=None,last=None):
    
    if first and last:
        sys.exti(0)
        
    fsize=os.path.getsize(filename) #size of file (in bytes)  
    
    if fsize == 0:
        print ",,",filename
        sys.exit(1)
    
#     print fsize
    start=0
    end=1468
    ti=readblocks2(filename,start,end)
#     print ti
    
    start=fsize-1468
    end=fsize
    tf=readblocks2(filename,start,end)
#     print tf
    
    deltaT=(tf[0]-ti[0])/(fsize/1468)
#     print deltaT

    if first:
        ## start - Tstart
        diffT=(tf[0] - Tstart)
#         print diffT
        nblocks=int(diffT/deltaT)
        start=fsize-(nblocks+2)*1468
        end=start+1468
        tt=readblocks2(filename,start,end)
#         tt.extend([start,])
    
    if last:
        ## Tend-end
        diffT=(Tend-ti[0]) 
#         print diffT
        nblocks=int(diffT/deltaT)
        start=nblocks*1468
        end=(nblocks+2)*1468
        tt=readblocks2(filename,start,end)
#         tt.extend([0,end])
    
    #print tt
    return tt

def filterdata(indexfile,start,end):
    ### Search files corresponding to the valid survey time
    start=time2sec(start)
    end=time2sec(end)
    
    df=find(indexfile,start,end)
    
    ### Separate the Polarization file list
    
    E1=df[df['filename'].str.contains('_E1_', case=True, flags=0, na=np.nan, regex=True)]
    E2=df[df['filename'].str.contains('_E2_', case=True, flags=0, na=np.nan, regex=True)]
        
    E1.reset_index(level=0, inplace=True)   
    E2.reset_index(level=0, inplace=True)
    
    if len(E1.index) == 1:
        print 'One file'
    else:
        
        new=[]
        ### If there is some seconds in the previous file  
        if E1.iloc[0]['index'] > start:
            print 'Some seconds in the previous file' 
            filename=E1.iloc[0]['filename']
            numberfile=int(filename.split('_')[-1])-1
        
            if not numberfile == 0:
                prevfile='_'.join(filename.split('_')[:-1]+['{0:04}'.format(numberfile)])
                new=[findposition(start,end,prevfile,first=True,last=False)]
                result=E1.values.tolist()
                new.extend(result)
            else:
                print "First survey file"
                
        print new
        
        new=[]       
        if E1.iloc[-1]['index'] < end:
            print 'Some seconds before end of file' 
            filename=E1.iloc[0]['filename']
            new=[findposition(start,end,filename,first=False,last=True)]
            result=E1.values.tolist()
            new.extend(result)
            print new
            
        new=[]
        ### If there is some seconds in the previous file  
        if E2.iloc[0]['index'] > start:
            print 'Some seconds in the previous file' 
            filename=E2.iloc[0]['filename']
            numberfile=int(filename.split('_')[-1])-1

        if not numberfile == 0:
            prevfile='_'.join(filename.split('_')[:-1]+['{0:04}'.format(numberfile)])
            new=[findposition(start,end,prevfile,first=True,last=False)]
            result=E2.values.tolist()
            new.extend(result)
        else:
            print "First survey file"
        
        print new
        
        new=[]
        if E1.iloc[-1]['index']  < end:
            print 'Some seconds before end of file' 
            filename=E2.iloc[0]['filename']
            new=[findposition(start,end,filename,first=False,last=True)]
            result=E2.values.tolist()
            new.extend(result)

        print new

def readheader(BS,hprint=None):
    from bitstring import BitStream
    import struct
    b = BitStream('0x'+''.join(x.encode('hex') for x in struct.unpack('>72s',BS)))
    #magic = b.read(32).hex# 32 magic word
    header = {}
    header["recordlength"] = b.read(16).uint   # 16 0..65535
    header["hdrlen"] = b.read(8).uint    # 8 0..255
    header["blocksize"] = b.read(8).uint   # 8 0..25
    header["samplerate"] = b.read(16).uint   # 16 0..65535
    header["efegain"] = b.read(10).uint   # 10 0..650
    header["qu"] = b.read(3).uint    # 16 0..7 (0=>1bits,1=>2bits,2=>4bits,4=>8bits,5=>16bits, 3,6,7 spare)
    header["msg"] = b.read(3).uint    # 3 6
    header["frameid"] = b.read(32).uint  # 32 0..4294967295
    header["version"] = b.read(7).uint    # 7 0..127
    header["timetag_samps"] = b.read(25).uint   # 25 0..17499999
    header["offsetfreq"] = b.read(32).int   # 32 0..4294967295
    header["timetag_secs"] = b.read(17).uint   # 17 0..86399
    header["subc"] = b.read(4).uint    # 4 0..16
    header["digitalgain"] = b.read(11).uint   # 11 0..2047
    header["subchan0_offset"] = b.read(32).int    # 32 -2147483647..+2147483647
    header["subchan1_offset"] = b.read(32).int    # 32 -2147483647..+2147483647
    header["subchan2_offset"] = b.read(32).int    # 32 -2147483647..+2147483647
    header["subchan3_offset"] = b.read(32).int    # 32 -2147483647..+2147483647
    header["sweeprate"] = b.read(32).int    # 32 -2147483647..+2147483647
    header["path_delay"] = b.read(32).int    # 32 0..2^32-1
    header["gdspid"] = b.read(8).uint    # 1 0..1
    header["hs"] = b.read(1).uint    # 1 0..1
    header["semr"] = b.read(12).int    # 12
    header["sweepchange"] = b.read(11).uint   # 11 0..2047
    header["ncov"] = b.read(1).uint    # 1 0..1
    header["ncoreset_c"] = b.read(11).int    # 11 -1024..+1024
    header["ncoreset_t"] = b.read(20).uint   # 20 0..863999
    b.read(128).uint    # 128 Empty
    
    if hprint:
        #print header
        print yaml.dump(header, default_flow_style=False)
    
    return header

def readblocks2(filename,start,end):
    ''' Read blocks from byte offset ``start`` until the read position equals ``end``.

    Each block is consumed as a 4-byte magic word, a 72-byte header and
    1392 further bytes.

    Returns [utctime, ttime, samplerate, filename, start, end] for the block
    that ends exactly at ``end``; a row of 'nan' placeholders for an empty
    file; None when the end of the file is reached first.
    '''
    with open(filename, 'rb') as f:
        fsize=os.path.getsize(filename)
        if fsize == 0:
            return ['nan','nan','nan',filename,'0',int(fsize)]

        # memory-map the file, size 0 means whole file
        mm = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ)
        try:
            mm.seek(start)

            while True:
                # Magic Word
                if not mm.read(4):
                    return None
                # Read Header (check quantization and save timestamp)
                header = readheader(mm.read(72),hprint=None)
                ## Calculate UTC
                X=np.float64(header["timetag_samps"])*np.float64(1./17500000)
                Y=np.float64(header["path_delay"])*np.float64(1./35000000)
                utctime=np.float64(header["timetag_secs"])+X-Y
                ttime=sec2time(utctime,6)
                fs=header["samplerate"]
                mm.read(1392)
                if end == mm.tell():
                    return [utctime,ttime,fs,filename,int(start),int(end)]
        finally:
            # The mapping was previously left open on every call.
            mm.close()

def readblocks(filename):
    ''' Build the index row of a file from its first block.

    Returns [utctime, ttime, samplerate, filename, 0, filesize], where the
    time comes from the header of the first block, or a row of 'nan'
    placeholders when the file is empty.
    '''
    import mmap

    with open(filename, 'rb') as f:
        fsize=os.path.getsize(filename)
        if fsize == 0:
            return ['nan','nan','nan',filename,'0',fsize]

        # memory-map the file, size 0 means whole file
        mm = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ)
        try:
            # Only the first block is needed: the original ``while True``
            # returned during its first pass (its ``break`` was unreachable).
            # Magic Word
            if not mm.read(4):
                return None
            # Read Header (check quantization and save timestamp)
            header = readheader(mm.read(72),hprint=None)
            ## Calculate UTC
            X=np.float64(header["timetag_samps"])*np.float64(1./17500000)
            Y=np.float64(header["path_delay"])*np.float64(1./35000000)
            utctime=np.float64(header["timetag_secs"])+X-Y
            ttime=sec2time(utctime,6)
            fs=header["samplerate"]
            return [utctime,ttime,fs,filename,int(0),int(fsize)]
        finally:
            # The mapping was previously never released.
            mm.close()

def genindex(indexfile,workdir):
    ''' Write a CSV index with one row per data file found in ``workdir``.

    Files whose name ends in '_0000' are skipped.  Each row is produced by
    readblocks; the header row names the six columns.
    '''
    import csv
    ll=[]
    # glob returns names in arbitrary order; sort them so that the rows of
    # the index follow the file names (callers look at the first and last
    # row of a selection).
    for path in sorted(glob.glob(workdir+'/*')):
        if path.split('_')[-1] == '0000':
            continue
        ll.append(readblocks(path))
    # 'wa' is not a valid mode.  The Python 2 csv module expects the file to
    # be opened in binary mode.
    with open(indexfile, 'wb') as f:
        fieldnames = ['start_seconds','start_ttime','freq','filename','start_byte','end_byte']
        writer = csv.writer(f)
        writer.writerow(fieldnames)
        writer.writerows(ll)

## Variable 

### Schedule from the Observing Report
schedulefile='/home/bgx/MEGAsync-Work/ITeDA/DSA3/Proyectos/detection/software/schedule.csv'
configfile='/home/bgx/MEGAsync-Work/ITeDA/DSA3/Proyectos/detection/software/prepare.conf'
dataschedulefile='/home/bgx/MEGAsync-Work/ITeDA/DSA3/Proyectos/detection/software/dataschedule.csv'
workdir='/home/bgx/MEGAsync-Work/ITeDA/DSA3/Proyectos/detection/software/'

surveypath='/media/bgx/CONAE/'

indexfiles=['ESU1_index.csv','ESU2_index.csv','ESU3_index.csv']

## Generate First Index for the Survey
for i in indexfiles:
    indexfile=workdir+i
    print indexfile
    if os.path.isfile(indexfile):
        print 'Index OK'
    else: 
        print 'Index file doesn exist.'
        print 'Creating...',indexfile
        genindex(indexfile,surveypath+i.split('_')[0])

## Read Schedule
headers = ['datetime', 'recordlength', 'source','frec_hz','resolution']
dtypes = {'datetime': 'str', 'recordlength': 'float', 'source': 'str','frec_hz':'float', 'resolution':'int'}
df = pd.read_csv(schedulefile, header=None, names=headers, dtype=dtypes)
#print df

## Generate Data Schedule
a=0
for index, row in df.iterrows():
    #print row
    tstart=datetime.datetime.strptime(row['datetime'],'%Y/%m/%d/%H:%M:%S')
    tend=tstart+datetime.timedelta(seconds=row['recordlength'])
#     print tstart.timetuple().tm_yday, tstart.strftime("%H:%M:%S.%f"),tend.strftime("%H:%M:%S.%f")
    filterdata(workdir+indexfiles[0],tstart.strftime("%H:%M:%S.%f"),tend.strftime("%H:%M:%S.%f"))
    if a == 2:
        break
    a=a+1

## Should be a list of sources to process
# source=['0521']
# selected=df[df['source'].str.contains(source[0], case=True, flags=0, na=np.nan, regex=True)]
# print selected




Overwriting prepare.py


In [10]:
%run prepare


/home/bgx/MEGAsync-Work/ITeDA/DSA3/Proyectos/detection/software/ESU1_index.csv
Index OK
/home/bgx/MEGAsync-Work/ITeDA/DSA3/Proyectos/detection/software/ESU2_index.csv
Index OK
/home/bgx/MEGAsync-Work/ITeDA/DSA3/Proyectos/detection/software/ESU3_index.csv
Index OK
datetime        2016/03/28/12:10:00
recordlength                     56
source                     1934-638
frec_hz                      100000
resolution                       16
Name: 0, dtype: object
88 12:10:00.000000 12:10:56.000000
/home/bgx/MEGAsync-Work/ITeDA/DSA3/Proyectos/detection/software/ESU1_index.csv 43800 43856
                start_seconds start_ttime  \
43803.903055  12:10:03.903055         176   
43860.989691  12:11:00.989691         176   
43920.980357  12:12:00.980357         176   
43980.966648  12:13:00.966648         176   
44040.961689  12:14:00.961689         176   

                                                       filename  start_byte  \
43803.903055  /media/bgx/CONAE/ESU1/MG11_NET4_2016_088_DD

In [4]:
import os
import glob
import pandas as pd
import numpy as np
import datetime

# Scratch check of the row selection used by find() in prepare.py.
indexfile='/home/bgx/MEGAsync-Work/ITeDA/DSA3/Proyectos/detection/software/ESU1_index.csv'
# Window limits in seconds: 43800 = 12:10:00 and 43856 = 12:10:56.
start=43800
end=43856

aa = pd.read_csv(indexfile)
    
print aa.head()

# Filter on the row labels: in the printed frame the labels hold the start
# time in seconds, while the column named 'start_seconds' shows the
# formatted time (the columns appear shifted by one).
df = aa[(aa.index >= start) & (aa.index <= end)]
print df.head()


                start_seconds start_ttime  \
43803.903055  12:10:03.903055         176   
43860.989691  12:11:00.989691         176   
43920.980357  12:12:00.980357         176   
43980.966648  12:13:00.966648         176   
44040.961689  12:14:00.961689         176   

                                                       filename  start_byte  \
43803.903055  /media/bgx/CONAE/ESU1/MG11_NET4_2016_088_DD_E1...           0   
43860.989691  /media/bgx/CONAE/ESU1/MG11_NET4_2016_088_DD_E1...           0   
43920.980357  /media/bgx/CONAE/ESU1/MG11_NET4_2016_088_DD_E1...           0   
43980.966648  /media/bgx/CONAE/ESU1/MG11_NET4_2016_088_DD_E1...           0   
44040.961689  /media/bgx/CONAE/ESU1/MG11_NET4_2016_088_DD_E1...           0   

               end_byte  
43803.903055   95778192  
43860.989691  100650484  
43920.980357  100643144  
43980.966648  100657824  
44040.961689  100720948  
                start_seconds start_ttime  \
43803.903055  12:10:03.903055         176   
43803.90

In [44]:
# Add a row to df  
# NOTE: this import rebinds the name ``df`` to the DataFrame class, replacing
# any DataFrame previously stored under that name in the kernel.
from pandas import DataFrame as df  
import numpy as np  
# One-row frame: the list becomes a column, .T turns it into a row.
mydf = df([1.1, 1.1, 1.1]).T
print mydf
arow2 = [2.2, 2.2, 2.2]  
# Append the new row on the underlying array, then rebuild the frame.
mynparray = mydf.values
mynparray = np.vstack((mynparray,arow2))  
mydf = df(mynparray)
print mydf

     0    1    2
0  1.1  1.1  1.1
     0    1    2
0  1.1  1.1  1.1
1  2.2  2.2  2.2
