In [1]:
import pandas
import re
from gbdhidro.hobo import hobo

In [57]:
filename = './EHP07034.csv'
ID = 'EHP-07'
SN = '10364362'
config_file = './stations_info.csv'

In [58]:
title, serial_number, header, extra = hobo.get_info(filename)
print(title, serial_number)
print(header)

EHP-07 10364362
['#', 'Date Time, GMT-03:00', 'Chuva, mm (LGR S/N: 10364362, SEN S/N: 10364362, LBL: mm)', 'Soma Acum.: Chuva, mm (LGR S/N: 10364362)', 'Coupler Attached (LGR S/N: 10364362)', 'Host Connected (LGR S/N: 10364362)', 'End Of File (LGR S/N: 10364362)']


In [59]:
# Abre arquivo de configuracao e retira dados importantes
cfgs = pandas.read_csv(config_file)
row = cfgs.loc[cfgs['Plot Title'] == title]
if row.empty:
    # Erro - nao tem nenhuma informacao sobre esse titulo de plot
    print('Buuu - Nao encontrei nada com esse titulo de plot')
station_id = row.iloc[0]['Codigo']
station_sn = row.iloc[0]['Numero de serie']
station_latitude = row.iloc[0]['Latitude [graus]']
station_longitude = row.iloc[0]['Longitude [graus]']
station_altitude = row.iloc[0]['Altitude [m]']
station_variable_col = row.iloc[0]['Coluna variavel']
station_datetime_col = row.iloc[0]['Coluna data/hora']

In [60]:
cfg_sn

10440731

In [61]:
# checa se arquivo tem o nome e o numero de serie esperado
if title!=ID:
    print('Titulo do arquivo ({}) diferente de id esperado ({})'.format(title,ID))

if serial_number != SN:
    print('Serial number ({}) diferente do esperado ({})'.format(serial_number, SN))

In [62]:
# Le dados
data = hobo.get_data(filename)
data

Unnamed: 0,#,"Date Time, GMT-03:00","Chuva, mm (LGR S/N: 10364362, SEN S/N: 10364362, LBL: mm)","Soma Acum.: Chuva, mm (LGR S/N: 10364362)",Coupler Attached (LGR S/N: 10364362),Host Connected (LGR S/N: 10364362),End Of File (LGR S/N: 10364362)
0,1,02/26/20 12:00:00 AM,,0.0,,,
1,2,02/26/20 04:00:00 PM,0.0,,,,
2,3,02/27/20 12:00:00 AM,,0.0,,,
3,4,02/28/20 12:00:00 AM,,0.0,,,
4,5,02/29/20 12:00:00 AM,,0.0,,,
...,...,...,...,...,...,...,...
547,548,05/07/20 12:00:00 AM,,0.0,,,
548,549,05/08/20 12:00:00 AM,,0.0,,,
549,550,05/08/20 11:45:30 AM,,,Logged,,
550,551,05/08/20 11:45:35 AM,,,,Logged,


In [70]:
# Titule e sn confirmado, le dados extra
details = hobo.process_details(extra)
series = details['Details']
teste = series['Series: Soma Acum.: Chuva mm']
teste['Filter Parameters']
series

{'Series: Chuva mm (mm)': {'Devices': {'Device Info': {'Product': 'HOBO UA-003-64 Pendant Temp/Event',
    'Serial Number': '10364362',
    'Version Number': '1.17',
    'Manufacturer': 'Onset Computer Corp.',
    'Device Memory': '65536',
    'Header Created': '05/14/15 08:51:32 AM GMT-03:00'}},
  'Deployment Info': {'Full Series Name': 'Chuva mm',
   'Sensor Label': 'mm',
   'Launch Name': 'EHP-07',
   'Deployment Number': '40',
   'Launch Time': '02/26/20 03:32:52 PM GMT-03:00',
   'Launch GMT Offset': '-3 Hr 0 Min',
   'Battery at Launch': '2.91 Volts',
   'Launching Program': 'HOBOware -3.7.12_0425_0948_Windows'},
  'Series Statistics': {'Samples': '477',
   'First Sample Time': '02/26/20 04:00:00 PM GMT-03:00',
   'Last Sample Time': '05/08/20 11:46:46 AM GMT-03:00'}},
 'Series: Soma Acum.: Chuva mm': {'Devices': {'Device Info': {'Product': 'HOBO UA-003-64 Pendant Temp/Event',
    'Serial Number': '10364362',
    'Version Number': '1.17',
    'Manufacturer': 'Onset Computer Corp.

In [None]:
def find_title(str_line):
    # Extrai titulo de string
    match = re.search(r'(?:Plot Title: )([^"]+)',str_line)
    if match:
        return match.group(1)
    else:
        return None


def find_serial_number(str_line):
    """
    Extrai numero de serial de string
    """
    match = re.search(r'(?:LGR S/N: |Serial Number:)(\d+)',str_line)
    if match:
        return match.group(1)
    else:
        return None

def get_info(file_name):
    
    # Obtem nome das colunas
    header = list(pandas.read_csv(filename, delimiter=delimiter,  header=0, skiprows=1, nrows=0, encoding=encoding))

    # Extrai titulo e informacoes extras se disponiveis
    fo = open(filename, 'rt', encoding='utf-8')
    title = find_title(fo.readline())
    sn = find_serial_number(fo.readline())

    # Informacoes extras
    n_cols = len(header)
    extra = []
    for i in range(MAX_EXTRA_SIZE):
        # separa nos separadores, mas não se tiver dentro de ""
        fields = re.split(delimiter + '(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)', fo.readline())
        n_fields = len(fields)
        if n_fields > n_cols:
            extra.append(delimiter.join(fields[n_cols:]))
        elif n_fields < n_cols:
            # Provavelmente uma linha invalida. ignora
            pass
        else:
            # numero de campos é igual ao de dados. termina procura por dados extra
            break
    extra = ''.join(extra)
    fo.close()
    
    return title, sn, header, extra

def get_data(file_name):
    # Extrai dados
    header = pandas.read_csv(filename, delimiter=delimiter,  header=0, skiprows=1, nrows=0, encoding=encoding)
    table = pandas.read_csv(filename, delimiter=delimiter, header=0, skiprows=1, encoding=encoding, usecols=header) 
    return table


def process_data(text):
    levels = []
    levels.append(['Details'])
    levels.append(['Series:','Event Type:'])
    levels.append(['Devices', 'Deployment Info', 'Series Statistics', 'Filter Parameters'])
    levels.append(['Device Info'])
    #teste = re.split(r'[\n](?=Details|Series: |Event Type: )',extra)
    #r'(?:Series:|Event Type:).+?[\n](?=Series:|Event Type:|$)'    
    return get_all_groups(text, levels)

def get_group(text, level):
    regex1 = '(?:'
    regex2 = '.+?[\n](?='
    first = True
    for m in level:
        if not first:
            regex1 += '|'
            regex2 += '|'
        else:
            first = False
        regex1 += m
        regex2 += m
        
    regex1 += ')'
    regex2 += '|$)'
    regex = regex1 + regex2
    match = re.compile(regex, re.S)
    return match.findall(text)

def text_to_dict(text):
    fields = text.split('\n')
    d = {}
    for f in fields:
        s = f.split(':', 1)
        if len(s) == 2:
            d.update({s[0].strip(): s[1].strip()})
    return(d)

def get_all_groups(text, levels, level_number=0):
    n_levels = len(levels)
    groups = []
    temp = get_group(text, levels[level_number])  

    output = {}
    level_number += 1
    for l in temp:
        [key, val] = l.split("\n", 1)
        new_val = None
        if level_number < n_levels:
            new_val = get_all_groups(val, levels, level_number)
        if new_val:
            val = new_val
        else:
            val = text_to_dict(val)

        output.update({key: val})
    return output


In [None]:
table = get_data(filename)
display(table)

In [None]:
title, sn, header, extra = get_info(filename)

In [None]:
print('Titulo: {}'.format(title))
print('Numero de serie: {}'.format(sn))
print('Cabecalho: {}'.format(header))
print('Informacao extra: {}'.format(extra))

In [None]:
process_data(extra)

In [34]:
delta = "P3Y6M4DT12H30M5S"
#delta = "P5W"

In [35]:
#p = re.compile(r'(?P<year>\d+)Y(?P<month>\d+)M(?P<day>\d+)', re.IGNORECASE)
 #p = re.compile(r'GMT(?P<hour>[-+]*\d+):*(?P<minute>\d+)*', re.IGNORECASE)

# Formato P12W (week)
def period_iso8601_to_relativetime(text):
    from dateutil.relativedelta import relativedelta
    p = re.compile(r'P(?P<years>\d+(?=Y))*\D*(?P<months>\d+(?=M))*\D*(?P<weeks>\d+(?=W))*\D*(?P<days>\d+(?=D))*\D*T*(?P<hours>\d+(?=H))*\D*(?P<minutes>\d+(?=M))*\D*(?P<seconds>\d+(?=S))*\D*', re.IGNORECASE)
    m = p.search(text)
    years = 0
    months = 0
    weeks = 0
    days = 0
    hours = 0
    minutes = 0
    seconds = 0
    
    if m['years']:
        years = float(m['years'])
    if m['months']:
        months = float(m['months'])
    if m['weeks']:
        weeks = float(m['weeks'])
    if m['days']:
        days = float(m['days'])
    if m['hours']:
        hours = float(m['hours'])
    if m['minutes']:
        minutes = float(m['minutes'])
    if m['seconds']:
        seconds = float(m['seconds'])
        
    delta = relativedelta(
        years = years,
        months = months,
        weeks = weeks,
        days = days,
        hours = hours,
        minutes = minutes,
        seconds = seconds)
    return delta
period_iso8601_to_relativetime(delta)

relativedelta(years=+3, months=+6, days=+4, hours=+12, minutes=+30, seconds=+5)

In [20]:
m.groups()

AttributeError: 'NoneType' object has no attribute 'groups'

In [25]:
# Formato P12W (week)
delta = 'P5W'
def period_iso8601_to_deltatime(text):
    from dateutil.relativedelta import relativedelta
    w = re.compile(r'^P(?P<week>\d+(?=W))', re.IGNORECASE)
    m = w.search(text)
    if m:
        delta = relativedelta(
            weeks = float(m['weeks']) )
period_iso8601_to_deltatime('P5W')

IndexError: no such group

In [None]:
flot