In [1]:
import sqlite3 as sq
from pathlib import Path
from contextlib import closing

import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.csv as pacsv

In [3]:
# Root of the reprocessing sandbox on the mounted volume.
sandbox_dir = Path('/Volumes/TempData/Bretfeld Mario/Chimney-Park-Reprocessing-Sandbox/')

# Data directory, resolved against the sandbox root *before* the root name is rebound below.
data_dir = sandbox_dir.joinpath('Bad', 'Chimney', 'Data')

# From here on `sandbox_dir` and `wd` both refer to the 'Alex Work' working directory.
sandbox_dir = sandbox_dir.joinpath('Alex Work')
wd = sandbox_dir

# SQLite database file kept inside the working directory.
db_fn = wd.joinpath('CP.sqlite3')

In [45]:
def create_table(name, columns, csr=None, con=None):
    '''
    creates a sqlite3 table (no-op if a table of that name already exists)
    name: the table name
    columns: a dict of column names as {colname:dtype}
    csr, con: cursor and connection objects. Both are optional and are
        resolved when the function is *called*, not when it is defined:
        a missing connection is taken from the cursor, or failing that from
        the module-level `con`; a missing cursor is opened on the connection.

    Prints the generated statement, executes it and commits.
    Raises ValueError when no connection can be determined.

    NOTE: table/column names are interpolated into the SQL text (identifiers
    cannot be bound as parameters), so only pass trusted names.
    '''

    # Late-bind the connection: defaults written as `con=con` would be frozen
    # at definition time and go stale as soon as the notebook reconnects.
    if con is None:
        # prefer the cursor's own connection so the pair can never disagree
        con = csr.connection if csr is not None else globals().get('con')
    if con is None:
        raise ValueError('no sqlite3 connection: pass con= or define a module-level `con`')
    if csr is None:
        csr = con.cursor()

    # 'col1 TYPE1, col2 TYPE2, ...'
    col_defs = ", ".join(f'{col} {_type}' for col, _type in columns.items())
    query = f'CREATE TABLE IF NOT EXISTS {name}({col_defs})'

    print(query, '\n')

    # send the query
    csr.execute(query)
    # save changes
    con.commit()

    return

def insert(name, values, columns=None, csr=None, con=None):
    '''
    inserts several rows of data
    name: table to insert into
    values: iterable of tuples, shape (nrows, ncols); an empty iterable is a no-op
    columns: iterable of column names for associated values
    csr, con: cursor and connection objects. Both are optional and are
        resolved when the function is *called*, not when it is defined:
        a missing connection is taken from the cursor, or failing that from
        the module-level `con`; a missing cursor is opened on the connection.

    Prints the generated statement, executes it for every row and commits.
    Raises ValueError when there are rows to insert but no connection can be
    determined.
    '''

    # materialise so generators work and the first row can be inspected
    values = list(values)
    if not values:
        # nothing to insert; avoids IndexError on values[0]
        return

    # Late-bind the connection: defaults written as `con=con` would be frozen
    # at definition time and go stale as soon as the notebook reconnects.
    if con is None:
        # prefer the cursor's own connection so the pair can never disagree
        con = csr.connection if csr is not None else globals().get('con')
    if con is None:
        raise ValueError('no sqlite3 connection: pass con= or define a module-level `con`')
    if csr is None:
        csr = con.cursor()

    # create a string '?, ?, ?, ...' with one placeholder per value in a row
    qmarks = ', '.join(['?'] * len(values[0]))
    if columns is not None:
        # create a string 'col1, col2, col3, ...'
        columns = ', '.join(columns)

        query = f'INSERT INTO {name}({columns}) VALUES({qmarks})'

    else:
        query = f'INSERT INTO {name} VALUES({qmarks})'

    print(query, '\n')
    csr.executemany(query, values)
    con.commit()

    return

In [42]:
# Key columns shared by every table; they are appended to each schema below.
key_cols = {
    'timestamp': 'TEXT',
    'site': 'TEXT',      # BBXXm
    'height': 'INT',     # m
    'stat': 'TEXT',      # Avg, Std, etc
    'record': 'INT',     # record number
    'replicate': 'INT',  # for duplicate sensors
}

# Per-table measurement columns, as {table_name: {column_name: sqlite_type}}.
tables = {
    # slow data
    'csat_slow': {'u': 'REAL', 'v': 'REAL', 'w': 'REAL',
                  't_sonic': 'REAL', 'diag_sonic': 'INT'},
    'irga_slow': {'co2': 'REAL', 'h2o': 'REAL', 'p_irga': 'REAL',
                  't_irga': 'REAL', 'diag_irga': 'INT'},
    'netrad_slow': {'sw_in': 'REAL', 'sw_out': 'REAL', 'lw_in': 'REAL',
                    'lw_out': 'REAL', 'netrad': 'REAL', 'alb': 'REAL',
                    'tb': 'REAL'},
    'ppfd_in_slow': {'ppfd_in': 'REAL'},
    'ppfd_out_slow': {'ppfd_out': 'REAL'},
    # NOTE(review): 'sap_slows' does not follow the '*_slow' pattern of its
    # siblings; kept as-is because the table name is part of the schema.
    'sap_slows': {'sapmv': 'REAL', 'stemtemp': 'REAL'},
    'd_snow_slow': {'dist_to': 'REAL', 'tcdist_to': 'REAL',
                    'd_snow': 'REAL', 'd_snow_taref': 'REAL'},
    'tsn_slow': {'tsn_tref': 'REAL', 'tsn': 'REAL'},
    'burba_slow': {'irgatemp': 'REAL', 'irgatempref': 'REAL'},
    'pri_slow': {'pri': 'REAL', 'down_532': 'REAL', 'down_570': 'REAL',
                 'up_532': 'REAL', 'up_570': 'REAL', 'ind_down': 'REAL',
                 'ind_up': 'REAL'},
    'ta_slow': {'ta': 'REAL'},
    'rh_slow': {'rh': 'REAL'},
    'press_slow': {'pa': 'REAL'},
    'precip_slow': {'p': 'REAL'},
    'g_slow': {'ts': 'REAL', 'g': 'REAL'},
    'hydras_slow': {'ts': 'REAL', 'swc': 'REAL'},
    'wind_slow': {'wd': 'REAL', 'ws': 'REAL'},

    # fast data
    'ec_fast': {'conv_fn': 'TEXT', 'raw_fn': 'TEXT'},

    # for qa/qc
    'flags': {'reviewed': 'INT', 'flagged': 'INT', 'comment': 'TEXT'},

    # serial numbers
    'sns': {'sn': 'TEXT'},
}

# Extend every table's column dict in place with the shared key columns
# (they end up after the measurement columns).
for v in tables.values():
    v.update(key_cols)
    

In [46]:
# Open the database and create every table declared in `tables`.
con = sq.connect(db_fn)
csr = con.cursor()
# Pass the cursor and connection explicitly so the statements run (and are
# committed) on the connection opened in this cell, rather than on whatever
# the function's default arguments happen to refer to.
for name, columns in tables.items():
    create_table(name, columns, csr=csr, con=con)

CREATE TABLE IF NOT EXISTS csat_slow(u REAL, v REAL, w REAL, t_sonic REAL, diag_sonic INT, timestamp TEXT, site TEXT, height INT, stat TEXT, record INT, replicate INT) 

CREATE TABLE IF NOT EXISTS irga_slow(co2 REAL, h2o REAL, p_irga REAL, t_irga REAL, diag_irga INT, timestamp TEXT, site TEXT, height INT, stat TEXT, record INT, replicate INT) 

CREATE TABLE IF NOT EXISTS netrad_slow(sw_in REAL, sw_out REAL, lw_in REAL, lw_out REAL, netrad REAL, alb REAL, tb REAL, timestamp TEXT, site TEXT, height INT, stat TEXT, record INT, replicate INT) 

CREATE TABLE IF NOT EXISTS ppfd_in_slow(ppfd_in REAL, timestamp TEXT, site TEXT, height INT, stat TEXT, record INT, replicate INT) 

CREATE TABLE IF NOT EXISTS ppfd_out_slow(ppfd_out REAL, timestamp TEXT, site TEXT, height INT, stat TEXT, record INT, replicate INT) 

CREATE TABLE IF NOT EXISTS sap_slows(sapmv REAL, stemtemp REAL, timestamp TEXT, site TEXT, height INT, stat TEXT, record INT, replicate INT) 

CREATE TABLE IF NOT EXISTS d_snow_slow(dis

In [57]:
def make_multi_index(name, columns, con=None, csr=None):
    '''
    create a multi-column index named `{name}_multidx` on table `name`.
    name: the table to index
    columns: a list of str, the columns to index, in index order
    con, csr: connection and cursor objects. Both are optional and are
        resolved when the function is *called*, not when it is defined:
        a missing connection is taken from the cursor, or failing that from
        the module-level `con`; a missing cursor is opened on the connection.

    The statement uses IF NOT EXISTS, so re-running is a no-op instead of an
    error (an existing index of the same name is left untouched even if it
    covers different columns).
    Prints the generated statement, executes it and commits.
    Raises ValueError when no connection can be determined.
    '''

    # Late-bind the connection: defaults written as `con=con` would be frozen
    # at definition time and go stale as soon as the notebook reconnects.
    if con is None:
        # prefer the cursor's own connection so the pair can never disagree
        con = csr.connection if csr is not None else globals().get('con')
    if con is None:
        raise ValueError('no sqlite3 connection: pass con= or define a module-level `con`')
    if csr is None:
        csr = con.cursor()

    columns = ', '.join(columns)
    query = f'CREATE INDEX IF NOT EXISTS {name}_multidx ON {name}({columns});'
    print(query, '\n')

    csr.execute(query)

    con.commit()

    return

In [65]:
# Open the database and index every table on its key columns.
con = sq.connect(db_fn)
csr = con.cursor()
# Pass the connection and cursor explicitly so the statements run (and are
# committed) on the connection opened in this cell, rather than on whatever
# the function's default arguments happen to refer to.
for name in tables:
    make_multi_index(name, ['timestamp', 'site', 'stat', 'height', 'replicate'],
                     con=con, csr=csr)

CREATE INDEX csat_slow_multidx ON csat_slow(timestamp, site, stat, height, replicate) 

CREATE INDEX irga_slow_multidx ON irga_slow(timestamp, site, stat, height, replicate) 

CREATE INDEX netrad_slow_multidx ON netrad_slow(timestamp, site, stat, height, replicate) 

CREATE INDEX ppfd_in_slow_multidx ON ppfd_in_slow(timestamp, site, stat, height, replicate) 

CREATE INDEX ppfd_out_slow_multidx ON ppfd_out_slow(timestamp, site, stat, height, replicate) 

CREATE INDEX sap_slows_multidx ON sap_slows(timestamp, site, stat, height, replicate) 

CREATE INDEX d_snow_slow_multidx ON d_snow_slow(timestamp, site, stat, height, replicate) 

CREATE INDEX tsn_slow_multidx ON tsn_slow(timestamp, site, stat, height, replicate) 

CREATE INDEX burba_slow_multidx ON burba_slow(timestamp, site, stat, height, replicate) 

CREATE INDEX pri_slow_multidx ON pri_slow(timestamp, site, stat, height, replicate) 

CREATE INDEX ta_slow_multidx ON ta_slow(timestamp, site, stat, height, replicate) 

CREATE INDEX rh_

In [67]:
# Print one row per schema object stored in the database catalog (sqlite_master).
for r in csr.execute('SELECT * FROM sqlite_master').fetchall():
    print(r)

('table', 'csat_slow', 'csat_slow', 2, 'CREATE TABLE csat_slow(u REAL, v REAL, w REAL, t_sonic REAL, diag_sonic INT, timestamp TEXT, site TEXT, height INT, stat TEXT, record INT, replicate INT)')
('table', 'irga_slow', 'irga_slow', 3, 'CREATE TABLE irga_slow(co2 REAL, h2o REAL, p_irga REAL, t_irga REAL, diag_irga INT, timestamp TEXT, site TEXT, height INT, stat TEXT, record INT, replicate INT)')
('table', 'netrad_slow', 'netrad_slow', 4, 'CREATE TABLE netrad_slow(sw_in REAL, sw_out REAL, lw_in REAL, lw_out REAL, netrad REAL, alb REAL, tb REAL, timestamp TEXT, site TEXT, height INT, stat TEXT, record INT, replicate INT)')
('table', 'ppfd_in_slow', 'ppfd_in_slow', 5, 'CREATE TABLE ppfd_in_slow(ppfd_in REAL, timestamp TEXT, site TEXT, height INT, stat TEXT, record INT, replicate INT)')
('table', 'ppfd_out_slow', 'ppfd_out_slow', 6, 'CREATE TABLE ppfd_out_slow(ppfd_out REAL, timestamp TEXT, site TEXT, height INT, stat TEXT, record INT, replicate INT)')
('table', 'sap_slows', 'sap_slows', 7