In [None]:
import argparse
import pandas as pd
import time
from .exact_engine import ExactQueryEngine
from .approx_engine import ApproxQueryEngine

In [None]:
def load_data(path):
    """Read the CSV at ``path`` into a DataFrame.

    The ``ts`` column is handed to pandas via ``parse_dates`` so it is
    parsed as dates rather than left as strings.
    """
    frame = pd.read_csv(path, parse_dates=['ts'])
    return frame

def main():
    """Run one aggregate query on both engines and print a comparison.

    Command-line options:
        --data         CSV file to load (default ``data/sample_data.csv``).
        --sample_frac  Float passed to ``ApproxQueryEngine`` as
                       ``sample_frac`` (default ``0.1``).
        --agg          Aggregate name passed to each engine's ``query``
                       (default ``COUNT``).
        --col          Value passed to ``query`` as ``col``
                       (default ``None``).
        --group_by     Comma-separated column names to group by
                       (default ``group``); an empty value disables
                       grouping.

    Prints the elapsed time of each ``query`` call, then ``head()`` of the
    exact and the approximate result.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', default='data/sample_data.csv')
    parser.add_argument('--sample_frac', type=float, default=0.1)
    parser.add_argument('--agg', default='COUNT')
    parser.add_argument('--col', default=None)
    parser.add_argument('--group_by', default='group')

    args = parser.parse_args()
    df = load_data(args.data)

    # Only the query calls below are timed; both engines are constructed
    # before the clock starts.
    exact = ExactQueryEngine(df)
    approx = ApproxQueryEngine(df, sample_frac=args.sample_frac)

    # Tolerate "a, b" (spaces after commas) and stray commas such as "a,,b"
    # or "a,": strip each name and drop empties so no blank or padded column
    # name reaches the engines. If nothing usable remains, fall back to None,
    # the same value used when --group_by is empty.
    group_by = None
    if args.group_by:
        names = [name.strip() for name in args.group_by.split(',')]
        group_by = [name for name in names if name] or None

    # perf_counter() is monotonic and high-resolution, so the measured
    # intervals cannot be skewed by system clock adjustments the way
    # time.time() differences can.
    t0 = time.perf_counter()
    ex = exact.query(args.agg, col=args.col, group_by=group_by)
    t1 = time.perf_counter()
    ap = approx.query(args.agg, col=args.col, group_by=group_by)
    t2 = time.perf_counter()

    print(f'Exact runtime: {t1-t0:.4f}s, Approx runtime: {t2-t1:.4f}s (sample_frac={args.sample_frac})')
    print('\nExact result (head):')
    print(ex.head())
    print('\nApprox result (head):')
    print(ap.head())




# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()