In [1]:
import numpy as np
import os
import pandas as pd

import sys
# Locate the package root relative to where the notebook is running instead of
# a user-specific absolute path (assumes the notebook is launched from a
# direct sub-directory of the NaTGenPD checkout -- TODO confirm).
cwd = os.getcwd()
pkg_dir = os.path.dirname(cwd)
# Re-running this cell must not stack duplicate entries on sys.path.
if pkg_dir not in sys.path:
    sys.path.append(pkg_dir)
import NaTGenPD as npd
from NaTGenPD.poly_fit import GenericFit
from NaTGenPD.cli import setup_logger
# Filtered CEMS exports are written under the shared project directory; this
# is the only path that is not relative to the package checkout.
filtered_dir = os.path.join('/projects/naris/CEMS', 'Filtered')

# Everything else (fit inputs and outputs) lives in the package's data folder.
data_dir = os.path.join(pkg_dir, 'data')
generic_dir = os.path.join(data_dir, 'poly_fits/generic_fits')
# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(generic_dir, exist_ok=True)

logger = setup_logger('NaTGenPD')

In [2]:
# Sanity check: show which sub-directories are present in data_dir.
os.listdir(data_dir)

['poly_fits', 'piecewise_fits']

# Generic Fits

In [2]:
# Input directory: the 'poly_fits' folder inside data_dir.
fit_dir = os.path.join(data_dir, 'poly_fits')
# Presumably reads the fits found in fit_dir and writes the generic fits to
# generic_dir; it logs one "Creating Generic Fit for <group>" line per group.
GenericFit.run(fit_dir, generic_dir)

INFO - 2019-11-01 09:15:45,412 [poly_fit.py:311] : Creating Generic Fit for Boiler (Coal)
  load_maxs = group_fits.as_matrix(['load_max'])
  fit_params = group_fits.as_matrix(['a4', 'a3', 'a2', 'a1', 'a0'])
INFO - 2019-11-01 09:15:45,480 [poly_fit.py:311] : Creating Generic Fit for Boiler (NG)
INFO - 2019-11-01 09:15:45,500 [poly_fit.py:311] : Creating Generic Fit for Boiler (Oil)
INFO - 2019-11-01 09:15:45,513 [poly_fit.py:311] : Creating Generic Fit for Boiler (Other Solid Fuel)
INFO - 2019-11-01 09:15:45,533 [poly_fit.py:311] : Creating Generic Fit for CC (Coal)
INFO - 2019-11-01 09:15:45,566 [poly_fit.py:311] : Creating Generic Fit for CC (NG)
INFO - 2019-11-01 09:15:45,634 [poly_fit.py:311] : Creating Generic Fit for CC (Oil)
INFO - 2019-11-01 09:15:45,659 [poly_fit.py:311] : Creating Generic Fit for CT (NG)
INFO - 2019-11-01 09:15:45,712 [poly_fit.py:311] : Creating Generic Fit for CT (Oil)
INFO - 2019-11-01 09:15:45,736 [poly_fit.py:311] : Creating Generic Fit for Cement Kiln (C

# Filtered Data

In [26]:
# Export every group of the filtered CEMS file to its own CSV in filtered_dir.
h5_path = os.path.join(data_dir, 'SMOKE_Filtered_2016-2017.h5')
# to_csv does not create missing parent directories, so make sure the
# destination exists before the first write.
os.makedirs(filtered_dir, exist_ok=True)
with npd.CEMS(h5_path, 'r') as f:
    for group in f.dsets:
        # e.g. 'Boiler (Other Solid Fuel)' -> 'Boiler-Other_Solid_Fuel'
        f_name = group.strip(')').replace(' (', '-').replace(' ', '_')
        f_out = os.path.join(filtered_dir, f_name + '.csv')
        print(f_name)
        df = f[group].df
        if 'CC' not in group:
            # Only groups whose name contains 'CC' keep the 'cts' column.
            df = df.drop(columns='cts')

        df.to_csv(f_out, index=False)

Boiler-Coal
Boiler-NG
Boiler-Oil
Boiler-Other_Solid_Fuel
CC-Coal
CC-NG
CC-Oil
CT-NG
CT-Oil
Cement_Kiln-Coal
Process_Heater-NG
Stoker-Coal
Stoker-NG
Stoker-Other_Solid_Fuel


# Piecewise Fits

In [28]:
def extract_best_fit(row):
    """
    Reduce one row of piecewise fits to the fit selected by 'min_aicc'.

    Parameters
    ----------
    row : pandas.Series
        Row holding 'group_type', 'unit_id', 'cluster', 'min_aicc' and, for
        the selected fit number n, the entries 'n_b', 'n_m1'..'n_mn' and
        'n_x1'..'n_xn'.

    Returns
    -------
    pandas.Series
        The identifier entries followed by the selected fit's entries,
        relabelled 'heat_rate_base', 'heat_rate_incr<i>' and 'load_point<i>'.
    """
    n_fit = int(row['min_aicc'])

    # (source label, output label) pairs, kept in output order.
    pairs = [('{}_b'.format(n_fit), 'heat_rate_base')]
    for seg in range(1, n_fit + 1):
        pairs.append(("{}_m{}".format(n_fit, seg),
                      'heat_rate_incr{}'.format(seg)))
        pairs.append(("{}_x{}".format(n_fit, seg),
                      'load_point{}'.format(seg)))

    keep = ['group_type', 'unit_id', 'cluster'] + [src for src, _ in pairs]
    return row[keep].rename(dict(pairs))


def reformat_piecewise_fit(piecewise_fits, poly_fits, out_path):
    """
    Keep only each row's best piecewise fit, attach unit metadata taken from
    the polynomial-fit table, and save the result as a .csv.

    Parameters
    ----------
    piecewise_fits : str
        Path to the .csv of piecewise fits (one row per unit / cluster, with
        a 'min_aicc' column naming the fit to keep).
    poly_fits : str
        Path to the .csv of polynomial fits; only its metadata columns
        (location, regions, unit and fuel type) are used.
    out_path : str
        Path of the .csv to write.
    """
    fits = pd.read_csv(piecewise_fits)
    if '2_x3' in fits:
        # extract_best_fit looks up '<n>_x<i>' for i <= n, so '2_x3' would
        # never be read while '3_x3' is required for a best fit of 3;
        # presumably the column is mislabelled upstream.
        fits = fits.rename(columns={'2_x3': '3_x3'})

    fits = fits.apply(extract_best_fit, axis=1)
    col_order = ['unit_id', 'group_type', 'cluster',  'heat_rate_base',
                 'heat_rate_incr1', 'load_point1',
                 'heat_rate_incr2', 'load_point2',
                 'heat_rate_incr3', 'load_point3']
    # Drop the columns no row needed (e.g. no row selected a fit of 3).
    col_order = [c for c in col_order if c in fits]

    merge_cols = ['unit_id', 'latitude', 'longitude',
                  'state', 'EPA_region', 'NERC_region',
                  'unit_type', 'fuel_type']

    # Collapse repeated metadata rows before merging: if the polynomial table
    # holds several rows for one unit_id, a left merge against the raw table
    # would repeat every piecewise row once per metadata row.
    unit_meta = pd.read_csv(poly_fits)[merge_cols].drop_duplicates()
    fits = pd.merge(fits[col_order], unit_meta, on='unit_id', how='left')

    fits.to_csv(out_path, index=False)

In [29]:
# Reformat every raw piecewise-fit file in 'all_fits' and write the result one
# level up, next to it, as '<group>_piecewise_fits.csv'.
out_dir = os.path.join(data_dir, 'piecewise_fits')
piecewise_dir = os.path.join(out_dir, 'all_fits')
poly_dir = os.path.join(data_dir, 'poly_fits')
# os.listdir returns entries in arbitrary order; sort for reproducible output.
for fname in sorted(os.listdir(piecewise_dir)):
    if not fname.endswith('.csv'):
        # Skip stray entries (hidden files, checkpoint directories, ...) that
        # pd.read_csv could not parse.
        continue
    # File names look like '<group>_linpiecewise_fits.csv'; the group name is
    # everything before the first underscore.
    group_name = fname.split('_')[0]
    in_path = os.path.join(piecewise_dir, fname)
    poly_path = os.path.join(poly_dir, '{}_fits.csv'.format(group_name))
    out_path = os.path.join(out_dir, '{}_piecewise_fits.csv'.format(group_name))
    reformat_piecewise_fit(in_path, poly_path, out_path)
    print('{} processed'.format(fname))

CT (Oil)_linpiecewise_fits.csv processed
CC (Coal)_linpiecewise_fits.csv processed
CC (NG)_linpiecewise_fits.csv processed
CC (Oil)_linpiecewise_fits.csv processed
CT (NG)_linpiecewise_fits.csv processed
Boiler (Other Solid Fuel)_linpiecewise_fits.csv processed
Boiler (Coal)_linpiecewise_fits.csv processed
Boiler (Oil)_linpiecewise_fits.csv processed
Stoker (Other Solid Fuel)_linpiecewise_fits.csv processed
Boiler (NG)_linpiecewise_fits.csv processed
