Skip to content

Commit

Permalink
Merge branch 'feature/epathermostat_2.0' into feature/epa2.0_line_voltage
Browse files Browse the repository at this point in the history
  • Loading branch information
craigmaloney committed Mar 11, 2021
2 parents c65f0aa + 6bc6ebe commit dda7674
Show file tree
Hide file tree
Showing 14 changed files with 3,149 additions and 3,103 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ cache:

language: python
python:
- 3.5
- 3.6
- 3.7
- 3.8
- 3.9

notifications:
email: false
Expand Down
15 changes: 12 additions & 3 deletions scripts/multi_thermostat_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,19 @@ def main():
# console
logging.captureWarnings(True)

# data_dir = os.path.join("..", "tests", "data", "single_stage")
# This section finds the metadata files and data files for the thermostats.
# These point to examples of the various styles of files
# Single Stage
data_dir = os.path.join("..", "tests", "data", "single_stage")
metadata_filename = os.path.join(data_dir, "metadata.csv")

# Two Stage
# data_dir = os.path.join("..", "tests", "data", "two_stage")
data_dir = os.path.join("..", "tests", "data", "two_stage_ert")
metadata_filename = os.path.join(data_dir, "epa_two_stage_metadata.csv")
# metadata_filename = os.path.join(data_dir, "epa_two_stage_metadata.csv")

# Two Stage ERT
# data_dir = os.path.join("..", "tests", "data", "two_stage_ert")
# metadata_filename = os.path.join(data_dir, "epa_two_stage_metadata.csv")

# Use this to save the weather cache to local disk files
# thermostats = from_csv(metadata_filename, verbose=True, save_cache=True,
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
install_requires=[
'eemeter==2.5.2',
'eeweather==0.3.23',
'numpy < 1.20',
'pandas==0.25.3',
'numpy<=1.20',
'pandas<=1.2.0',
'sqlalchemy==1.3.1',
'zipcodes==1.1.2',
],
Expand Down
10 changes: 5 additions & 5 deletions tests/data/single_stage/thermostat_example_certification.csv
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
product_id,sw_version,metric,filter,region,statistic,season,value
test_product,2.0.0a1,percent_savings_baseline_percentile,tau_cvrmse_savings_p01,national_weighted_mean,lower_bound_95,heating,
test_product,2.0.0a1,percent_savings_baseline_percentile,tau_cvrmse_savings_p01,national_weighted_mean,lower_bound_95,cooling,
test_product,2.0.0a1,percent_savings_baseline_percentile,tau_cvrmse_savings_p01,national_weighted_mean,q20,heating,
test_product,2.0.0a1,percent_savings_baseline_percentile,tau_cvrmse_savings_p01,national_weighted_mean,q20,cooling,
test_product,2.0.0a1,rhu_30F_to_45F,tau_cvrmse_savings_p01,all,upper_bound_95,heating,
test_product,2.0.0a2,percent_savings_baseline_percentile,tau_cvrmse_savings_p01,national_weighted_mean,lower_bound_95,heating,20.17
test_product,2.0.0a2,percent_savings_baseline_percentile,tau_cvrmse_savings_p01,national_weighted_mean,lower_bound_95,cooling,44.36
test_product,2.0.0a2,percent_savings_baseline_percentile,tau_cvrmse_savings_p01,national_weighted_mean,q20,heating,19.83
test_product,2.0.0a2,percent_savings_baseline_percentile,tau_cvrmse_savings_p01,national_weighted_mean,q20,cooling,38.44
test_product,2.0.0a2,rhu_30F_to_45F,tau_cvrmse_savings_p01,all,upper_bound_95,heating,0.09
4,268 changes: 2,134 additions & 2,134 deletions tests/data/single_stage/thermostat_example_stats.csv

Large diffs are not rendered by default.

1,748 changes: 874 additions & 874 deletions tests/data/two_stage_ert/thermostat_example_stats.csv

Large diffs are not rendered by default.

5 changes: 1 addition & 4 deletions tests/test_core_single_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,8 @@ def test_pandas_warnings(thermostat_type_1):
with pytest.warns(Warning):
__pandas_warnings('0.21.0')

with pytest.warns(Warning):
__pandas_warnings('1.2.0')

with pytest.warns(None) as pytest_warnings:
__pandas_warnings('0.25.3')
__pandas_warnings('1.2.0')
assert not pytest_warnings

assert __pandas_warnings(None) is None
Expand Down
4 changes: 2 additions & 2 deletions tests/test_eeweather_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@


def test_get_indexed_temperatures_eeweather_empty_index():
empty_index = pd.DataFrame()
empty_index = pd.DataFrame([])
results = get_indexed_temperatures_eeweather('720648', empty_index)
assert results.empty is True


def test_get_index_temperatures_eeweather():
begin_timestamp = pd.Timestamp('2011-01-01 00:00:00')
periods = 8766
hourly_index = pd.date_range(begin_timestamp, periods=periods, freq="H")
hourly_index = pd.date_range(begin_timestamp, periods=periods, freq='H', tz='UTC')
results = get_indexed_temperatures_eeweather('720648', hourly_index)
assert results.shape == (8766,)
26 changes: 17 additions & 9 deletions thermostat/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,6 @@ def __pandas_warnings(pandas_version):
warnings.warn(
"WARNING: Pandas version 0.21.x has known issues and is not supported. "
"Please upgrade to the Pandas version 0.25.3.")
# Pandas 1.x causes issues. Need to warn about this at the moment.
if pd_major >= 1:
warnings.warn(
"WARNING: Pandas version 1.x has changed significantly, and causes "
"issues with this software. We are working on supporting Pandas 1.x in "
"a future release. Please downgrade to Pandas 0.25.3")

except Exception:
# If we can't figure out the version string then don't worry about it for now
Expand Down Expand Up @@ -655,7 +649,7 @@ def get_resistance_heat_utilization_bins(self, runtime_temp, bins, core_heating_

# Create the bins and group by them
runtime_temp['bins'] = pd.cut(runtime_temp['temperature'], bins)
runtime_rhu = runtime_temp.groupby('bins')['heat_runtime', 'aux_runtime', 'emg_runtime', 'total_minutes'].sum()
runtime_rhu = runtime_temp.groupby('bins')[['heat_runtime', 'aux_runtime', 'emg_runtime', 'total_minutes']].sum()

# Calculate the RHU based on the bins
runtime_rhu['rhu'] = (runtime_rhu['aux_runtime'] + runtime_rhu['emg_runtime']) / (runtime_rhu['heat_runtime'] + runtime_rhu['emg_runtime'])
Expand Down Expand Up @@ -863,7 +857,11 @@ def estimate_errors(tau_estimate):
mape = np.nanmean(np.absolute(errors / mean_daily_runtime))
mae = np.nanmean(np.absolute(errors))

return pd.Series(cdd, index=daily_index), tau_estimate, alpha_estimate, mse, rmse, cvrmse, mape, mae
demand = pd.Series(cdd, index=daily_index)
if demand.empty is True:
demand = np.nan

return demand, tau_estimate, alpha_estimate, mse, rmse, cvrmse, mape, mae

def get_heating_demand(self, core_heating_day_set):
"""
Expand Down Expand Up @@ -987,8 +985,12 @@ def estimate_errors(tau_estimate):
mape = np.nanmean(np.absolute(errors / mean_daily_runtime))
mae = np.nanmean(np.absolute(errors))

demand = pd.Series(hdd, index=daily_index)
if demand.empty is True:
demand = np.nan

return (
pd.Series(hdd, index=daily_index),
demand,
tau_estimate,
alpha_estimate,
mse,
Expand Down Expand Up @@ -1285,6 +1287,9 @@ def _calculate_cooling_epa_field_savings_metrics(
mae,
) = self.get_cooling_demand(core_cooling_day_set)

if demand.empty is True:
demand = np.nan

total_runtime_core_cooling = daily_runtime.sum()
n_days = core_cooling_day_set.daily.sum()
n_hours = core_cooling_day_set.hourly.sum()
Expand Down Expand Up @@ -1442,6 +1447,9 @@ def _calculate_heating_epa_field_savings_metrics(
mae,
) = self.get_heating_demand(core_heating_day_set)

if demand.empty is True:
demand = np.nan

total_runtime_core_heating = daily_runtime.sum()
n_days = core_heating_day_set.daily.sum()
n_hours = core_heating_day_set.hourly.sum()
Expand Down
2 changes: 1 addition & 1 deletion thermostat/eeweather_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def get_indexed_temperatures_eeweather(usaf_id, index):
"""

if index.shape == (0, 0) or index.shape == (0,):
return pd.Series([], index=index, dtype=float)
return pd.Series([], index=(), dtype=float)
years = sorted(index.groupby(index.year).keys())
start = pd.to_datetime(datetime(years[0], 1, 1), utc=True)
end = pd.to_datetime(datetime(years[-1], 12, 31, 23, 59), utc=True)
Expand Down
125 changes: 73 additions & 52 deletions thermostat/importers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import csv
from thermostat.core import Thermostat
from thermostat.equipment_type import (
has_heating,
Expand Down Expand Up @@ -147,7 +148,8 @@ def normalize_utc_offset(utc_offset):
e))


def from_csv(metadata_filename, verbose=False, save_cache=False, shuffle=True, cache_path=None):
def from_csv(metadata_filename, verbose=False, save_cache=False, shuffle=True,
cache_path=None, log_error=True, log_error_filename='thermostat_import_errors.csv'):
"""
Creates Thermostat objects from data stored in CSV files.
Expand All @@ -162,7 +164,11 @@ def from_csv(metadata_filename, verbose=False, save_cache=False, shuffle=True, c
shuffle: boolean
Shuffles the thermostats to give them random ordering if desired (helps with caching).
cache_path: str
Directory path to save the cached data
Directory path to save the cached data.
log_error: boolean
Create a log file of thermostats that weren't imported and the reason they weren't imported.
log_error_filename: str
Name of the file to use for logging the thermostats that weren't imported.
Returns
-------
Expand All @@ -187,6 +193,7 @@ def from_csv(metadata_filename, verbose=False, save_cache=False, shuffle=True, c
)
metadata.fillna('', inplace=True)


# Shuffle the results to help alleviate cache issues
if shuffle:
logging.info("Metadata randomized to prevent collisions in cache.")
Expand All @@ -203,20 +210,27 @@ def from_csv(metadata_filename, verbose=False, save_cache=False, shuffle=True, c
p.close()
p.join()

# Bad thermostats return None so remove those.
results = [x for x in result_list if x is not None]

# Check for thermostats that were not loaded and log them
metadata_thermostat_ids = set(metadata.thermostat_id)
loaded_thermostat_ids = set([x.thermostat_id for x in results])
missing_thermostats = metadata_thermostat_ids.difference(loaded_thermostat_ids)
missing_thermostats_num = len(missing_thermostats)
if missing_thermostats_num > 0:
logging.warning("Unable to load {} thermostat records because of "
"errors. Please check the logs for the following thermostats:".format(
missing_thermostats_num))
for thermostat in missing_thermostats:
logging.warning(thermostat)
results = []
error_list = []

for result in result_list:
if result['thermostat'] is None:
for error in result['errors']:
logging.warning(result['thermostat_id'] + ': ' + error)
error_dict = {}
error_dict['thermostat_id'] = result['thermostat_id']
error_dict['error'] = error
error_list.append(error_dict)
else:
results.append(result['thermostat'])

if log_error and error_list:
fieldnames = ['thermostat_id', 'error']
with open(log_error_filename, 'w') as error_file:
writer = csv.DictWriter(error_file, fieldnames=fieldnames, dialect='excel')
writer.writeheader()
for thermostat_error in error_list:
writer.writerow(thermostat_error)

# Convert this to an iterator to maintain compatibility
return iter(results)
Expand All @@ -232,48 +246,54 @@ def _multiprocess_func(metadata, metadata_filename, verbose=False, save_cache=Fa

interval_data_filename = os.path.join(os.path.dirname(metadata_filename), row.interval_data_filename)

status_metadata = {
'thermostat_id': row.thermostat_id,
'errors': [],
'thermostat': None,
}
errors = []
thermostat = None

try:
thermostat = get_single_thermostat(
thermostat_id=row.thermostat_id,
zipcode=row.zipcode,
heat_type=row.heat_type,
heat_stage=row.heat_stage,
cool_type=row.cool_type,
cool_stage=row.cool_stage,
utc_offset=row.utc_offset,
interval_data_filename=interval_data_filename,
save_cache=save_cache,
cache_path=cache_path,
thermostat_id=row.thermostat_id,
zipcode=row.zipcode,
heat_type=row.heat_type,
heat_stage=row.heat_stage,
cool_type=row.cool_type,
cool_stage=row.cool_stage,
utc_offset=row.utc_offset,
interval_data_filename=interval_data_filename,
save_cache=save_cache,
cache_path=cache_path,
)
except ZCTAError as e:
# Could not locate a station for the thermostat. Warn and skip.
warnings.warn(
"Skipping import of thermostat (id={}) for which "
errors.append(
"Skipping import of thermostat because "
"a sufficient source of outdoor weather data could not"
"be located using the given ZIP code ({}). This is likely "
f"be located using the given ZIP code ({row.zipcode}). This is likely "
"due to the discrepancy between US Postal Service ZIP "
"codes (which do not always map well to locations) and "
"Census Bureau ZCTAs (which usually do). Please supply "
"a zipcode which corresponds to a US Census Bureau ZCTA."
"\nError Message: {}"
.format(row.thermostat_id, row.zipcode, e))
return
f"\nError Message: {e}"
)

except ISDDataNotAvailableError as e:
warnings.warn(
"Skipping import of thermostat(id={} because the NCDC "
"does not have data: {}"
.format(row.thermostat_id, e))
return
errors.append(
"Skipping import of thermostat because the NCDC "
f"does not have data: {e}"
)

except Exception as e:
warnings.warn(
"Skipping import of thermostat(id={}) because of "
"the following error: {}"
.format(row.thermostat_id, e))
return
errors.append(
f"Skipping import of thermostat because of "
f"the following error: {e}")

return thermostat
status_metadata['errors'] = errors
status_metadata['thermostat'] = thermostat
return status_metadata


def get_single_thermostat(thermostat_id, zipcode,
Expand Down Expand Up @@ -367,17 +387,18 @@ def get_single_thermostat(thermostat_id, zipcode,
enough_cool_runtime = True
enough_heat_runtime = True

# Currently checks hourly runtime, not daily
if cool_runtime is not None:
enough_cool_runtime = _enough_daily_runtume(cool_runtime)
enough_cool_runtime = _enough_runtume(cool_runtime)
if heat_runtime is not None:
enough_heat_runtime = _enough_daily_runtume(heat_runtime)
enough_heat_runtime = _enough_runtume(heat_runtime)

if not(enough_cool_runtime and enough_heat_runtime):
message = "Not enough runtime for thermostat %s\n" % thermostat_id
message = "Not enough runtime for thermostat "
if not enough_heat_runtime:
message += "Heat runtime has over 5% missing data.\n"
message += "(Heat runtime has over 5% missing data) "
if not enough_cool_runtime:
message += "Cool runtime has over 5% missing data.\n"
message += "(Cool runtime has over 5% missing data) "
raise ValueError(message)

# create thermostat instance
Expand Down Expand Up @@ -481,10 +502,10 @@ def _create_series(df, index):
return series


def _enough_daily_runtume(series):
def _enough_runtume(series):
if series is None:
return False

num_days = len(series)
num_dropped_days = len(series.dropna())
return (num_dropped_days / num_days) > 0.95
num_elements = len(series)
num_dropped_elements = len(series.dropna())
return (num_dropped_elements / num_elements) > 0.95
2 changes: 1 addition & 1 deletion thermostat/stations.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def get_closest_station_by_zipcode(zipcode):

if station is None:
zipcode_mapping = zipcodes.matching(zipcode)
warn("No station found for ZCTA / ZIP %s (%s, %s)." % (
warnings.warn("No station found for ZCTA / ZIP %s (%s, %s)." % (
zipcode,
zipcode_mapping[0].get('city'),
zipcode_mapping[0].get('state')
Expand Down
Loading

0 comments on commit dda7674

Please sign in to comment.