Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix commercial local docker runs #124

Merged
merged 8 commits into from
Jan 10, 2020
14 changes: 6 additions & 8 deletions buildstockbatch/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ class SimulationExists(Exception):

class BuildStockBatchBase(object):

OS_VERSION = '2.8.1'
OS_SHA = '88b69707f1'
LOGO = '''
_ __ _ __, _ __
( / ) o // /( _/_ / ( / ) _/_ /
Expand All @@ -65,11 +63,11 @@ def __init__(self, project_filename):
self._weather_dir = None
# Call property to create directory and copy weather files there
_ = self.weather_dir # noqa: F841
# Load in overriding OS_VERSION and OS_SHA arguments if they exist in the YAML
if 'os_version' in self.cfg.keys():
self.OS_VERSION = self.cfg['os_version']
if 'os_sha' in self.cfg.keys():
self.OS_SHA = self.cfg['os_sha']
# Load in OS_VERSION and OS_SHA arguments if they exist in the YAML,
# otherwise use defaults specified here.
self.os_version = self.cfg.get('os_version', '2.8.1')
rHorsey marked this conversation as resolved.
Show resolved Hide resolved
self.os_sha = self.cfg.get('os_sha', '88b69707f1')
logger.debug(f"Using OpenStudio version: {self.os_version} with SHA: {self.os_sha}")

def path_rel_to_projectfile(self, x):
if os.path.isabs(x):
Expand Down Expand Up @@ -261,7 +259,7 @@ def validate_project(project_file):
assert(BuildStockBatchBase.validate_options_lookup(project_file))
assert(BuildStockBatchBase.validate_measure_references(project_file))
assert(BuildStockBatchBase.validate_reference_scenario(project_file))
#assert(BuildStockBatchBase.validate_options_lookup(project_file))
# assert(BuildStockBatchBase.validate_options_lookup(project_file))
logger.info('Base Validation Successful')
return True

Expand Down
18 changes: 9 additions & 9 deletions buildstockbatch/hpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def __init__(self, project_filename):
self.project_dir
)
elif sampling_algorithm == 'precomputed':
print('calling precomputed sampler')
logger.info('calling precomputed sampler')
self.sampler = PrecomputedSingularitySampler(
self.output_dir,
self.cfg,
Expand All @@ -82,11 +82,11 @@ def __init__(self, project_filename):
def output_dir(self):
raise NotImplementedError

@classmethod
def singularity_image_url(cls):
@property
def singularity_image_url(self):
return 'https://s3.amazonaws.com/openstudio-builds/{ver}/OpenStudio-{ver}.{sha}-Singularity.simg'.format(
ver=cls.OS_VERSION,
sha=cls.OS_SHA
ver=self.os_version,
sha=self.os_sha
)

@property
Expand All @@ -95,17 +95,17 @@ def singularity_image(self):
if 'sys_image_dir' in self.cfg.keys():
sys_image_dir = self.cfg['sys_image_dir']
sys_image = os.path.join(sys_image_dir, 'OpenStudio-{ver}.{sha}-Singularity.simg'.format(
ver=self.OS_VERSION,
sha=self.OS_SHA
ver=self.os_version,
sha=self.os_sha
))
if os.path.isfile(sys_image):
return sys_image
else:
raise RuntimeError('Unable to find singularity image specified in project file: `{}`'.format(sys_image))
# Use the expected HPC environment default if not explicitly defined in the YAML
sys_image = os.path.join(self.sys_image_dir, 'OpenStudio-{ver}.{sha}-Singularity.simg'.format(
ver=self.OS_VERSION,
sha=self.OS_SHA
ver=self.os_version,
sha=self.os_sha
))
if os.path.isfile(sys_image):
return sys_image
Expand Down
28 changes: 19 additions & 9 deletions buildstockbatch/localdocker.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,21 @@ class LocalDockerBatch(BuildStockBatchBase):
def __init__(self, project_filename):
super().__init__(project_filename)
self.docker_client = docker.DockerClient.from_env()
logger.debug('Pulling docker image')
self.docker_client.images.pull(self.docker_image())
# On Windows, check that the docker server is responding (that you have the Docker Daemon running)
# this will hopefully prevent other people from trying to debug bogus issues.
if os.name == 'nt':
try:
self.docker_client.ping()
except: # noqa: E722 (allow bare except in this case because error is weird non-class Windows API error)
logger.error('The docker server did not respond, make sure Docker Desktop is started then retry.')
raise RuntimeError('The docker server did not respond, make sure Docker Desktop is started then retry.')

logger.debug(f"Pulling docker image {self.docker_image}")
self.docker_client.images.pull(self.docker_image)

if self.stock_type == 'residential':
self.sampler = ResidentialDockerSampler(
self.docker_image(),
self.docker_image,
self.cfg,
self.buildstock_dir,
self.project_dir
Expand All @@ -52,7 +61,7 @@ def __init__(self, project_filename):
self.project_dir
)
elif sampling_algorithm == 'precomputed':
print('calling precomputed sampler')
logger.info('calling precomputed sampler')
self.sampler = PrecomputedDockerSampler(
self.output_dir,
self.cfg,
Expand All @@ -69,12 +78,12 @@ def validate_project(project_file):
super(LocalDockerBatch, LocalDockerBatch).validate_project(project_file)
# LocalDocker specific code goes here

@classmethod
def docker_image(cls):
return 'nrel/openstudio:{}'.format(cls.OS_VERSION)
@property
def docker_image(self):
return 'nrel/openstudio:{}'.format(self.os_version)

@classmethod
def run_building(cls, project_dir, buildstock_dir, weather_dir, results_dir, measures_only,
def run_building(cls, project_dir, buildstock_dir, weather_dir, docker_image, results_dir, measures_only,
cfg, i, upgrade_idx=None):
try:
sim_id, sim_dir = cls.make_sim_dir(i, upgrade_idx, os.path.join(results_dir, 'simulation_output'))
Expand Down Expand Up @@ -107,7 +116,7 @@ def run_building(cls, project_dir, buildstock_dir, weather_dir, results_dir, mea
if measures_only:
args.insert(2, '--measures_only')
container_output = docker_client.containers.run(
cls.docker_image(),
docker_image,
args,
remove=True,
volumes=docker_volume_mounts,
Expand All @@ -134,6 +143,7 @@ def run_batch(self, n_jobs=-1, measures_only=False):
self.project_dir,
self.buildstock_dir,
self.weather_dir,
self.docker_image,
self.results_dir,
measures_only,
self.cfg
Expand Down
32 changes: 18 additions & 14 deletions buildstockbatch/postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import gzip
import json
import logging
import numpy as np
import os
import random
import re
Expand All @@ -30,6 +31,7 @@
from pathlib import Path
import pyarrow as pa
from pyarrow import parquet
from joblib import Parallel, delayed

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -215,7 +217,7 @@ def write_output(results_dir, group_pq):
logger.debug(f"The concatenated parquet file is consuming {sys.getsizeof(pq) / (1024 * 1024) :.2f} MB.")
write_dataframe_as_parquet(pq, results_dir, file_path)

# Liang's edit starts

def athena_return(year, ts_index, metadata):
"""
Function to read the 15 minute time series parquet file, drop unneeded columns, & save to a new folder for gzip
Expand All @@ -225,7 +227,7 @@ def athena_return(year, ts_index, metadata):
:return: Dictionary with id and success of the attempt to generate the athena parquet file
"""

print ('Processing the folder {}.'.format(metadata['building_id']))
print('Processing the folder {}.'.format(metadata['building_id']))

ts_name_mapping = {
'Electricity:Facility__[kWh]': 'electricity_facility_kWh',
Expand Down Expand Up @@ -265,23 +267,25 @@ def athena_return(year, ts_index, metadata):
ts_df = ts_df[new_order]
ts_df.to_parquet(output_path)
return {'id': metadata['building_id'], 'good': True}
except ArrowIOError:
except pa.lib.ArrowIOError:
return {'id': metadata['building_id'], 'good': False}


def concat_athena_parquets(chunck, index):
    """Concatenate a chunk of per-building athena parquet files into one file.

    Files that cannot be read (e.g. missing or truncated parquet output from a
    failed simulation) are silently skipped so one bad file does not fail the
    whole chunk.

    :param chunck: iterable of paths to individual athena parquet files
    :param index: integer index used to name the combined output file
    """
    to_concat = []
    for file in chunck:
        try:
            to_concat.append(pd.read_parquet(file))
        except pa.lib.ArrowIOError:
            # Skip unreadable parquet files rather than aborting the chunk
            continue
    to_save = pd.concat(to_concat)
    # NOTE(review): `working_dir` is read from module/global scope here —
    # presumably set by the caller; confirm it ends with a path separator.
    to_save.to_parquet(working_dir + 'athena_{}.parquet'.format(index))


def combine_results_ComStock(working_dir, results_dir, year, total_file_number):
#year = '2012'
#working_dir = '/projects/eedr/comstock/full-comstock/results/simulation_output/'
#results_dir = '/projects/eedr/comstock/full-comstock/results/parquet/baseline'
# year = '2012'
# working_dir = '/projects/eedr/comstock/full-comstock/results/simulation_output/'
# results_dir = '/projects/eedr/comstock/full-comstock/results/parquet/baseline'

data_len_dic = {'2012': 35136,
'2013': 35040,
Expand All @@ -290,13 +294,13 @@ def combine_results_ComStock(working_dir, results_dir, year, total_file_number):
'2016': 35136,
'2017': 35040}

if year not in ['2012','2013','2014','2015','2016','2017']:
if year not in ['2012', '2013', '2014', '2015', '2016', '2017']:
raise RuntimeError('YEAR provided was {}'.format(year))
# Iterate over the combination of scenarios and years in the LA100 study
metadf = pd.read_parquet(results_dir +'/results_up00.parquet')
#metadf = pd.read_parquet(results_dir +'/results_up00.parquet').iloc[0:20,:]
metadf = pd.read_parquet(results_dir + '/results_up00.parquet')
# metadf = pd.read_parquet(results_dir + '/results_up00.parquet').iloc[0:20,:]
metadf = metadf.loc[metadf['completed_status'] == 'Success']
#metadf = pd.read_parquet(results_dir +'/results_up00.parquet')
# metadf = pd.read_parquet(results_dir +'/results_up00.parquet')
cols_to_keep = [
'building_id',
'build_existing_model.aspect_ratio',
Expand Down Expand Up @@ -326,7 +330,7 @@ def combine_results_ComStock(working_dir, results_dir, year, total_file_number):
'building_id': 'building_id',
'build_existing_model.aspect_ratio': 'aspect_ratio',
'build_existing_model.changebuildinglocation_climate_zone': 'cz',
'build_existing_model.changebuildinglocation_weather_file_name':'weather_file',
'build_existing_model.changebuildinglocation_weather_file_name': 'weather_file',
'build_existing_model.create_bar_from_building_type_ratios_bldg_type_a': 'bldg_type',
'build_existing_model.create_bar_from_building_type_ratios_floor_height': 'ftf_height',
'build_existing_model.create_bar_from_building_type_ratios_num_stories_above_grade': 'ns',
Expand Down Expand Up @@ -365,7 +369,7 @@ def combine_results_ComStock(working_dir, results_dir, year, total_file_number):

# Step 2. aggregate parquet files
# Aggregate individual athena files into total_file_number files
#total_file_number = 140
# total_file_number = 140
new_parquets = [
working_dir + 'up00/bldg{0:07d}/athena.parquet'.format(dask_bag_list[item]['building_id']) for
item in dask_bag_list
Expand All @@ -375,7 +379,7 @@ def combine_results_ComStock(working_dir, results_dir, year, total_file_number):
Parallel(n_jobs=-1, verbose=9)(
delayed(concat_athena_parquets)(chunks[index], index) for index in range(len(chunks)))
print('Completed aggregating athena parquet files for ComStock up00.')
# Liang's edit ends


def combine_results(results_dir, config, skip_timeseries=False, aggregate_timeseries=False, reporting_measures=[]):
fs = open_fs(results_dir)
Expand Down
2 changes: 1 addition & 1 deletion buildstockbatch/sampler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
from .residential_docker import ResidentialDockerSampler # noqa F041
from .residential_singularity import ResidentialSingularitySampler # noqa F041
from .commercial_sobol import CommercialSobolSingularitySampler, CommercialSobolDockerSampler # noqa F041
from .precomputed import PrecomputedDockerSampler, PrecomputedSingularitySampler
from .precomputed import PrecomputedDockerSampler, PrecomputedSingularitySampler # noqa F041
25 changes: 18 additions & 7 deletions buildstockbatch/sampler/precomputed.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,6 @@ def check_sampling(self, output_path, n_datapoints=None):
'to the number of rows in the buildstock.csv file. Remove or comment out baseline->n_datapoints from '
'your project file.'
)
shutil.copyfile(os.path.abspath(sample_filename), os.path.join(os.path.dirname(os.path.abspath(output_path)),
'housing_characteristics', 'buildstock.csv'))
if os.path.abspath(sample_filename) == os.path.abspath(output_path):
return os.path.abspath(output_path)
else:
Expand Down Expand Up @@ -114,7 +112,7 @@ def run_sampling(self, n_datapoints=None):

class PrecomputedDockerSampler(PrecomputedBaseSampler):

def __init__(self, *args, **kwargs):
def __init__(self, output_dir, *args, **kwargs):
"""
This class uses the Commercial Precomputed Sampler for local Docker deployments
"""
Expand All @@ -133,8 +131,21 @@ def run_sampling(self, n_datapoints=None):
buildstock_path = self.cfg['baseline']['precomputed_sample']
if not os.path.isfile(buildstock_path):
raise RuntimeError('Cannot find buildstock file {}'.format(buildstock_path))
shutil.copy2(buildstock_path, os.path.join(self.project_dir, 'housing_characteristics'))
shutil.move(os.path.join(os.path.join(self.project_dir, 'housing_characteristics'),
os.path.basename(buildstock_path)), csv_path)
shutil.copy2(csv_path, os.path.join(self.project_dir, 'buildstock.csv'))
housing_chars_dir = os.path.join(self.project_dir, 'housing_characteristics')
housing_chars_buildstock_path = os.path.join(housing_chars_dir, os.path.basename(buildstock_path))
# Copy the arbitrarily named buildstock file into the housing_characteristics directory
# (unless it is already in that directory)
if os.path.abspath(buildstock_path) != os.path.abspath(housing_chars_buildstock_path):
logger.debug(f"Copying {buildstock_path} to \n {housing_chars_buildstock_path}")
shutil.copy2(buildstock_path, housing_chars_buildstock_path)
# Copy the arbitrarily named buildstock file to 'buildstock.csv'
# (unless it is already named 'buildstock.csv')
if os.path.abspath(housing_chars_buildstock_path) != os.path.abspath(csv_path):
logger.debug(f"Copying {housing_chars_buildstock_path} to \n {csv_path}")
shutil.copy2(housing_chars_buildstock_path, csv_path)
# Copy the 'buildstock.csv' file back to the project root
project_root_buildstock_path = os.path.join(self.project_dir, 'buildstock.csv')
if os.path.abspath(csv_path) != os.path.abspath(project_root_buildstock_path):
logger.debug(f"Copying {housing_chars_buildstock_path} to \n {csv_path}")
shutil.copy2(csv_path, project_root_buildstock_path)
return self.check_sampling(csv_path, n_datapoints)
50 changes: 29 additions & 21 deletions buildstockbatch/test/test_docker.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,34 @@
import requests
import re
from unittest.mock import patch

from buildstockbatch.localdocker import LocalDockerBatch
from buildstockbatch.base import BuildStockBatchBase


def test_docker_image_exists_on_docker_hub(basic_residential_project_file):
    """Verify the nrel/openstudio docker image for the configured OS version exists on Docker Hub.

    Follows the Docker registry v2 token-auth flow: an unauthenticated request
    returns 401 with a Www-Authenticate header naming the auth realm and
    service; a pull-scoped token is then requested and used to HEAD the
    image manifest for the expected tag.
    """
    project_filename, results_dir = basic_residential_project_file()
    # Use a BuildStockBatchBase instance to get the version of OpenStudio
    # because instantiating a LocalDockerBatch fails to connect
    # with the docker website in the testing context for some reason.
    with patch.object(BuildStockBatchBase, 'weather_dir', None):
        bsb = BuildStockBatchBase(project_filename)
        docker_image = 'nrel/openstudio'
        docker_tag = bsb.os_version
        baseurl = 'https://registry.hub.docker.com/v2/'
        r1 = requests.get(baseurl)
        # Registry must challenge unauthenticated clients with 401
        assert r1.status_code == 401
        m = re.search(r'realm="(.+?)"', r1.headers['Www-Authenticate'])
        authurl = m.group(1)
        m = re.search(r'service="(.+?)"', r1.headers['Www-Authenticate'])
        service = m.group(1)
        r2 = requests.get(authurl, params={
            'service': service,
            'scope': f'repository:{docker_image}:pull'
        })
        assert r2.ok
        token = r2.json()['token']
        # HEAD the manifest: 2xx means the tag exists without downloading it
        r3 = requests.head(
            f'{baseurl}{docker_image}/manifests/{docker_tag}',
            headers={'Authorization': f'Bearer {token}'}
        )
        assert r3.ok
13 changes: 9 additions & 4 deletions buildstockbatch/test/test_hpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,15 @@
from buildstockbatch.hpc import HPCBatchBase


def test_singularity_image_download_url(basic_residential_project_file):
    """Verify the singularity image URL for the configured OpenStudio version is reachable."""
    project_filename, results_dir = basic_residential_project_file()
    # Patch out filesystem/environment-dependent properties so HPCBatchBase
    # can be instantiated outside a real HPC environment.
    with patch.object(HPCBatchBase, 'weather_dir', None), \
            patch.object(HPCBatchBase, 'output_dir', results_dir), \
            patch.object(HPCBatchBase, 'singularity_image', '/path/to/singularity.simg'):
        hpc_batch_base = HPCBatchBase(project_filename)
        url = hpc_batch_base.singularity_image_url
        # HEAD avoids downloading the (large) image; 200 means it exists
        r = requests.head(url)
        assert r.status_code == requests.codes.ok


@patch('buildstockbatch.hpc.subprocess')
Expand Down