diff --git a/buildstockbatch/aws/aws.py b/buildstockbatch/aws/aws.py index 6e4c1a7f..6bff6a5f 100644 --- a/buildstockbatch/aws/aws.py +++ b/buildstockbatch/aws/aws.py @@ -14,8 +14,6 @@ import base64 import boto3 from botocore.exceptions import ClientError -import csv -from fsspec.implementations.local import LocalFileSystem import gzip from joblib import Parallel, delayed import json @@ -24,15 +22,12 @@ import pathlib import random from s3fs import S3FileSystem -import shutil -import subprocess import tarfile import re import time import io import zipfile -from buildstockbatch import postprocessing from buildstockbatch.aws.awsbase import AwsJobBase from buildstockbatch.base import ValidationError from buildstockbatch.cloud.docker_base import DockerBatchBase @@ -1700,37 +1695,7 @@ def run_job(cls, job_id, bucket, prefix, job_name, region): weather_dir = sim_dir / "weather" os.makedirs(weather_dir, exist_ok=True) - # Make a lookup of which parameter points to the weather file from options_lookup.tsv - with open(sim_dir / "lib" / "resources" / "options_lookup.tsv", "r", encoding="utf-8") as f: - tsv_reader = csv.reader(f, delimiter="\t") - next(tsv_reader) # skip headers - param_name = None - epws_by_option = {} - for row in tsv_reader: - row_has_epw = [x.endswith(".epw") for x in row[2:]] - if sum(row_has_epw): - if row[0] != param_name and param_name is not None: - raise RuntimeError( - "The epw files are specified in options_lookup.tsv under more than one parameter type: " - f"{param_name}, {row[0]}" - ) - epw_filename = row[row_has_epw.index(True) + 2].split("=")[1] - param_name = row[0] - option_name = row[1] - epws_by_option[option_name] = epw_filename - - # Look through the buildstock.csv to find the appropriate location and epw - epws_to_download = set() - building_ids = [x[0] for x in jobs_d["batch"]] - with open( - sim_dir / "lib" / "housing_characteristics" / "buildstock.csv", - "r", - encoding="utf-8", - ) as f: - csv_reader = csv.DictReader(f) - for row 
in csv_reader: - if int(row["Building"]) in building_ids: - epws_to_download.add(epws_by_option[row[param_name]]) + epws_to_download = cls.get_epws_to_download(sim_dir, jobs_d) # Download the epws needed for these simulations for epw_filename in epws_to_download: @@ -1740,92 +1705,8 @@ def run_job(cls, job_id, bucket, prefix, job_name, region): with open(weather_dir / epw_filename, "wb") as f_out: logger.debug("Extracting {}".format(epw_filename)) f_out.write(gzip.decompress(f_gz.getvalue())) - asset_dirs = os.listdir(sim_dir) - - fs = S3FileSystem() - local_fs = LocalFileSystem() - reporting_measures = cls.get_reporting_measures(cfg) - dpouts = [] - simulation_output_tar_filename = sim_dir.parent / "simulation_outputs.tar.gz" - with tarfile.open(str(simulation_output_tar_filename), "w:gz") as simout_tar: - for building_id, upgrade_idx in jobs_d["batch"]: - upgrade_id = 0 if upgrade_idx is None else upgrade_idx + 1 - sim_id = f"bldg{building_id:07d}up{upgrade_id:02d}" - - # Create OSW - osw = cls.create_osw(cfg, jobs_d["n_datapoints"], sim_id, building_id, upgrade_idx) - with open(os.path.join(sim_dir, "in.osw"), "w") as f: - json.dump(osw, f, indent=4) - - # Run Simulation - with open(sim_dir / "os_stdout.log", "w") as f_out: - try: - logger.debug("Running {}".format(sim_id)) - subprocess.run( - ["openstudio", "run", "-w", "in.osw"], - check=True, - stdout=f_out, - stderr=subprocess.STDOUT, - cwd=str(sim_dir), - ) - except subprocess.CalledProcessError: - logger.debug(f"Simulation failed: see {sim_id}/os_stdout.log") - - # Clean Up simulation directory - cls.cleanup_sim_dir( - sim_dir, - fs, - f"{bucket}/{prefix}/results/simulation_output/timeseries", - upgrade_id, - building_id, - ) - - # Read data_point_out.json - dpout = postprocessing.read_simulation_outputs( - local_fs, reporting_measures, str(sim_dir), upgrade_id, building_id - ) - dpouts.append(dpout) - - # Add the rest of the simulation outputs to the tar archive - logger.info("Archiving simulation 
outputs") - for dirpath, dirnames, filenames in os.walk(sim_dir): - if dirpath == str(sim_dir): - for dirname in set(dirnames).intersection(asset_dirs): - dirnames.remove(dirname) - for filename in filenames: - abspath = os.path.join(dirpath, filename) - relpath = os.path.relpath(abspath, sim_dir) - simout_tar.add(abspath, os.path.join(sim_id, relpath)) - - # Clear directory for next simulation - logger.debug("Clearing out simulation directory") - for item in set(os.listdir(sim_dir)).difference(asset_dirs): - if os.path.isdir(item): - shutil.rmtree(item) - elif os.path.isfile(item): - os.remove(item) - - # Upload simulation outputs tarfile to s3 - fs.put( - str(simulation_output_tar_filename), - f"{bucket}/{prefix}/results/simulation_output/simulations_job{job_id}.tar.gz", - ) - # Upload aggregated dpouts as a json file - with fs.open( - f"{bucket}/{prefix}/results/simulation_output/results_job{job_id}.json.gz", - "wb", - ) as f1: - with gzip.open(f1, "wt", encoding="utf-8") as f2: - json.dump(dpouts, f2) - - # Remove files (it helps docker if we don't leave a bunch of files laying around) - os.remove(simulation_output_tar_filename) - for item in os.listdir(sim_dir): - if os.path.isdir(item): - shutil.rmtree(item) - elif os.path.isfile(item): - os.remove(item) + cls.run_simulations(cfg, jobs_d, job_id, sim_dir, S3FileSystem(), bucket, prefix) @log_error_details() diff --git a/buildstockbatch/cloud/docker_base.py b/buildstockbatch/cloud/docker_base.py index fb76417e..46f70447 100644 --- a/buildstockbatch/cloud/docker_base.py +++ b/buildstockbatch/cloud/docker_base.py @@ -9,8 +9,11 @@ :license: BSD-3 """ import collections -import docker +import csv from dataclasses import dataclass +import docker +from fsspec.implementations.local import LocalFileSystem +import gzip import itertools from joblib import Parallel, delayed import json @@ -20,10 +23,12 @@ import pathlib import random import shutil +import subprocess import tarfile import tempfile import time +from 
buildstockbatch import postprocessing from buildstockbatch.base import BuildStockBatchBase from buildstockbatch.utils import ContainerRuntime, calc_hash_for_file, compress_file, read_csv @@ -311,3 +316,148 @@ def _prep_jobs_for_batch(self, tmppath): ) return DockerBatchBase.BatchInfo(n_sims=n_sims, n_sims_per_job=n_sims_per_job, job_count=job_count) + + @classmethod + def get_epws_to_download(cls, sim_dir, jobs_d): + """ + Gets the list of filenames for the weather data required for a single batch of simulations. + + :param sim_dir: Path to the directory where job files are stored + :param jobs_d: Contents of a single job JSON file; contains the list of buildings to simulate in this job. + + :returns: Set of epw filenames needed for this batch of simulations. + """ + # Make a lookup of which parameter points to the weather file from options_lookup.tsv + with open(sim_dir / "lib" / "resources" / "options_lookup.tsv", "r", encoding="utf-8") as f: + tsv_reader = csv.reader(f, delimiter="\t") + next(tsv_reader) # skip headers + param_name = None + epws_by_option = {} + for row in tsv_reader: + row_has_epw = [x.endswith(".epw") for x in row[2:]] + if sum(row_has_epw): + if row[0] != param_name and param_name is not None: + raise RuntimeError( + "The epw files are specified in options_lookup.tsv under more than one parameter type: " + f"{param_name}, {row[0]}" + ) + epw_filename = row[row_has_epw.index(True) + 2].split("=")[1] + param_name = row[0] + option_name = row[1] + epws_by_option[option_name] = epw_filename + + # Look through the buildstock.csv to find the appropriate location and epw + epws_to_download = set() + building_ids = [x[0] for x in jobs_d["batch"]] + with open( + sim_dir / "lib" / "housing_characteristics" / "buildstock.csv", + "r", + encoding="utf-8", + ) as f: + csv_reader = csv.DictReader(f) + for row in csv_reader: + if int(row["Building"]) in building_ids: + epws_to_download.add(epws_by_option[row[param_name]]) + + return epws_to_download + + 
@classmethod + def run_simulations(cls, cfg, job_id, jobs_d, sim_dir, fs, output_path): + """ + Run one batch of simulations. + + Runs the simulations, writes outputs to the provided storage bucket, and cleans up intermediate files. + + :param cfg: Project config contents. + :param job_id: Index of this job. + :param jobs_d: Contents of a single job JSON file; contains the list of buildings to simulate in this job. + :param sim_dir: Path to the (local) directory where job files are stored. + :param fs: Filesystem to use when writing outputs to storage bucket + :param output_path: File path (typically `bucket/prefix`) to write outputs to. + """ + local_fs = LocalFileSystem() + reporting_measures = cls.get_reporting_measures(cfg) + dpouts = [] + simulation_output_tar_filename = sim_dir.parent / "simulation_outputs.tar.gz" + asset_dirs = os.listdir(sim_dir) + ts_output_dir = f"{output_path}/results/simulation_output/timeseries" + + with tarfile.open(str(simulation_output_tar_filename), "w:gz") as simout_tar: + for building_id, upgrade_idx in jobs_d["batch"]: + upgrade_id = 0 if upgrade_idx is None else upgrade_idx + 1 + sim_id = f"bldg{building_id:07d}up{upgrade_id:02d}" + + # Create OSW + osw = cls.create_osw(cfg, jobs_d["n_datapoints"], sim_id, building_id, upgrade_idx) + with open(os.path.join(sim_dir, "in.osw"), "w") as f: + json.dump(osw, f, indent=4) + + # Run Simulation + with open(sim_dir / "os_stdout.log", "w") as f_out: + try: + logger.debug("Running {}".format(sim_id)) + subprocess.run( + ["openstudio", "run", "-w", "in.osw"], + check=True, + stdout=f_out, + stderr=subprocess.STDOUT, + cwd=str(sim_dir), + ) + except subprocess.CalledProcessError: + logger.debug(f"Simulation failed: see {sim_id}/os_stdout.log") + + # Clean Up simulation directory + cls.cleanup_sim_dir( + sim_dir, + fs, + ts_output_dir, + upgrade_id, + building_id, + ) + + # Read data_point_out.json + dpout = postprocessing.read_simulation_outputs( + local_fs, reporting_measures, 
str(sim_dir), upgrade_id, building_id + ) + dpouts.append(dpout) + + # Add the rest of the simulation outputs to the tar archive + logger.info("Archiving simulation outputs") + for dirpath, dirnames, filenames in os.walk(sim_dir): + if dirpath == str(sim_dir): + for dirname in set(dirnames).intersection(asset_dirs): + dirnames.remove(dirname) + for filename in filenames: + abspath = os.path.join(dirpath, filename) + relpath = os.path.relpath(abspath, sim_dir) + simout_tar.add(abspath, os.path.join(sim_id, relpath)) + + # Clear directory for next simulation + logger.debug("Clearing out simulation directory") + for item in set(os.listdir(sim_dir)).difference(asset_dirs): + if os.path.isdir(item): + shutil.rmtree(item) + elif os.path.isfile(item): + os.remove(item) + + # Upload simulation outputs tarfile to s3 + fs.put( + str(simulation_output_tar_filename), + f"{output_path}/results/simulation_output/simulations_job{job_id}.tar.gz", + ) + + # Upload aggregated dpouts as a json file + with fs.open( + f"{output_path}/results/simulation_output/results_job{job_id}.json.gz", + "wb", + ) as f1: + with gzip.open(f1, "wt", encoding="utf-8") as f2: + json.dump(dpouts, f2) + + # Remove files (it helps docker if we don't leave a bunch of files laying around) + os.remove(simulation_output_tar_filename) + for item in os.listdir(sim_dir): + if os.path.isdir(item): + shutil.rmtree(item) + elif os.path.isfile(item): + os.remove(item) diff --git a/buildstockbatch/test/test_docker_base.py b/buildstockbatch/test/test_docker_base.py index 1be262db..cf4d4b25 100644 --- a/buildstockbatch/test/test_docker_base.py +++ b/buildstockbatch/test/test_docker_base.py @@ -1,13 +1,16 @@ """Tests for the DockerBatchBase class.""" +from fsspec.implementations.local import LocalFileSystem import json import os import pathlib +import shutil import tarfile import tempfile from unittest.mock import MagicMock, PropertyMock from buildstockbatch.cloud.docker_base import DockerBatchBase from 
buildstockbatch.test.shared_testing_stuff import docker_available +from buildstockbatch.utils import get_project_configuration here = os.path.dirname(os.path.abspath(__file__)) resources_dir = os.path.join(here, "test_inputs", "test_openstudio_buildstock", "resources") @@ -71,3 +74,61 @@ def test_run_batch_prep(basic_residential_project_file, mocker): assert [building, None] in simulations # Building with upgrade 0 assert [building, 0] in simulations + + +def test_get_epws_to_download(): + resources_dir_path = pathlib.Path(resources_dir) + options_file = resources_dir_path / "options_lookup.tsv" + buildstock_file = resources_dir_path / "buildstock_good.csv" + + with tempfile.TemporaryDirectory(prefix="bsb_") as sim_dir_str: + sim_dir = pathlib.Path(sim_dir_str) + os.makedirs(sim_dir / "lib" / "resources") + os.makedirs(sim_dir / "lib" / "housing_characteristics") + shutil.copy(options_file, sim_dir / "lib" / "resources") + shutil.copy(buildstock_file, sim_dir / "lib" / "housing_characteristics" / "buildstock.csv") + + jobs_d = { + "job_num": 0, + "n_datapoints": 10, + "batch": [ + [1, None], + [5, None], + ], + } + + epws = DockerBatchBase.get_epws_to_download(sim_dir, jobs_d) + assert epws == set(["weather/G0100970.epw", "weather/G0100830.epw"]) + + +def test_run_simulations(basic_residential_project_file): + jobs_d = { + "job_num": 0, + "n_datapoints": 10, + "batch": [ + [1, None], + [5, None], + ], + } + fs = LocalFileSystem() + project_filename, results_dir = basic_residential_project_file() + cfg = get_project_configuration(project_filename) + + with tempfile.TemporaryDirectory(prefix="bsb_") as temp_dir_str: + temp_path = pathlib.Path(temp_dir_str) + sim_dir = temp_path / "simdata" / "openstudio" + os.makedirs(sim_dir) + # sim_dir is also the working directory (defined in the nrel/openstudio + # Dockerfile), which some file operations depend on. 
+ old_cwd = os.getcwd() + os.chdir(sim_dir) + bucket = temp_path / "bucket" + os.makedirs(bucket / "test_prefix" / "results" / "simulation_output") + + DockerBatchBase.run_simulations(cfg, 0, jobs_d, sim_dir, fs, f"{bucket}/test_prefix") + + output_dir = bucket / "test_prefix" / "results" / "simulation_output" + assert sorted(os.listdir(output_dir)) == ["results_job0.json.gz", "simulations_job0.tar.gz"] + # Check that files were cleaned up correctly + assert not os.listdir(sim_dir) + os.chdir(old_cwd) diff --git a/buildstockbatch/test/test_inputs/test_openstudio_buildstock/resources/buildstock_good.csv b/buildstockbatch/test/test_inputs/test_openstudio_buildstock/resources/buildstock_good.csv index 1991e198..d69de02c 100644 --- a/buildstockbatch/test/test_inputs/test_openstudio_buildstock/resources/buildstock_good.csv +++ b/buildstockbatch/test/test_inputs/test_openstudio_buildstock/resources/buildstock_good.csv @@ -1,6 +1,6 @@ -Building,Bedroom,Location,Vintage,State,Insulation Wall,Insulation Slab,Zipcode -1,1,AL_Mobile-Rgnl.AP.722230,<1950,CO,Good Option,None,36608 -2,3,AL_Mobile-Rgnl.AP.722230,1940s,CO,Good Option,None,36601 -3,2,AL_Mobile-Rgnl.AP.722230,2010s,VA,Good Option,None,36602 -4,1,AL_Mobile-Rgnl.AP.722230,2000s,VA,Good Option,None,36603 -5,2,AL_Mobile-Rgnl.AP.722230,1970s,VA,Good Option,None,36604 \ No newline at end of file +Building,Bedroom,Location,Vintage,State,Insulation Wall,Insulation Slab,Zipcode,County +1,1,AL_Mobile-Rgnl.AP.722230,<1950,CO,Good Option,None,36608,County1 +2,3,AL_Mobile-Rgnl.AP.722230,1940s,CO,Good Option,None,36601,County1 +3,2,AL_Mobile-Rgnl.AP.722230,2010s,VA,Good Option,None,36602,County1 +4,1,AL_Mobile-Rgnl.AP.722230,2000s,VA,Good Option,None,36603,County2 +5,2,AL_Mobile-Rgnl.AP.722230,1970s,VA,Good Option,None,36604,County2 diff --git a/buildstockbatch/test/test_inputs/test_openstudio_buildstock/resources/options_lookup.tsv b/buildstockbatch/test/test_inputs/test_openstudio_buildstock/resources/options_lookup.tsv index 
a8bddc09..73032a93 100644 --- a/buildstockbatch/test/test_inputs/test_openstudio_buildstock/resources/options_lookup.tsv +++ b/buildstockbatch/test/test_inputs/test_openstudio_buildstock/resources/options_lookup.tsv @@ -1,34 +1,34 @@ -Parameter Name Option Name Measure Dir Measure Arg 1 Measure Arg 2 ... -Location AL_Birmingham.Muni.AP.722280 -Location AL_Huntsville.Intl.AP-Jones.Field.723230 -Location AL_Mobile-Rgnl.AP.722230 -Vintage <1940 -Vintage 1940s -Vintage <1950 -Vintage 1950s -Vintage 1960s -Vintage 1970s -Vintage 1980s -Vintage 1990s -Vintage 2000s -Vintage 2010s -State VA -State CO -County County1 -County County2 -Bedroom 1 -Bedroom 2 -Bedroom 3 -Zipcode * -Insulation Slab None -Insulation Slab Good Option ResidentialConstructionsSlab perimeter_r=0 perimeter_width=0 whole_r=0 gap_r=0 exterior_r=0 exterior_depth=0 -Insulation Slab Missing Argument ResidentialConstructionsSlab perimeter_r=0 perimeter_width=0 whole_r=10 gap_r=5 exterior_r=0 -Insulation Unfinished Basement None -Insulation Unfinished Basement Good Option ResidentialConstructionsUnfinishedBasement wall_ins_height=0 wall_cavity_r=0 wall_install_grade=1 wall_cavity_depth_in=0 wall_filled_cavity=true wall_framing_factor=0 wall_rigid_r=0 wall_drywall_thick_in=0.5 ceiling_cavity_r=0 ceiling_install_grade=1 ceiling_framing_factor=0.13 ceiling_joist_height_in=9.25 slab_whole_r=0 +Parameter Name Option Name Measure Dir Measure Arg 1 Measure Arg 2 ... 
+Location AL_Birmingham.Muni.AP.722280 +Location AL_Huntsville.Intl.AP-Jones.Field.723230 +Location AL_Mobile-Rgnl.AP.722230 +Vintage <1940 +Vintage 1940s +Vintage <1950 +Vintage 1950s +Vintage 1960s +Vintage 1970s +Vintage 1980s +Vintage 1990s +Vintage 2000s +Vintage 2010s +State VA +State CO +County County1 weather_station_epw_filepath=weather/G0100970.epw +County County2 weather_station_epw_filepath=weather/G0100830.epw +Bedroom 1 +Bedroom 2 +Bedroom 3 +Zipcode * +Insulation Slab None +Insulation Slab Good Option ResidentialConstructionsSlab perimeter_r=0 perimeter_width=0 whole_r=0 gap_r=0 exterior_r=0 exterior_depth=0 +Insulation Slab Missing Argument ResidentialConstructionsSlab perimeter_r=0 perimeter_width=0 whole_r=10 gap_r=5 exterior_r=0 +Insulation Unfinished Basement None +Insulation Unfinished Basement Good Option ResidentialConstructionsUnfinishedBasement wall_ins_height=0 wall_cavity_r=0 wall_install_grade=1 wall_cavity_depth_in=0 wall_filled_cavity=true wall_framing_factor=0 wall_rigid_r=0 wall_drywall_thick_in=0.5 ceiling_cavity_r=0 ceiling_install_grade=1 ceiling_framing_factor=0.13 ceiling_joist_height_in=9.25 slab_whole_r=0 Insulation Unfinished Basement Extra Argument ResidentialConstructionsUnfinishedBasement wall_ins_height=0 wall_cavity_r=0 wall_install_grade=1 wall_cavity_depth_in=0 wall_filled_cavity=true wall_framing_factor=0 wall_rigid_r=0 wall_drywall_thick_in=0.5 ceiling_cavity_r=13 ceiling_install_grade=1 ceiling_framing_factor=0.13 ceiling_joist_height_in=9.25 slab_whole_r=0 extra_arg=1 -Insulation Finished Basement None -Insulation Finished Basement Good Option ResidentialConstructionsFinishedBasement wall_ins_height=0 wall_cavity_r=0 wall_install_grade=1 wall_cavity_depth_in=0 wall_filled_cavity=true wall_framing_factor=0 wall_rigid_r=0 wall_drywall_thick_in=0.5 slab_whole_r=0 -Insulation Finished Basement Bad Value ResidentialConstructionsFinishedBasement wall_ins_height=4 wall_cavity_r=0 wall_install_grade=1 
wall_cavity_depth_in=0 wall_filled_cavity=1.5 wall_framing_factor=0 wall_rigid_r=5 wall_drywall_thick_in=0.5 slab_whole_r=0 -Insulation Wall Good Option ResidentialConstructionsWallsWoodStud cavity_r=0 install_grade=1 cavity_depth_in=3.5 cavity_filled=false framing_factor=0.25 drywall_thick_in=0.5 osb_thick_in=0.5 rigid_r=0.0 "exterior_finish=Vinyl, Light" -Insulation Wall Missing Measure ResidentialConstructionsWallsWoodStud cavity_r=0 install_grade=1 cavity_depth_in=3.5 cavity_filled=false framing_factor=0.25 drywall_thick_in=0.5 osb_thick_in=0.5 rigid_r=0.0 "exterior_finish=Vinyl, Light" - ResidentialMissingMeasure +Insulation Finished Basement None +Insulation Finished Basement Good Option ResidentialConstructionsFinishedBasement wall_ins_height=0 wall_cavity_r=0 wall_install_grade=1 wall_cavity_depth_in=0 wall_filled_cavity=true wall_framing_factor=0 wall_rigid_r=0 wall_drywall_thick_in=0.5 slab_whole_r=0 +Insulation Finished Basement Bad Value ResidentialConstructionsFinishedBasement wall_ins_height=4 wall_cavity_r=0 wall_install_grade=1 wall_cavity_depth_in=0 wall_filled_cavity=1.5 wall_framing_factor=0 wall_rigid_r=5 wall_drywall_thick_in=0.5 slab_whole_r=0 +Insulation Wall Good Option ResidentialConstructionsWallsWoodStud cavity_r=0 install_grade=1 cavity_depth_in=3.5 cavity_filled=false framing_factor=0.25 drywall_thick_in=0.5 osb_thick_in=0.5 rigid_r=0.0 "exterior_finish=Vinyl, Light" +Insulation Wall Missing Measure ResidentialConstructionsWallsWoodStud cavity_r=0 install_grade=1 cavity_depth_in=3.5 cavity_filled=false framing_factor=0.25 drywall_thick_in=0.5 osb_thick_in=0.5 rigid_r=0.0 "exterior_finish=Vinyl, Light" + ResidentialMissingMeasure diff --git a/docs/changelog/changelog_dev.rst b/docs/changelog/changelog_dev.rst index 8ee3bfaf..4506d2b0 100644 --- a/docs/changelog/changelog_dev.rst +++ b/docs/changelog/changelog_dev.rst @@ -23,3 +23,9 @@ Development Changelog between various implementations (GCP implementation to come). 
This also includes teasing apart the several batch prep steps (weather, assets, and jobs) into their own methods so they can each be more easily understood, shared, and maintained. + + .. change:: + :tags: general + :pullreq: 422 + + Refactor AWS code so it can be shared by the upcoming GCP implementation.