In [1]:
import pandas as pd
from pathlib import Path
import pathlib

import logging
import getpass
import os
import sys
import shutil
import numpy as np

# login name of whoever is running the notebook
user = getpass.getuser()

This notebook maintains the BAUS visualizer output files. The core idea is to curate a list of runs and then let the notebook move the relevant files between the storage (staging) directory and the production directory.

#### Constants and paths

In [2]:
# Base file name for the log output; the logging setup cell below joins it
# onto the production directory (viz_dir_prod), so keep it a bare file name.
LOG_FILENAME = 'viz_dir_file_logger.log'

In [3]:
# On AO's machine, M:\urban_modeling is mounted at
# /Volumes/Data/Models/urban_modeling; on the lumodel server it is the M: drive.

HOME_DIR = Path.home()

# os.name is "nt" on Windows; everything else uses the mounted volume
if os.name == "nt":
    M_DRIVE = Path("M:/")
else:
    M_DRIVE = Path("/Volumes/Data/Models")

BOX_DIR = HOME_DIR.joinpath('Box')

In [4]:
# storage: the directory BAUS writes the visualizer files into
viz_dir_storage = M_DRIVE.joinpath(
    'urban_modeling', 'baus', 'PBA50Plus', 'BAUS_Visualizer_PBA50Plus_Files')

# production: the active location of the files used by the viz,
# a PRODUCTION subfolder of the storage directory
viz_dir_prod = viz_dir_storage / 'PRODUCTION'

In [5]:
# main run inventory list - rewritten from the run list curated in this
# notebook and held in the production folder
model_run_inventory_path = viz_dir_prod / 'model_run_inventory.csv'

In [6]:
# Logging setup: the root logger gets one console handler (INFO and up) and
# one log file per level ("info" and "debug") in the production directory.
#
# create logger (root logger, so the module-level logging.info(...) calls made
# by the functions in this notebook go through the handlers configured here)
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# Re-running this cell would otherwise attach a second set of handlers and
# emit every message twice. Drop the handlers a previous run of this cell
# added (identified by name) and close them so their log files are released.
handler_prefix = 'viz_dir_file_logger:'
for stale_handler in [h for h in logger.handlers
                      if (h.get_name() or '').startswith(handler_prefix)]:
    logger.removeHandler(stale_handler)
    stale_handler.close()

log_formatter = logging.Formatter(
    '%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')

# console handler
ch = logging.StreamHandler()
ch.set_name(handler_prefix + 'console')
ch.setLevel(logging.INFO)
ch.setFormatter(log_formatter)
logger.addHandler(ch)

# file handlers - one file per level.
# LOG_FILENAME has no "{}" placeholder, so LOG_FILENAME.format(...) returned
# the same name for both handlers and the two mode='w' streams overwrote each
# other. Build a distinct name per level instead, e.g. <stem>_info<suffix>.
# A LOG_FILENAME that does contain a placeholder is still honoured.
log_name = Path(LOG_FILENAME)
for level_label, level_value in (("info", logging.INFO), ("debug", logging.DEBUG)):
    if "{" in LOG_FILENAME:
        level_file_name = LOG_FILENAME.format(level_label)
    else:
        level_file_name = f"{log_name.stem}_{level_label}{log_name.suffix}"
    fh = logging.FileHandler(viz_dir_prod / level_file_name, mode='w')
    fh.set_name(handler_prefix + level_label)
    fh.setLevel(level_value)
    fh.setFormatter(log_formatter)
    logger.addHandler(fh)

### Define functions for getting a list of files and moving them between storage and production dirs

In [7]:
def write_list_to_file(data, path):
    """
    Write a single-column text file: a ``run_name`` header line followed by
    one line per entry of ``data``, in order.

    Parameters:
    data (list of str): Entries to write, one per line.
    path (str): Destination file; it is created or overwritten.

    """
    with open(path, 'w') as out:
        # header first, then one run per line
        out.write("run_name\n")
        out.writelines(f"{entry}\n" for entry in data)

In [8]:
def generate_visualizer_files_from_run_name(run_names, viz_dir_prod, viz_dir_storage):
    """
    Compose, for every run, the expected visualizer file path of each
    component in both the production and the storage directory.

    Paths are only built, never checked against the disk. The number of runs
    handled is logged at INFO level.

    Parameters:
    run_names (list of str): Run names used as file-name prefixes.
    viz_dir_prod (Path): Production visualizer directory.
    viz_dir_storage (Path): Storage visualizer directory.

    Returns:
    dict: ``{run_name: {component: {'PROD': Path, 'STORAGE': Path}}}``, with
          runs in input order and components in the order listed below.

    Example:
    >>> result = generate_visualizer_files_from_run_name(
    ...     ['run1'], Path('/path/to/prod'), Path('/path/to/storage'))
    >>> result['run1']['new_buildings']['PROD']
    Path('/path/to/prod/run1_new_buildings_summary.csv')
    >>> result['run1']['new_buildings']['STORAGE']
    Path('/path/to/storage/run1_new_buildings_summary.csv')
    """
    # storage label -> directory, in the order the labels appear in the result
    locations = (('PROD', viz_dir_prod), ('STORAGE', viz_dir_storage))

    def _component_files(run_name):
        # component name -> file name for one run
        return {
            "new_buildings": f"{run_name}_new_buildings_summary.csv",
            "taz": f"{run_name}_taz1_summary_growth.csv",
            "interim_zone_output": f"{run_name}_interim_zone_output_allyears.csv",

            "juris_dr": f"{run_name}_juris_dr_growth.csv",
            "county_dr": f"{run_name}_county_dr_growth.csv",
            "superdistrict_dr": f"{run_name}_superdistrict_dr_growth.csv",

            "juris_summary": f"{run_name}_juris_summary_growth.csv",
            "county_summary": f"{run_name}_county_summary_growth.csv",
            "superdistrict_summary": f"{run_name}_superdistrict_summary_growth.csv"
        }

    # run -> component -> storage label -> full path
    paths_by_run = {
        run_name: {
            component: {
                label: directory / file_name
                for label, directory in locations
            }
            for component, file_name in _component_files(run_name).items()
        }
        for run_name in run_names
    }

    logging.info(f'Generated summary files for {len(paths_by_run)} runs')
    return paths_by_run

In [36]:
def clear_prod_files(run_names, viz_dir_prod, viz_dir_storage):
    """
    Moves CSV files from the production directory (searched recursively) to
    the top level of the storage directory.

    Any CSV with 'model_run_inventory' in its name is left in production.
    A file whose name already exists in storage is never overwritten: it
    stays in production and the outcome is logged (INFO when the sizes match,
    WARNING when they differ). Errors for individual files are logged and do
    not stop the loop.

    Parameters:
    run_names (list of str): List of run names; only used for the logged
        count of expected run file sets, not to filter which files move.
    viz_dir_prod (Path): Path to the production visualizer directory.
    viz_dir_storage (Path): Path to the storage visualizer directory.
    """
    # Step 0: Get desired run files (used for the log line below only)
    desired_run_files = generate_visualizer_files_from_run_name(
        run_names, viz_dir_prod, viz_dir_storage)

    logging.info(
        f'Found {len(desired_run_files)} from {len(run_names)}')

    # Without the destination directory, shutil.move would rename a file onto
    # the directory path itself; refuse to start rather than risk that.
    if not viz_dir_storage.is_dir():
        logging.error(f'Storage directory not found: {viz_dir_storage}')
        return

    # Snapshot the listing before touching anything: files are moved while we
    # loop, and a lazy directory walk over a changing tree is unreliable.
    prod_files = list(viz_dir_prod.rglob('*.csv'))

    # Move all model output CSVs from PRODUCTION to STORAGE
    for f in prod_files:
        if 'model_run_inventory' in f.name:
            continue

        target_path = viz_dir_storage / f.name

        try:
            if not target_path.exists():
                logging.info(f'MOVING to STORAGE:\n\t{f.name}')
                # Name the destination file explicitly instead of the directory
                shutil.move(str(f), str(target_path))
            elif f.stat().st_size == target_path.stat().st_size:
                # Same name and size: assume it is the same file, keep both
                logging.info(
                    f'File already in storage; dont need to move again \n\t{f.name}')
            else:
                # Same name, different content size: do not overwrite, but
                # make the conflict visible instead of skipping silently
                logging.warning(
                    f'File in storage differs in size; left in production \n\t{f.name}')
        except FileNotFoundError:
            logging.error(f'File not found: {f}')
        except OSError as e:
            logging.error(f'Error moving file {f}: {e}')

In [32]:
def move_desired_run_files(run_names, viz_dir_prod, viz_dir_storage):
    """
    Moves desired run files from the storage directory to the production directory 
    if they do not already exist in the production directory.

    For each run/component file exactly one of three things happens:
    it is already in production (logged at INFO, nothing done); it is missing
    from storage too (logged at DEBUG, nothing done); or it is moved from
    storage to production (logged at INFO). Failures on individual files are
    logged at ERROR and do not stop the loop.

    Parameters:
    run_names (list of str): List of run names to generate the desired run files.
    viz_dir_prod (Path): Path to the production visualizer directory.
    viz_dir_storage (Path): Path to the storage visualizer directory.

    """
    # Get desired run files
    desired_run_files = generate_visualizer_files_from_run_name(run_names, viz_dir_prod, viz_dir_storage)

    for component_dict in desired_run_files.values():
        for paths in component_dict.values():
            storage_path = paths["STORAGE"]
            prod_path = paths["PROD"]
            try:
                if prod_path.exists():
                    logging.info(
                        f'File: {storage_path.name} is already found in PROD; nothing to be done'
                    )
                elif not storage_path.exists():
                    # Check before announcing a move, so the INFO log only
                    # reports moves that can actually happen. Missing files
                    # stay at DEBUG as before (presumably not every run
                    # produces every component - confirm).
                    logging.debug(f'Not found: {storage_path.name}')
                else:
                    logging.info(
                        f'File: {storage_path.name} is not found in PROD; moving from STORAGE'
                    )
                    shutil.move(str(storage_path), str(prod_path))
            except FileNotFoundError:
                # File vanished between the existence check and the move
                logging.debug(f'Not found: {storage_path.name}')
            except OSError as e:
                # Real failures (permissions, locks, ...) must reach the
                # console; same level as clear_prod_files uses
                logging.error(
                    f'File cannot be moved or accessed: {storage_path.name}, Error: {e}'
                )

#### Curate lists with runs to stage for the visualizer

In [33]:
# manual curation of runs to include

# PBA50 runs: base year 2020 summaries
pba50_runs = ['PBA50_FBP', 'PBA50_NP']

# PBA50+ runs
pba50p_runs = [
    'PBA50Plus_DBP_InitialRun_v7',
    'PBA50Plus_NP_InitialRun_v8',
    'PBA50Plus_NoProject_v7',
    'PBA50Plus_NoProject_v9',
    'PBA50Plus_NoProject_v10_zn_znmod_upd',
    'PBA50Plus_NoProject_v11',
    'PBA50Plus_Draft_Blueprint_v6',
    'PBA50Plus_Draft_Blueprint_v7_znupd',
    'PBA50Plus_Draft_Blueprint_v8_znupd_nodevfix',
]

# keyword arguments passed as **multiple_runs to the file-moving functions
multiple_runs = dict(
    run_names=[*pba50_runs, *pba50p_runs],
    viz_dir_prod=viz_dir_prod,
    viz_dir_storage=viz_dir_storage,
)
multiple_runs

{'run_names': ['PBA50_FBP',
  'PBA50_NP',
  'PBA50Plus_DBP_InitialRun_v7',
  'PBA50Plus_NP_InitialRun_v8',
  'PBA50Plus_NoProject_v7',
  'PBA50Plus_NoProject_v9',
  'PBA50Plus_NoProject_v10_zn_znmod_upd',
  'PBA50Plus_NoProject_v11',
  'PBA50Plus_Draft_Blueprint_v6',
  'PBA50Plus_Draft_Blueprint_v7_znupd',
  'PBA50Plus_Draft_Blueprint_v8_znupd_nodevfix'],
 'viz_dir_prod': WindowsPath('M:/urban_modeling/baus/PBA50Plus/BAUS_Visualizer_PBA50Plus_Files/PRODUCTION'),
 'viz_dir_storage': WindowsPath('M:/urban_modeling/baus/PBA50Plus/BAUS_Visualizer_PBA50Plus_Files')}

#### Run the functions

In [34]:
# write out run inventory for use in visualiser: a 'run_name' header plus one
# curated run per line; the existing inventory file is overwritten
write_list_to_file(multiple_runs['run_names'], model_run_inventory_path)

In [37]:
# step CLEARS out production viz files, moving them to storage
# (model_run_inventory CSVs stay put; a file whose name already exists in
# storage is not overwritten and remains in production)
clear_prod_files(**multiple_runs)

05/21/2024 02:57:33 PM - INFO - Generated summary files for 11 runs
05/21/2024 02:57:33 PM - INFO - Found 11 from 11
05/21/2024 02:57:33 PM - INFO - MOVING to STORAGE:
	PBA50Plus_DBP_InitialRun_v7_county_summary_growth.csv
05/21/2024 02:57:33 PM - INFO - MOVING to STORAGE:
	PBA50Plus_DBP_InitialRun_v7_interim_zone_output_allyears.csv
05/21/2024 02:57:33 PM - INFO - MOVING to STORAGE:
	PBA50Plus_DBP_InitialRun_v7_juris_summary_growth.csv
05/21/2024 02:57:33 PM - INFO - MOVING to STORAGE:
	PBA50Plus_DBP_InitialRun_v7_new_buildings_summary.csv
05/21/2024 02:57:33 PM - INFO - MOVING to STORAGE:
	PBA50Plus_DBP_InitialRun_v7_superdistrict_summary_growth.csv
05/21/2024 02:57:33 PM - INFO - MOVING to STORAGE:
	PBA50Plus_DBP_InitialRun_v7_taz1_summary_growth.csv
05/21/2024 02:57:33 PM - INFO - MOVING to STORAGE:
	PBA50Plus_Draft_Blueprint_v6_county_dr_growth.csv
05/21/2024 02:57:33 PM - INFO - MOVING to STORAGE:
	PBA50Plus_Draft_Blueprint_v6_county_summary_growth.csv
05/21/2024 02:57:33 PM - IN

In [38]:
# step MOVES files out of storage to production folder - only the component
# files of the curated runs, and only those not already present in production
# consider wrapping these into one step calling both
move_desired_run_files(**multiple_runs)

05/21/2024 02:57:46 PM - INFO - Generated summary files for 11 runs
05/21/2024 02:57:46 PM - INFO - File: PBA50_FBP_new_buildings_summary.csv is not found in PROD; moving from STORAGE
05/21/2024 02:57:46 PM - INFO - File: PBA50_FBP_taz1_summary_growth.csv is not found in PROD; moving from STORAGE
05/21/2024 02:57:46 PM - INFO - File: PBA50_FBP_interim_zone_output_allyears.csv is not found in PROD; moving from STORAGE
05/21/2024 02:57:46 PM - INFO - File: PBA50_FBP_juris_dr_growth.csv is not found in PROD; moving from STORAGE
05/21/2024 02:57:46 PM - INFO - File: PBA50_FBP_county_dr_growth.csv is not found in PROD; moving from STORAGE
05/21/2024 02:57:46 PM - INFO - File: PBA50_FBP_superdistrict_dr_growth.csv is not found in PROD; moving from STORAGE
05/21/2024 02:57:46 PM - INFO - File: PBA50_FBP_juris_summary_growth.csv is not found in PROD; moving from STORAGE
05/21/2024 02:57:46 PM - INFO - File: PBA50_FBP_county_summary_growth.csv is not found in PROD; moving from STORAGE
05/21/202