# Prep AWS Runs

 1. Read input zipfile for PFRA model from s3
 - Perform Scale Tests
 - Write input zipfiles for:
     - Scale Test events
     - Production Run events
     - Custom events


In [1]:
import sys;sys.path.append('../core')
from ras_ops import *
from scale_tests import *
from time import time, strftime
import pytz
from io import StringIO

### User Input

In [2]:
# --- User input: which model to prepare ---
project_name = "Sacramento"
model_subtype = "H00"
model_name = "P03"

# The model type is taken from the leading letter of the model name ("P03" -> Pluvial).
# Only the prefix is tested so that a name merely containing a "P" is not classified as
# Pluvial -- NOTE(review): assumes the type letter is always the first character; confirm.
if model_name.startswith("P"):
    model_type = "Pluvial"
else:
    model_type = "Fluvial"

# --- Make file paths ---
model_s3path = "s3://pfra/{0}/BaseModels/{0}_{1}_{2}.zip".format(project_name, model_name, model_subtype)
forcing_s3path = "s3://pfra/{0}/Data/Forcing/{1}/Outputs/{0}_{2}_Forcing.zip".format(project_name, model_type, model_name)

print(model_s3path)
print(forcing_s3path)

# --- User input: which events to prepare (leave exactly one line uncommented) ---
events = 'CustomEvents'
#events = 'ScaleTest'
#events = 'ProductionRuns'

# Custom event IDs are read from the first column of a header-less csv.
# The path is machine specific, so the file is only opened when custom events are
# actually requested; the other run types no longer fail when it is missing.
custom_events_csv = r'C:\Users\Administrator\Desktop\event.csv'
if events == 'CustomEvents':
    df = pd.read_csv(custom_events_csv, header=None)
else:
    # Keep `df` / `custom_events` defined (and empty) for the cells that reference them
    df = pd.DataFrame({0: []})
custom_events = df[0].values

# Initialize list to store errors
errors=[]

s3://pfra/Sacramento/BaseModels/Sacramento_P03_H00.zip
s3://pfra/Sacramento/Data/Forcing/Pluvial/Outputs/Sacramento_P03_Forcing.zip


#### Read data from S3

In [3]:
# Wrap the base-model zip located at model_s3path. RasModel is a project class brought in by
# the star imports at the top; later cells read model.contents and model.s3path from it.
model = RasModel(model_s3path)

### Verify required files are present and no 'development' files are included

In [4]:
# Display the archive members so they can be checked by eye: required model files present,
# and no leftovers such as backups or Thumbs.db that should not ship with the model.
model.contents

['Backup.g01',
 'ManningsN/Manning_nValues.hdf',
 'ManningsN/Manning_nValues.tif',
 'ManningsN/Thumbs.db',
 'ProjectionFile.prj',
 'Sacramento_P03_H00.b01',
 'Sacramento_P03_H00.bco01',
 'Sacramento_P03_H00.c02',
 'Sacramento_P03_H00.color_scales',
 'Sacramento_P03_H00.dss',
 'Sacramento_P03_H00.g02',
 'Sacramento_P03_H00.g02.hdf',
 'Sacramento_P03_H00.IC.O01',
 'Sacramento_P03_H00.p01',
 'Sacramento_P03_H00.p01.blf',
 'Sacramento_P03_H00.p01.hdf',
 'Sacramento_P03_H00.prj',
 'Sacramento_P03_H00.rasmap',
 'Sacramento_P03_H00.rasmap.backup',
 'Sacramento_P03_H00.u01',
 'Sacramento_P03_H00.x02',
 'Terrain/ModelTerrain.hdf',
 'Terrain/ModelTerrain.MergedInputs.tif',
 'Terrain/ModelTerrain.vrt',
 'Terrain/Thumbs.db']

In [5]:
# Project-level data for the model; the next cell reads prjData.current_plan from it.
prjData = RasProject(model)

In [6]:
# Plan data for the project's current plan (used to build the plan HDF wrapper below).
planData = RasPlan(model, prjData.current_plan)

In [7]:
# Plan HDF wrapper. Later cells use its mandatoryFiles, planHydrograpList, domains, cellCount
# and hdfLocal attributes. Note: the Main Function cell deletes `localPlan` and its temporary
# file when it finishes, so re-run this cell before re-running any inspection cell below.
localPlan = HDFPlanFile(model, planData)



In [8]:
# Display the files the plan reports as mandatory, to compare with the archive listing above.
localPlan.mandatoryFiles
#pl.Path(planData.plan_path).suffix

['Sacramento_P03_H00.b01', 'Sacramento_P03_H00.x02', 'Sacramento_P03_H00.c02']

In [9]:
# Echo the forcing archive path as a sanity check before it is loaded in the next cell.
forcing_s3path

's3://pfra/Sacramento/Data/Forcing/Pluvial/Outputs/Sacramento_P03_Forcing.zip'

In [10]:
# Forcing data for the model; later cells read its domainForcing mapping and productionEvents.
forcing_data = RASForcing(forcing_s3path)

In [11]:
#forcing_data.domainForcing

#### Perform Test Suite 

*Error class not invoked yet.*

**TODO:** This will need to be updated. For now, any errors from the scale tests are only written to a list; they do not prevent the notebook from moving forward.

In [12]:
# Run the model checks against everything loaded so far.
# The result replaces the `errors` list initialised in the User Input cell and is only
# displayed here: nothing below inspects it, so problems reported do not stop the run.
scaleTest = ModelCheck(model)
errors = scaleTest.runTests(model, prjData, planData, localPlan, forcing_data)
errors

[{'Geometry': ['Backup.g01', 'Sacramento_P03_H00.g02'],
  'cFile': ['Sacramento_P03_H00.c02', 'Sacramento_P03_H00.color_scales']}]

#### Prep Event list if needed

In [13]:
# Jobs pushed to S3 by the Main Function are collected in this list
jobsList=[]

# `events` arrives as a selector string from the User Input cell and leaves this cell as the
# list of event IDs to process. Resolve it only while it is still a string: on a re-run the
# already-resolved list is kept, instead of matching no branch and being silently replaced
# by the custom events.
if isinstance(events, str):
    if events == 'ProductionRuns':
        events = forcing_data.productionEvents

    elif events == 'ScaleTest':
        events = scaleTestEvents(model_s3path)

    elif events == 'CustomEvents':
        events = custom_events.tolist()

    else:
        # A mistyped selector used to fall through to the custom events without warning
        raise ValueError(
            "Unknown events option '{}': expected 'ProductionRuns', 'ScaleTest' or 'CustomEvents'".format(events)
        )

for e in events: print(e)

E0001


#### Compare BC's in Forcing with BC's in Model

*If the boundary condition (BC) names are not consistent, the Main Function will not create input files. Please verify that every BC that should be included is present in both the __Forcing__ file and the __Model__ file.*

In [17]:
# List the boundary-condition (BC) names per domain in the forcing data, followed by the
# hydrograph paths found in the model's plan file, so the two can be compared by eye.
domains_in_forcing = list(forcing_data.domainForcing.keys())
for d in domains_in_forcing:
    bcs = forcing_data.domainForcing[d]

    # Gather the names from every event type (first-seen order, no duplicates).
    # Previously only the names of the last event type were printed, and a domain with no
    # event types reused the previous domain's names (or raised NameError on the first one).
    bc_names = []
    for event_type in bcs.keys():
        for bc_name in bcs[event_type]['BCName'].keys():
            if bc_name not in bc_names:
                bc_names.append(bc_name)

    print("BC's found in Forcing File:\n\tDomain: {}: {}".format( d, list(bc_names)))

print("\nBC's found in Model File:")
for bc in localPlan.planHydrograpList:
    print(bc)

BC's found in Forcing File:
	Domain: D36: ['D36']
BC's found in Forcing File:
	Domain: D37: ['D37', 'L18', 'L19', 'L20', 'L21', 'L22', 'L23', 'L24', 'L25', 'L26', 'L27', 'L29', 'L31', 'L32', 'L33', 'L40', 'L41', 'L42', 'L43', 'L44', 'L87']
BC's found in Forcing File:
	Domain: D38: ['D38', 'L13', 'L14', 'L17']
BC's found in Forcing File:
	Domain: D39: ['D39']
BC's found in Forcing File:
	Domain: D40: ['D40']
BC's found in Forcing File:
	Domain: D41: ['D41', 'L07', 'L08', 'L28', 'L30', 'L34', 'L35', 'L36', 'L37', 'L38', 'L39']
BC's found in Forcing File:
	Domain: D42: ['D42']
BC's found in Forcing File:
	Domain: RiverD37: ['F02']
BC's found in Forcing File:
	Domain: RiverD38: ['F01']

BC's found in Model File:
Event Conditions/Unsteady/Boundary Conditions/Flow Hydrographs/SA: D37 BCLine: L18
Event Conditions/Unsteady/Boundary Conditions/Flow Hydrographs/SA: D37 BCLine: L19
Event Conditions/Unsteady/Boundary Conditions/Flow Hydrographs/SA: D37 BCLine: L20
Event Conditions/Unsteady/Boundar

### *Main Function*

In [18]:
# Main loop: for every event, write the event's forcing into a working copy of the plan HDF,
# package the model inputs, push them to S3 and record the resulting run name in jobsList.

# Fresh working copy of the plan HDF; each event below is written into it in turn
localPlan = HDFPlanFile(model, planData)
tmpFile = localPlan.hdfLocal.filename

# Start from an empty list so that re-running this cell cannot queue the same jobs twice
jobsList = []

try:
    # Loop over events
    for event in events:
        print(event)
        start = time()

        # The forcing lookup depends only on the event, so it is done once per event
        # instead of once per domain.
        try:
            updateRasData = GetModelEventData(event, forcing_data)
        except KeyError:
            # Skip the event: carrying on would reuse the previous event's data
            # (or hit an undefined name on the first event).
            print("!---\tNo forcing data found for event {}, skipping".format(event))
            continue

        # Verify Data exists in at least one domain for the event
        forcing_vectors = sum(len(v) for v in updateRasData.values())
        assert forcing_vectors > 0, "No Forcing for this event found, please verify event exists"

        # Counts the boundary conditions written for THIS event, so values left over from a
        # previous event are never used to package the current one.
        bcs_updated = 0

        # Loop over domains in the model
        for domain in localPlan.domains:

            # There may be domains that have no boundaries requiring forcing updates;
            # those are reported and skipped.
            try:
                forcing_data_boundary_names = updateRasData[domain].keys()

                # Loop over all boundary conditions per domain
                for bc in forcing_data_boundary_names:

                    # modelDescriptor is first key in forcing: Fluvial, H06, etc.
                    modelDescriptor = updateRasData[domain][bc]['modelDescriptor']
                    tseries = updateRasData[domain][bc]['tseries']

                    # Get full path in hdf to update
                    bcLocalPath = bc_hdf_path(model, domain, bc, localPlan.planHydrograpList)

                    # update plan file with event data
                    localPlan.updateHydrograph(bcLocalPath, tseries)

                    # Update start, end date for time series
                    Start_Date = updateRasData[domain][bc]['start_date']
                    End_Date = updateRasData[domain][bc]['end_date']

                    # Update model runtime for all boundary conditions
                    localPlan.updateSimDates(event, Start_Date, End_Date)

                    bcs_updated += 1
            except KeyError:
                print("!---\tVerify {} does not have BC's that need to be overwritten, no data found in forcing json".format(domain))

        if bcs_updated == 0:
            # Nothing was written for this event, so there is nothing valid to package
            print("!---\tNo boundary conditions were updated for event {}, skipping".format(event))
            continue

        rawModelFiles =  prepEventZipFiles(event, model, localPlan, prjData, modelDescriptor)

        updateComputeFiles(model, rawModelFiles, event, Start_Date, End_Date)

        runName = pushModeltoS3(rawModelFiles)

        jobsList.append(runName)

        print('{} Completed in {} seconds'.format(runName, time() - start))

    # grab cell count for SOM: one entry per queued job (an empty array when nothing was queued)
    cellCount = np.ones(len(jobsList))*localPlan.cellCount

finally:
    # Always release and delete the temporary HDF copy, even when an event fails part-way
    localPlan.hdfLocal.close()
    del localPlan
    os.remove(tmpFile)

E0001
!---	Verify RiverD42 does not have BC's that need to be overwritten, no data found in forcing json
s3://pfra/Sacramento/P03/H06/E0001/Sacramento_P03_H06_E0001_in.zip Completed in 3.7154548168182373 seconds


### Create job file

In [19]:
# Create df with cell count, jobs
s3JobsDF = pd.DataFrame(data = {'jobs':jobsList, 'cells':cellCount} )

# add timestamp to filename
#sendTime = datetime.datetime.now( pytz.timezone("UTC") ).strftime("%Y-%m-%d %I:%M:%S %Z")
sendTime = datetime.datetime.now( pytz.timezone("UTC") ).strftime("%Y%m%d_%I%M%S_%Z")

# make path to save csv on s3
s3RunFile = model.s3path.replace('BaseModels','Jobs').replace('.zip','_{}.csv'.format(sendTime))
s3JobsDF

Unnamed: 0,jobs,cells
0,s3://pfra/Sacramento/P03/H06/E0001/Sacramento_...,225404.0


### Write job file to s3

In [20]:
# Serialise the jobs table to csv in memory (no index column)
csv_buffer = StringIO()
s3JobsDF.to_csv(csv_buffer, sep=",", index=False)

# The object key is the job-file path with the bucket prefix stripped
jobs_key = s3RunFile.replace('s3://pfra/','')
jobs_object = s3.Object('pfra', jobs_key)

# Upload the csv text and keep the response from put()
sendJobs= jobs_object.put(Body=csv_buffer.getvalue())
print('File written to {}'.format(s3RunFile))

File written to s3://pfra/Sacramento/Jobs/Sacramento_P03_H00_20191003_043141_UTC.csv


# END