# Process Fire Activities from HMS #
Created on March 17, 2025

#### Overall I/O directories ###

In [3]:
import os

# Root of the HMS working tree; every directory below is derived from it.
_hms_root = '/proj/ie/proj/GSA-EMBER/BlueSky/HMS'

HMS_raw  = _hms_root + '/raw_data'   # Raw data downloaded from HMS
HMS_fips = _hms_root + '/hms_fips'   # HMS data with FIPS code

# Processing year; the date window is the first and last calendar day of it.
hms_yyyy = '2023'

startdate = hms_yyyy + '0101'
enddate   = hms_yyyy + '1231'

# Labels used as both the per-country sub-directory name and the file prefix.
hms_us_label = 'hms_{}_us'.format(hms_yyyy)
hms_ca_label = 'hms_{}_canada'.format(hms_yyyy)

# HMS data split by landuse, one directory per country.
HMS_split_us = '{}/hms_split/{}'.format(_hms_root, hms_us_label)
HMS_split_ca = '{}/hms_split/{}'.format(_hms_root, hms_ca_label)

### Download HMS fire for selected episode ###
HMS 5 satellite text-based fire detections data are used to spatially and temporally apportion associated wildland fire perimeters. Additionally, the HMS data provide activity where no other data sources exist. The HMS detects are pre-processed to differentiate detections over wildlands versus croplands and the US versus Canada. All HMS-related scripts are provided in the package under the activity/hms path.
A script, gethms.py, is supplied to fetch daily point files from the NOAA ftp to the system. The year and outpath variables should be adjusted as necessary to match the target year and the system output path for the daily HMS files.

In [None]:
if not os.path.exists(HMS_raw):
    os.makedirs(HMS_raw)

!python get_HMS.py --output {HMS_raw} {startdate} {enddate}

### Add FIPS to HMS data ###
The next step in the HMS process is to subset the satellite detects to those north of 24.5° latitude and associate a county or province FIPS code with each detect. This is done with the hms_set_fips.py script. Within this script the path to the northamerica_county_province_state_latlon.shp file (supplied under the geospatial directory) will need to be updated to match the location on the system path. A wrapper script, gen_hms.csh, can be used to set the command line arguments including the input path (inpath) to the HMS daily files, the writable output path (outpath) for the daily script output files, and the target year.

In [None]:
# Generate the HMS with FIPS
if not os.path.exists(HMS_fips):
    os.makedirs(HMS_fips)

fips_shape_path = '/proj/ie/proj/GSA-EMBER/BlueSky/from_EPA/ember_package/activity/hms/northamerica_county_province_state_latlon.shp'


## Call hms_set_fips.py directly instead of using wrapper script
!python hms_set_fips.py {HMS_raw} {HMS_fips} {fips_shape_path} {startdate} { enddate}

### Canada HMS Splitting ###
Once all dates have been run through hms_set_fips.py the satellite data is split into files based on country and landcover. For Canada the script hms_canada_cropsplit.py uses a combined wildland and cropland raster to associate landcover with each detect and split into a crop type (labeled hms_YYYY_canada_crops.csv) and wildland type file (labeled hms_YYYY_wildland.csv). The script should be updated to reflect the location of the fbp_2011_250m_canada_fccsmap_crops_fix.nc file supplied in the geospatial folder of the package. 

In [None]:
import os

fccmap_fpath   = '/proj/ie/proj/GSA-EMBER/BlueSky/from_EPA/ember_package/geospatial/fbp_2011_250m_canada_fccsmap_crops_fix.nc'
fuelxref_fpath = '/proj/ie/proj/GSA-EMBER/BlueSky/from_EPA/ember_package/activity/hms/fbp_aci_fuel_xref.csv'

if not os.path.exists(HMS_split_ca):
    os.makedirs(HMS_split_ca)

!python hms_canada_cropsplit.py {HMS_fips} {HMS_split_ca} {fccmap_fpath} {fuelxref_fpath} {hms_ca_label} {startdate} {enddate}

### US HMS Splitting ###
Similarly, the hms_fccs.py script is used to split US detects based on landcover. This script should be updated to reflect the location of the lf_220_2022_cdl_2022_120m_fix.nc raster file supplied in the geospatial folder. Outputs of this script are written to hms_YYYY_fccscdl_grass.csv, hms_YYYY_fccscdl_cdlcrops.csv, hms_YYYY_fccscdl_wildland.csv, hms_YYYY_fccscdl_mwcrops.csv, and hms_YYYY_fccscdl_tree_crops.csv. Any changes to target year or input paths should be changed in both the hms_fccs.py and hms_canada_cropsplit.py scripts. 

In [None]:
import os

fcc_fpath      = '/proj/ie/proj/GSA-EMBER/BlueSky/from_EPA/ember_package/geospatial/lf_220_2022_cdl_2022_120m_fix.nc'
fccxref_fpath  = '/proj/ie/proj/GSA-EMBER/BlueSky/from_EPA/ember_package/activity/hms/LF20_FCCS_220.csv'
cdlxref_fpath  = '/proj/ie/proj/GSA-EMBER/BlueSky/from_EPA/ember_package/activity/hms/cdl_cover.csv'
costcy_fpath   = '/proj/ie/proj/GSA-EMBER/BlueSky/from_EPA/ember_package/activity/hms/flat_costcy_07dec2021.csv'

if not os.path.exists(HMS_split_us):
    os.makedirs(HMS_split_us)

!python hms_fccs.py {HMS_fips} {HMS_split_us} {fcc_fpath} {fccxref_fpath} {cdlxref_fpath} {costcy_fpath} {hms_us_label} {startdate} {enddate}

### Flint Hills operation treatment ###
Those detects that occur on grasslands in the Flint Hills counties of Kansas during the annual burning season should be split from the rest of the grasslands for a separate processing stream described in section 6. The split_fh.py helper script can be used to split the Flint Hills counties and annual date range into a separate file. More information on finding the annual date ranges is in section 6 of this document.

In [None]:
## Run split_fh.py directly here
import pandas as pd
import os

# Five-digit county FIPS codes treated as Flint Hills. The same list applies
# to every configured year, so it is defined once.
fh_counties = ['20015','20017','20019','20031','20035','20049','20061','20073','20111','20115',
               '20127','20139','20149','20161','20197','20205','20207','40071','40105','40113','40147']

# Flint Hills burn-season window (start, end) as YYYYMMDD, keyed by year.
fh_seasons = {
    '2023': ('20230203', '20230501'),
    # Flint Hills operation in 2024: https://www.ksfire.org/docs/Flint_Hills_2024_Summary.pdf
    '2024': ('20240214', '20240429'),
}

# Fail loudly for an unconfigured year. Without this the fh_* names would be
# undefined on a fresh kernel, or silently left over from an earlier run with
# a different hms_yyyy.
if hms_yyyy not in fh_seasons:
    raise ValueError(
        f'No Flint Hills burn season configured for year {hms_yyyy!r}; '
        f'known years: {sorted(fh_seasons)}'
    )
fh_startdate, fh_enddate = fh_seasons[hms_yyyy]

# Every calendar day in the window, both endpoints included, as YYYYMMDD.
fh_dates = pd.date_range(fh_startdate, fh_enddate).strftime('%Y%m%d').tolist()

# Grassland detects written by the US split step.
fn = os.path.join(HMS_split_us, f'{hms_us_label}_fccscdl_grass.csv')
# Read identifier-like columns as text so their digits are kept verbatim.
dtype = {'YearDay': str, 'fips': str, 'gday': str, 'fccs': str, 'Ecosys': str, 'Time': str}
df = pd.read_csv(fn, dtype=dtype)

# Normalise fips to a zero-padded 5-character string before matching.
# NOTE(review): astype(int) raises if any fips value is missing or
# non-numeric -- confirm the grass file never contains such rows.
fips5 = df['fips'].astype(int).astype(str).str.zfill(5)
# A detect is Flint Hills when both its date and its county match.
# NOTE(review): assumes 'gday' holds YYYYMMDD strings -- confirm.
idx = df['gday'].isin(fh_dates) & fips5.isin(fh_counties)

# Write the Flint Hills subset and its complement next to the input file.
df[idx].to_csv(os.path.join(HMS_split_us, f'{hms_us_label}_fccscdl_grass_fh.csv'), index=False)
df[~idx].to_csv(os.path.join(HMS_split_us, f'{hms_us_label}_fccscdl_grass_nofh.csv'), index=False)

### Concat U.S. wildland and non-Flint Hills grassland fires ###
Two files containing HMS detects will be imported into SF2PY, one for Canada detects over wildlands and one for US detects over wildlands, including non-Flint Hills burn season grasslands. To create the US file the non-Flint Hills grasslands HMS file should be concatenated with the US wildland file. Example SF2PY input and Flint Hills HMS files from the 2023 EMBER run are included in this package for reference.

In [4]:
## US wildland + grassland no FlintHill
import pandas as pd

# Input files: US wildland detects plus the grassland detects that fall
# outside the Flint Hills burn season.
input_files = [
    os.path.join(HMS_split_us, f"{hms_us_label}_fccscdl_wildland.csv"),
    os.path.join(HMS_split_us, f"{hms_us_label}_fccscdl_grass_nofh.csv"),
]

# Output file, derived from HMS_split_us so it follows the configured
# directory instead of repeating the absolute path.
output_file = os.path.join(HMS_split_us, f"{hms_us_label}_fccscdl_wildland_grass_nofh.csv")

# Column names come from the header of the first file; only the first 12
# columns are kept.
column_names = pd.read_csv(input_files[0], nrows=1, usecols=range(12)).columns

# Read each file positionally: skip its header, take the first 12 columns and
# keep every value as an object. Concatenate once rather than growing a frame
# inside the loop.
frames = [
    pd.read_csv(ifl, header=None, skiprows=1, usecols=range(12), dtype=object)
    for ifl in input_files
]
concat_df = pd.concat(frames, ignore_index=True)
concat_df.columns = column_names

# Save the result to a CSV file
concat_df.to_csv(output_file, index=False)