In [3]:
#Written by GC -- 03/24

#This notebook contains code to generate SEDs dists for each NTS sheet. The basic idea here is to 1) determine the number of days where the FWI exceeds some threshold 
#(see https://doi.org/10.1016/j.scitotenv.2023.161831), 2) calculate the average fire duration per NTS sheet and then multiply these values to get the avg SEDs per NTS sheet. 

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import contextily as cx
import libpysal
import rioxarray as rxr
from ztp_funcs import ztp_dist



Unnamed: 0,Value,RelativeFrequency,Name
0,1,0.178561,SED
1,2,0.251899,SED
2,3,0.236906,SED
3,4,0.167104,SED
4,5,0.094295,SED
5,6,0.044341,SED
6,7,0.017872,SED
7,8,0.006303,SED


In [5]:
#Calc number of days above some FWI value using the ecozone FWI cutoffs found in https://doi.org/10.1016/j.scitotenv.2023.161831

#Read in NTS sheet shapefiles
df_nts=gpd.read_file("C:\\Users\\GiovanniCorti\\Downloads\\nts_snrc\\nts_snrc_250k.shp")

#Read in Ecozne shapefiles. The boreal shield ecozone conatins an EW split unique to this paper, hence the
#extra shape files
df_ecozones=gpd.read_file("C:\\Users\\GiovanniCorti\\Downloads\\ecozone_shp\\Ecozones\\ecozones.shp")
df_bsw=gpd.read_file("C:\\Users\\GiovanniCorti\\Documents\\BSW.shp")
df_bse=gpd.read_file("C:\\Users\\GiovanniCorti\\Documents\\BSE.shp")
df_ecozones=pd.concat([df_ecozones,df_bsw,df_bse],ignore_index=True)
df_ecozones=df_ecozones.drop([15])
df_ecozones.reset_index(inplace=True)

#Reproject to equal-area EASE grid
df_nts=df_nts.to_crs(epsg=6931)
df_ecozones=df_ecozones.to_crs(epsg=6931)


  df_ecozones=pd.concat([df_ecozones,df_bsw,df_bse],ignore_index=True)


In [10]:
#Add FWI cutoff values to ecozone dataframe. Where https://doi.org/10.1016/j.scitotenv.2023.161831 
#does not provide an FWI value I just assume 19
tdf=df_ecozones.copy()
FWI_50=[19,19,19,19,19,19,10.5,17.7,19.6,16.7,20.8,19,11.5,11.5,9.5,11.5,23.2,30,11.5,19,20.8,19,19,19,15.8,12]
tdf['FWI_50']=FWI_50
FWI50_dict=pd.Series(tdf['FWI_50'].values,index=tdf['ZONE_NAME']).to_dict()
#print(FWI50_dict)

#Checks NTS sheets to see if they stradle ecozone boundaries and takes weighted average for more accurate 
#FWI cutoff values if needed
nts_list,FWI_cf_list=[],[]
for index, row in df_nts.iterrows():
    int_poly=df_ecozones.intersection(row.geometry).area
    ar=int_poly/np.sum(int_poly)
    tdf['Area_ratio']=ar
    ar_dict=tdf.groupby('ZONE_NAME')['Area_ratio'].sum().to_dict()
    FWI_cf=sum(ar_dict[k]*FWI50_dict[k] for k in ar_dict)
    FWI_cf_list.append(FWI_cf)
    nts_list.append(row['NTS_SNRC'])

#Create FWI cutoff dataframe
tdict = {'NTS_SNRC': nts_list, 'FWI_cf': FWI_cf_list} 
df_fwi_cf=pd.DataFrame(tdict)
df_nts_fwi_cf=df_nts.merge(df_fwi_cf, on='NTS_SNRC')
df_nts_fwi_cf=df_nts_fwi_cf.to_crs(epsg=3857)
df_nts_fwi_cf.to_file(r"C:\Users\GiovanniCorti\Documents\Wildfire\FWI_cf.shp")
#df_nts_fwi_cf

In [4]:
#Threshold FWI files and save to Y: drive
for index, row in df_nts_fwi_cf.iterrows():
    NTS_code=row['NTS_SNRC']
    FWI_cf=row['FWI_cf']
    dir_path="Y:\\client-data\\demo_projects\\climate85\\Working_data\\NARR_weather_csvs\\NTS_SNRC_"+NTS_code
    if os.path.exists(dir_path):
        #Read in fwi_era data
        fwi_fp=dir_path+"\\fwi_era_NTS_SNRC_"+NTS_code+".csv"
        fwi_df=pd.read_csv(fwi_fp)
        #Threshold data and save to Y: drive
        fwi_cf_df=fwi_df[fwi_df['fwi']>FWI_cf]
        fwi_cf_df.to_csv(dir_path+"\\fwi_era_cf_NTS_SNRC_"+NTS_code+".csv")


NameError: name 'os' is not defined

In [11]:
#Read in number of node-days (ERA5 nodes per NTS sheet). Used to calc percentage of ERA5 days above some FWI cutoff
nd_df=pd.read_csv(r"C:\\Users\\GiovanniCorti\\Downloads\\node_days.csv")
df_nts_fwi_cf=df_nts_fwi_cf.merge(nd_df, on='NTS_SNRC')
#214 is number of days between 1 April and 1 Nov, an interval that contains >99% of the fires in the NFDB
df_nts_fwi_cf['met_samples']=df_nts_fwi_cf['node_days']*214*11
#df_nts_fwi_cf['met_samples']=df_nts_fwi_cf['node_days']*df_nts_fwi_cf['FS_length']*11

#Include only NTS sheets where an ign is defined
df_igns=gpd.read_file(r"C:\Users\GiovanniCorti\Documents\Wildfire\ign_v2.shp")
df_nts=df_igns[df_igns['ign_num']>0]

#For each NTS sheet, read ERA5 weather CSVs and calc percentage above FWI cutoff
nts_list,per_list=[],[]
for index, row in df_nts_fwi_cf.iterrows():
    nts_code=row['NTS_SNRC']
    nts_list.append(nts_code)
    FWI_df=pd.read_csv("Y:\\client-data\\demo_projects\\climate85\\Working_data\\NARR_weather_csvs\\NTS_SNRC_"+nts_code+"\\fwi_era_NTS_SNRC_"+nts_code+".csv")
    per_list.append(len(FWI_df[FWI_df['fwi']>row['FWI_cf']])/row['met_samples'])

#Create geodataframe w/ percentage FWI above
tdict = {'NTS_SNRC': nts_list, 'fwi_per': per_list} 
tdf=pd.DataFrame(tdict)
FWI_above_df=df_nts.merge(tdf, on='NTS_SNRC')

    NTS_SNRC            NAME_ENG             NOM_FRA  SRID  SHAPE_AREA  \
0       002E             BOTWOOD             BOTWOOD     6         2.0   
1       002F         WESLEYVILLE         WESLEYVILLE     6         2.0   
2       001K           TREPASSEY           TREPASSEY     6         2.0   
3       001L        ST. LAWRENCE        ST. LAWRENCE     6         2.0   
4       001M           BELLEORAM           BELLEORAM     6         2.0   
..       ...                 ...                 ...   ...         ...   
621     117B  DAVIDSON MOUNTAINS  DAVIDSON MOUNTAINS     6         2.0   
622     117D     HERSCHEL ISLAND     HERSCHEL ISLAND     6         4.0   
623     096A    JOHNNY HOE RIVER    JOHNNY HOE RIVER     6         2.0   
624     096E        NORMAN WELLS        NORMAN WELLS     6         2.0   
625     096F         MAHONY LAKE         MAHONY LAKE     6         2.0   

     SHAPE_LEN   ign_num                                           geometry  \
0          6.0  1.033763  POLYGO

In [12]:
#Now that we have percenatge of FWI above cutoff we need average fire duration. This is done on an ecozone basis using fire progression data

nfdb_df=gpd.read_file("C:\\Users\\GiovanniCorti\\Documents\\Wildfire\\AreaBurned2010-2020\\NBAC_2010_2020.shp")
nfdb_df['EDATE']=pd.to_datetime(nfdb_df['EDATE'],exact=False)
nfdb_df['SDATE']=pd.to_datetime(nfdb_df['SDATE'],exact=False)

nfdb_df['Duration']=nfdb_df['EDATE']-nfdb_df['SDATE']

#Replace 0 day duration with 1 day duration
nfdb_df['Duration']=nfdb_df['Duration'].where(nfdb_df['Duration']!=np.timedelta64(0, 'D'),np.timedelta64(1, 'D'))
#Assume smalls fires (less than 10 ha) with no duration have 1 day duration
nfdb_df.loc[(nfdb_df['POLY_HA'] <10) & (np.isnat(nfdb_df['Duration'])),'Duration']=np.timedelta64(1, 'D')

In [13]:
#Read in ecozone shapefile and reproject to matching CRS. These are the standard ecozones instead 
#of the split boreal-sheild version used above
ecozone_df=gpd.read_file("C:\\Users\\GiovanniCorti\\Downloads\\ecozone_shp\\Ecozones\\ecozones.shp")
ecozone_df=ecozone_df.to_crs(nfdb_df.crs)

#Determine which ecozone each fire is in
#Size cutoff here is that same as used in the ign dist calculation and is 
#inteded, in part, to implicitly account for fire supression 
nfdb_lg_df=nfdb_df[nfdb_df['POLY_HA']>1]
for index, row in nfdb_lg_df.iterrows():
    a=ecozone_df.intersection(row['geometry'].centroid)
    ez_num=ecozone_df.iloc[a[~a.is_empty].index[0]]['ECOZONE']
    nfdb_lg_df.loc[index,'ECOZONE']=ez_num

#Groupby ecozone and calc average duration
#Round long fires down to 30 days
nfdb_lg_df.loc[nfdb_lg_df['Duration'] > np.timedelta64(30, 'D') ,'Duration']=np.timedelta64(30, 'D')
dur_df=nfdb_lg_df.groupby('ECOZONE')['Duration'].mean()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [14]:
#df_nts=gpd.read_file("C:\\Users\\GiovanniCorti\\Downloads\\nts_snrc\\nts_snrc_250k.shp")
#ecozone_df=gpd.read_file("C:\\Users\\GiovanniCorti\\Downloads\\ecozone_shp\\Ecozones\\ecozones.shp")

#Reproj to equal-area CRS before we do calculation for sheets that straddle ecozone boundaries
df_nts=df_nts.to_crs(epsg=6931)
ecozone_df=ecozone_df.to_crs(epsg=6931)

#Merge duration dataframe with ecozone dataframe
tdf=ecozone_df.copy()
tdf=tdf.merge(dur_df, on='ECOZONE')

#Setup dict w/ avg duration for ecozone
dur_dict=pd.Series(tdf['Duration'].values,index=tdf['ZONE_NAME']).to_dict()

#Calc fire duration for each NTS sheet in a way that accounts for NTS sheets that straddle 
#ecozone boundaries
nts_list,fd_list=[],[]
for index, row in df_nts.iterrows():
    int_poly=ecozone_df.intersection(row.geometry).area
    ar=int_poly/np.sum(int_poly)
    tdf['Area_ratio']=ar
    ar_dict=tdf.groupby('ZONE_NAME')['Area_ratio'].sum().to_dict()
    fd=sum(ar_dict[k]*(dur_dict[k]/np.timedelta64(1, 'D')) for k in ar_dict)
    fd_list.append(fd)
    nts_list.append(row['NTS_SNRC'])

#Use ign dataframe here so we can skip NTS sheets w/ no fires.
df_igns=gpd.read_file(r"C:\Users\GiovanniCorti\Documents\Wildfire\ign_v2.shp")
df_nts=df_igns[df_igns['ign_num']>0]
tdict = {'NTS_SNRC': nts_list, 'avg_fd': fd_list} 
df_fd=pd.DataFrame(tdict)
df_fd=df_nts.merge(df_fd, on='NTS_SNRC')

In [15]:
#Round min fire duration to 1 and calc avg SEDs
df_fd.loc[df_fd['avg_fd'].between(0.01,1),'avg_fd']=1
df_SED=df_fd[['NTS_SNRC','avg_fd']].merge(FWI_above_df, on='NTS_SNRC')

#-1 here is used as I calc SEDs in excess of 1 and then add the inital SED back in. 
#This generally ensures a min of at least 1 SED.
df_SED['SED']=(df_SED['fwi_per']*(df_SED['avg_fd']-1))+1

#Spatial smoothing using nearest 8 NTS sheets
W = libpysal.weights.KNN.from_dataframe(df_SED, k=8)
# row-normalise weights
W.transform = "r"
df_SED["SED_sm"] = libpysal.weights.lag_spatial(W, df_SED["SED"])

In [16]:
df_SED = gpd.GeoDataFrame(df_SED, crs="EPSG:6931", geometry='geometry')
df_SED.loc[df_SED['SED_sm'].le(1),'SED_sm']=1
#df_SED.to_file("C:/Users/GiovanniCorti/Documents/Wildfire/SED_var_FSL.shp")

In [24]:
#Create SED dist .csvs for each NTS sheet. These csvs are not possion distributed and only feature 2 possible SED values.
for index, row in df_SED.iterrows():
    num=np.round(row['SED_sm'],2)
    vals=np.round(np.modf(num),2)
    SED_num_ls=[]
    per_ls=[]
    
    if num==0:
        #r1=[0,1.0]
        SED_num_ls.append(0)
        per_ls.append(100)
    elif np.isnan(num):
        pass
    else:
        SED_num_ls.extend((int(vals[1]),int(vals[1]+1)))
        per_ls.extend((100*np.round(1-vals[0],2),100*np.round(vals[0],2)))
    tdict = {'sp_ev_days': SED_num_ls, 'pct': per_ls} 
    dt=pd.DataFrame(tdict)
    #Write .csvs to Y drive
    dt.to_csv("Y:client-data\\demo_projects\\climate85\\Working_data\\SED_dist_v2\\sed_dist_"+row['NTS_SNRC']+'.csv',index=False)

In [18]:
SED_v2=gpd.read_file("C:/Users/GiovanniCorti/Documents/Wildfire/SED_v2.shp")

#tuned_SED_dict={'Northern Arctic': None, 'Arctic Cordillera': None, 'Southern Arctic': None, 'Taiga Cordillera': 5.1148, 'Taiga Plain': 2.0798, 
#'Taiga Shield': 5.8558, 'Boreal Cordillera': 3.8857, 'Boreal PLain': 2.2377, 'Pacific Maritime': 4.7376, 'Hudson Plain': 2.52615, 
#'Montane Cordillera': 3.3510, 'Prairie': None, 'Atlantic Maritime': None, 'MixedWood Plain': None, 'Boreal Shield West': 3.3460, 
#'Boreal Shield East': 2.9079}

tuned_SED_dict={'Northern Arctic': 1.0, 'Arctic Cordillera': 5.8558, 'Southern Arctic': (2.0798+5.8558)/2, 'Taiga Cordillera': 5.1148, 'Taiga Plain': 2.0798, 
'Taiga Shield': 5.8558, 'Boreal Cordillera': 3.8857, 'Boreal PLain': 2.2377, 'Pacific Maritime': 4.7376, 'Hudson Plain': 2.52615, 
'Montane Cordillera': 3.3510, 'Prairie': (2.2377+3.3510)/2, 'Atlantic Maritime': 2.9079, 'MixedWood Plain': 2.9079, 'Boreal Shield West': 3.3460, 
'Boreal Shield East': 2.9079}

#Calc number of days above some FWI value using the ecozone FWI cutoffs found in https://doi.org/10.1016/j.scitotenv.2023.161831

#Read in NTS sheet shapefiles
df_nts=gpd.read_file("C:\\Users\\GiovanniCorti\\Downloads\\nts_snrc\\nts_snrc_250k.shp")

#Read in Ecozne shapefiles. The boreal shield ecozone conatins an EW split unique to this paper, hence the
#extra shape files
df_ecozones=gpd.read_file("C:\\Users\\GiovanniCorti\\Downloads\\ecozone_shp\\Ecozones\\ecozones.shp")
df_bsw=gpd.read_file("C:\\Users\\GiovanniCorti\\Documents\\BSW.shp")
df_bse=gpd.read_file("C:\\Users\\GiovanniCorti\\Documents\\BSE.shp")
df_ecozones=pd.concat([df_ecozones,df_bsw,df_bse],ignore_index=True)
df_ecozones=df_ecozones.drop([15])
df_ecozones.reset_index(inplace=True)

#Reproject to equal-area EASE grid
df_nts=df_nts.to_crs(epsg=6931)
df_ecozones=df_ecozones.to_crs(epsg=6931)

SED_v2=df_nts.merge(SED_v2[["NTS_SNRC","SED_sm"]],on="NTS_SNRC")

ez_ls,nts_ls,SED_ls=[],[],[]
for index, row in SED_v2.iterrows():
    int_poly=df_ecozones.intersection(row.geometry).area
    ar=int_poly/np.sum(int_poly)
    tez_ls=list(df_ecozones.iloc[ar[ar!=0.0].index]['ZONE_NAME'])    
    tar_ls=list(ar[ar!=0.0])

    ar_dict=dict(zip(tez_ls,tar_ls))
    SED_val=sum(ar_dict[k]*tuned_SED_dict[k] for k in ar_dict)
    SED_ls.append(SED_val)
    nts_ls.append(row['NTS_SNRC'])
    
tdict = {'NTS_SNRC': nts_ls, 'SED': SED_ls} 
tdf=pd.DataFrame(tdict)
df_SEDv3=df_nts.merge(tdf)

df_SEDv3['SED'][df_SEDv3['SED']<1]=1
#df_SEDv3.to_file("C:/Users/GiovanniCorti/Documents/Wildfire/SED_v3.shp")
df_SEDv3['SED'].min()
df_SEDv3.to_file("C:/Users/GiovanniCorti/Documents/Wildfire/SED_v3.shp")

  df_ecozones=pd.concat([df_ecozones,df_bsw,df_bse],ignore_index=True)
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_SEDv3['SED'][df_SEDv3['SED']<1]=1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/in

In [19]:
SED_v2=gpd.read_file("C:/Users/GiovanniCorti/Documents/Wildfire/SED_v2.shp")
#SED_v2.groupby('ECOZONE_NAME')['SED_sm'].mean()

ez_ls,nts_ls=[],[]
for index, row in df_nts.iterrows():
    #a=row.geometry.centroid.within(df_ecozones.geometry)
    
    int_poly=df_ecozones.intersection(row.geometry).area
    ar=int_poly/np.sum(int_poly)
    
    if pd.isnull(ar).all():
        ez_ls.append(None)
    elif len(ar[ar!=0.0])>0:
        #print(ar[ar!=0.0])
        ez_ls.append(df_ecozones.iloc[ar[ar!=0.0].idxmax()]['ZONE_NAME'])

    
    
    #if len(a[a])==1:
        #print(df_ecozones.iloc[a[a].index-1]['ZONE_NAME'])
        #ez_ls.append(df_ecozones.iloc[a[a==True].index]['ZONE_NAME'].values[0])
    else:
        ez_ls.append(None)
    nts_ls.append(row["NTS_SNRC"])

tdict = {'NTS_SNRC': nts_ls, 'ZONE_NAME': ez_ls} 
tdf=pd.DataFrame(tdict)
SED_v2=SED_v2.merge(tdf)
EZ_SED_mu_df=SED_v2.groupby("ZONE_NAME")["SED_sm"].mean()
   

In [20]:
sed_norm_ls,nts_ls=[],[]
for index, row in SED_v2.iterrows():
    if row["ZONE_NAME"]!=None:
        sed_norm_ls.append(row["SED_sm"]/EZ_SED_mu_df.loc[row["ZONE_NAME"]])
        
    else:
        sed_norm_ls.append(None)
    nts_ls.append(row["NTS_SNRC"])
    
tdict = {'NTS_SNRC': nts_ls, 'SED_norm': sed_norm_ls} 
tdf=pd.DataFrame(tdict)
SED_v4=df_SEDv3.merge(tdf)
SED_v4["adj_SED"]=SED_v4["SED"]*SED_v4["SED_norm"]
#Must have at least 1 SED
SED_v4['adj_SED'][SED_v4['adj_SED']<1]=1
#Very large SED values can cause crashes due to memory limits. Additionally, the physics of very large fires is poorly captured by 
#models like BP3+ beacuse they are not atmospherically coupled
SED_v4['adj_SED'][SED_v4['adj_SED']>7]=7
SED_v4.to_file("C:/Users/GiovanniCorti/Documents/Wildfire/SED_fst.shp")

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  SED_v4['adj_SED'][SED_v4['adj_SED']<1]=1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  SED_v4['adj_SED'][SED_

In [29]:
#Create SED dist .csvs for each NTS sheet. These csvs are poisson distributed
for index, row in SED_v4.iterrows():
    ztp_df=ztp_dist('SED', row["adj_SED"])
    tdict = {'sp_ev_days': ztp_df["Value"], 'pct': np.round(ztp_df["RelativeFrequency"]*100,2)} 
    dt=pd.DataFrame(tdict)
    #dt.to_csv("Y:client-data\\demo_projects\\climate85\\Working_data\\SED_dist_v2\\sed_dist_"+row['NTS_SNRC']+'.csv',index=False)
    dt.to_csv("C:\\Users\\GiovanniCorti\\Desktop\\BP3Inputs\\"+row['NTS_SNRC']+"\\sed_dist_"+row['NTS_SNRC']+'.csv',index=False)
    