# Angola Wildfires correlations

In [None]:
# imports
#fires
import random
import numpy
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import os
import matplotlib
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from shapely.geometry import Polygon, MultiPolygon

#precipitation
import xarray as xr
import pooch
import tempfile
import s3fs
import boto3
import botocore
import datetime
import numpy as np

# 1. prepare THE ANGOLA 2020 DATA - wildfires, precipitation, temperature

## WILDFIRES

### get wildfire data

In [None]:
# Load the wildfire polygons from a shapefile into a GeoDataFrame.
# The path is an absolute location under /home/jovyan/shared, so this cell only
# works where that directory is mounted.
url_climateaction = '/home/jovyan/shared/Data/Projects/Wildfires/ClimateAction_countries.shp'
# TODO: host the file on OSF and fetch it with pooch.retrieve instead of a local path
Dataset = gpd.read_file(url_climateaction)

In [None]:
# Angola fires - kamil 14.12.2023
# Parse the fire start (IDate) and end (FDate) columns as datetimes so that the
# .dt accessor can be used for filtering below.
Dataset["IDate"] = pd.to_datetime(Dataset["IDate"])
Dataset["FDate"] = pd.to_datetime(Dataset["FDate"])
# Choose country
country = "Angola"
angolaextent =  (9, 26, -21, -4) # (lon_min, lon_max, lat_min, lat_max); passed to set_extent when plotting
angolalongitude=slice(9,26)
angolalatitudeR=slice(-4,-21) # descending latitude order, used for the temperature data
angolalatitude=slice(-21,-4) # ascending latitude order, used for the CHIRPS precipitation data

# Keep only the fires of the chosen country that both started and ended in `year`.
year = 2020
angolaYear = Dataset[(Dataset["name"] == country) &
                        (Dataset["IDate"].dt.year == year) & (Dataset["FDate"].dt.year == year)     ]
# Same fires, ordered from the largest to the smallest burnt area.
angola_sorted = angolaYear.sort_values("Area_Acres", ascending = False)

### WILDFIRE FUNCTIONS

In [None]:
# Fold a sequence of (lon, lat) pairs into running min/max bounds
def update_min_max(coords, lon_min, lon_max, lat_min, lat_max):
    """Extend longitude/latitude bounds with the given coordinates.

    Every coordinate is rounded to two decimals before it is compared with
    the incoming bounds. ``coords`` must yield 2-item ``(x, y)`` pairs, where
    x is treated as longitude and y as latitude.

    Returns the updated bounds as ``(lon_min, lon_max, lat_min, lat_max)``;
    the incoming bounds are returned unchanged when ``coords`` is empty.
    """
    rounded = [(round(x, 2), round(y, 2)) for x, y in coords]
    lons = [lon for lon, _ in rounded]
    lats = [lat for _, lat in rounded]
    # the current bound goes first so an empty coordinate list leaves it untouched
    return (
        min([lon_min, *lons]),
        max([lon_max, *lons]),
        min([lat_min, *lats]),
        max([lat_max, *lats]),
    )

# Bounding rectangle (min/max longitude and latitude) of the fire geometries
def find_fire_rect(FireData):
    """Return the lon/lat bounding rectangle of all geometries in ``FireData``.

    ``FireData["geometry"]`` is iterated; MultiPolygon entries contribute the
    exterior ring and the interior rings of every part, Polygon entries
    contribute their exterior ring only, and any other geometry type is
    ignored. Coordinates are rounded to two decimals by ``update_min_max``.

    Returns a dict with keys 'lon_min', 'lon_max', 'lat_min', 'lat_max'.
    When no ring is visited the minima stay at +inf and the maxima at -inf.
    """
    def _rings(geom):
        # yield the coordinate sequences in the order they are folded into the bounds
        if isinstance(geom, MultiPolygon):
            for part in geom.geoms:
                yield part.exterior.coords
                for hole in part.interiors:
                    yield hole.coords
        elif isinstance(geom, Polygon):
            yield geom.exterior.coords

    # kept in update_min_max's argument order: lon_min, lon_max, lat_min, lat_max
    bounds = (float('inf'), float('-inf'), float('inf'), float('-inf'))
    for geom in FireData["geometry"]:
        for ring in _rings(geom):
            bounds = update_min_max(ring, *bounds)

    west, east, south, north = bounds
    return {'lon_min': west, 'lon_max': east, 'lat_min': south, 'lat_max': north}

In [None]:
def find_fire_dates(FireData):
    """Summarise the start/end dates of the first fire in ``FireData``.

    ``FireData`` must provide datetime columns "IDate" (fire start) and
    "FDate" (fire end); only the first row is reported.

    Returns a dict with:
      'idatestr' / 'fdatestr'   -- start / end day as 'YYYY-MM-DD'
      'idateMstr' / 'fdateMstr' -- first day of the start / end month
                                   (the monthly data sets are stamped on day 1)
      'middle_day'              -- day of year of the midpoint between start and end
      'flength'                 -- whole days between start and end
    """
    start, end = FireData["IDate"], FireData["FDate"]
    duration = end - start
    halfway = start + duration / 2

    def first(series):
        return series.iloc[0]

    return {
        'idatestr': first(start.dt.strftime('%Y-%m-%d')),
        'idateMstr': first(start.dt.strftime('%Y-%m-01')),
        'fdatestr': first(end.dt.strftime('%Y-%m-%d')),
        'fdateMstr': first(end.dt.strftime('%Y-%m-01')),
        'middle_day': int(first(halfway.dt.dayofyear)),
        'flength': first(duration.dt.days),
    }


## PRECIPITATION CHIRPS

### get the precipitation data

In [None]:
# helper functions for Precipitation

def pooch_load(filelocation=None,filename=None,processor=None):
    """Return a local path for ``filename``, downloading it only if needed.

    If the file already exists in the shared precipitation directory that
    path is returned. Otherwise ``pooch.retrieve`` fetches ``filelocation``
    into the system temp directory (no hash check, ``known_hash=None``) and
    its result is returned. ``processor`` is forwarded to ``pooch.retrieve``.
    """
    shared_location = '/home/jovyan/shared/Data/Projects/Precipitation'  # this is different for each day
    cache_target = os.path.join(tempfile.gettempdir(), filename)
    shared_copy = os.path.join(shared_location, filename)

    if os.path.exists(shared_copy):
        return shared_copy
    return pooch.retrieve(filelocation, known_hash=None, fname=cache_target, processor=processor)

In [None]:
# Retrieve the CHIRPS daily precipitation files and open them as one dataset

# One file per year; widen the range to load more years (the goal is 1981 till 2023)
years = range(2020, 2021)

# File names follow chirps-v2.0.<year>.days_p25.nc; the URL is the same name under the p25 directory
filenames = [f'chirps-v2.0.{year}.days_p25.nc' for year in years]
file_paths = [
    'https://data.chc.ucsb.edu/products/CHIRPS-2.0/global_daily/netcdf/p25/' + fname
    for fname in filenames
]

# Reuse the shared copy when present, otherwise download
downloaded_files = [pooch_load(fpath, fname) for fpath, fname in zip(file_paths, filenames)]

#### open data as xarray
chirps_data = xr.open_mfdataset(downloaded_files, combine="by_coords")


In [None]:
# Crop the CHIRPS dataset to the Angola latitude/longitude slices.
# Uses angolalatitude (not the reversed angolalatitudeR slice).
chirps_data_angola = chirps_data.sel(latitude=angolalatitude, longitude=angolalongitude)
# Bare expression: displays the cropped dataset as the cell output.
chirps_data_angola

In [None]:
#function to compute the precipitation means for one fire
def get_fire_precipitation(chirps_data,rect,firedate):
    """Average precipitation inside one fire's bounding rectangle.

    chirps_data -- dataset with a ``precip`` variable selectable by
                   ``latitude``, ``longitude`` and ``time``
    rect        -- dict with 'lon_min', 'lon_max', 'lat_min', 'lat_max'
    firedate    -- dict with 'idatestr' and 'fdatestr' (first and last day
                   of the fire, used as a label slice on ``time``)

    If the rectangle selects no cells along latitude or longitude, that axis
    is widened by one unit on both sides and the selection is repeated.

    Returns ``(mean over all dimensions as a scalar, mean over time only)``.
    Diagnostic output is printed/displayed when the overall mean is NaN.
    """
    south, north = rect['lat_min'], rect['lat_max']
    west, east = rect['lon_min'], rect['lon_max']
    timeslice = slice(firedate['idatestr'], firedate['fdatestr'])
    onefirelat = slice(south, north)
    onefirelon = slice(west, east)

    probe = chirps_data.sel(latitude=onefirelat, longitude=onefirelon, time=timeslice).precip
    # axes 1 and 2 are treated as latitude and longitude
    if probe.shape[1] == 0:
        onefirelat = slice(south - 1, north + 1)
    if probe.shape[2] == 0:
        onefirelon = slice(west - 1, east + 1)
    precipFire = chirps_data.sel(latitude=onefirelat, longitude=onefirelon, time=timeslice).precip

    time_mean = precipFire.mean(dim="time")
    # compute() materialises a lazily evaluated mean so that .item() can be called
    overall_mean = precipFire.mean().compute()
    if pd.isna(overall_mean):
        print(f"Longitude from: {rect['lon_min']}, to: {rect['lon_max']}")
        print(onefirelon)
        print(f"Latitude from: {rect['lat_min']}, to: {rect['lat_max']}")
        print(onefirelat)
        display(precipFire)
        print(precipFire.shape)
    return overall_mean.item(), time_mean

## Temperature ERA

### get temperature data

In [None]:
# Open the ERA temperature NetCDF file; the variable used later is t2m.
tempr_path = "~/shared/Data/Projects/Albedo/ERA/Temperature-003.nc"
ds_tempr = xr.open_dataset(tempr_path)

In [None]:
# Crop the temperature data to Angola and derive a Celsius variable.

# The reversed latitude slice (angolalatitudeR) is used for this dataset.
airtemp = ds_tempr.sel(latitude=angolalatitudeR, longitude=angolalongitude) #angola
# Subtract 273.15 to convert to Celsius (t2m is presumably in Kelvin - confirm in the file attributes).
airtemp['t2mC'] = airtemp['t2m'] - 273.15
airtemp['t2mC'].attrs['units'] = 'C'
airtemp['t2mC'].attrs['long_name'] = '2 metre temperature in C'

# CTemp is the DataArray handed to get_fire_temperature.
CTemp = airtemp.t2mC

In [None]:
# function to compute mean lai for one fire
def get_fire_lai(veg, rect, firedate, lai_ds=None):
    """Mean leaf area index (LAI) inside one fire's bounding rectangle.

    Parameters
    ----------
    veg : str or Series/sequence of str
        Land-cover label(s) of the fire. If the label (or any of the labels)
        equals "Forest", the ``lai_hv`` variable is used; any other label
        selects ``lai_lv``. Earlier versions ignored this argument and read
        the notebook-global ``OneFire`` instead.
    rect : dict
        Bounding rectangle with 'lon_min', 'lon_max', 'lat_min', 'lat_max'.
    firedate : dict
        Needs 'idateMstr' and 'fdateMstr' (first day of the start/end month),
        used as a label slice on ``time``.
    lai_ds : optional
        Dataset holding ``lai_hv`` and ``lai_lv``. Defaults to the
        notebook-global ``grib_ds`` so existing three-argument calls keep
        working.

    Returns
    -------
    (scalar, DataArray)
        Mean over all dimensions, and the mean over time only.
    """
    if lai_ds is None:
        lai_ds = grib_ds  # notebook-global LAI dataset, looked up at call time

    if isinstance(veg, str):
        is_forest = veg == "Forest"
    else:
        is_forest = bool(pd.Series(veg).eq("Forest").any())
    lai = lai_ds['lai_hv'] if is_forest else lai_ds['lai_lv']

    timeslice = slice(firedate['idateMstr'], firedate['fdateMstr'])
    # latitude is sliced from max to min (reversed order, as for the temperature data)
    onefirelat = slice(rect['lat_max'], rect['lat_min'])
    onefirelon = slice(rect['lon_min'], rect['lon_max'])

    laiFire = lai.sel(time=timeslice, latitude=onefirelat, longitude=onefirelon)
    # Rectangle smaller than one grid cell: widen that axis by one unit on both sides.
    # Sizes are looked up by dimension name so the check does not depend on axis order.
    lat_empty = laiFire.sizes['latitude'] == 0
    lon_empty = laiFire.sizes['longitude'] == 0
    if lat_empty:
        onefirelat = slice(rect['lat_max'] + 1, rect['lat_min'] - 1)
    if lon_empty:
        onefirelon = slice(rect['lon_min'] - 1, rect['lon_max'] + 1)
    if lat_empty or lon_empty:
        laiFire = lai.sel(time=timeslice, latitude=onefirelat, longitude=onefirelon)

    laiFireMeanTime = laiFire.mean(dim="time")
    # compute() materialises a lazily evaluated mean so that .item() can be called
    laiFireMeanComputed = laiFire.mean().compute()
    if pd.isna(laiFireMeanComputed):
        print(f"Longitude from: {rect['lon_min']}, to: {rect['lon_max']}")
        print(onefirelon)
        print(f"Latitude from: {rect['lat_min']}, to: {rect['lat_max']}")
        print(onefirelat)
        display(laiFire)
    return laiFireMeanComputed.item(), laiFireMeanTime  # mean over all dimensions, mean over time

In [None]:
#function to compute mean temperature for one fire
def get_fire_temperature(CTemp,rect,firedate):
    """Average temperature inside one fire's bounding rectangle.

    CTemp    -- DataArray selectable by ``time``, ``latitude`` and ``longitude``
    rect     -- dict with 'lon_min', 'lon_max', 'lat_min', 'lat_max'
    firedate -- dict with 'idateMstr' and 'fdateMstr' (first day of the
                start/end month), used as a label slice on ``time``

    Latitude is sliced from max to min (reversed order). If the rectangle
    selects no cells along latitude or longitude, that axis is widened by
    one unit on both sides and the selection is repeated.

    Returns ``(mean over all dimensions as a scalar, mean over time only)``.
    Diagnostic output is printed/displayed when the overall mean is NaN.
    """
    south, north = rect['lat_min'], rect['lat_max']
    west, east = rect['lon_min'], rect['lon_max']
    timeslice = slice(firedate['idateMstr'], firedate['fdateMstr'])
    onefirelat = slice(north, south)
    onefirelon = slice(west, east)

    probe = CTemp.sel(time=timeslice, latitude=onefirelat, longitude=onefirelon)
    # axes 1 and 2 are treated as latitude and longitude
    if probe.shape[1] == 0:
        onefirelat = slice(north + 1, south - 1)
    if probe.shape[2] == 0:
        onefirelon = slice(west - 1, east + 1)
    CTempFire = CTemp.sel(time=timeslice, latitude=onefirelat, longitude=onefirelon)

    time_mean = CTempFire.mean(dim="time")
    # compute() materialises a lazily evaluated mean so that .item() can be called
    overall_mean = CTempFire.mean().compute()
    if pd.isna(overall_mean):
        print(f"NaN temperature data for this wildfire ")
        print(f"Longitude from: {rect['lon_min']}, to: {rect['lon_max']}")
        print(onefirelon)
        print(f"Latitude from: {rect['lat_min']}, to: {rect['lat_max']}")
        print(onefirelat)
        display(CTempFire)
    return overall_mean.item(), time_mean

## WIND

In [None]:
# Open the wind GRIB file with the cfgrib engine.
# The path is relative, so it is resolved against the current working directory.
windgribpath = "era_data/monthlywind_ang2020.grib" 
windgrib_ds = xr.open_dataset(windgribpath, engine='cfgrib')

In [None]:
# u10 / v10 are the two wind components read from the GRIB dataset.
windu = windgrib_ds['u10']
windv = windgrib_ds['v10']
windsp = np.sqrt(windu**2 + windv**2) # wind speed = magnitude of the (u, v) vector
# arctan2(v, u) is the angle of the (u, v) vector measured counter-clockwise from the +u axis, in degrees.
# NOTE(review): this is not the meteorological "direction the wind blows from" convention - confirm which one is intended.
winddirection = np.degrees(np.arctan2(windv, windu))

In [None]:
#function to compute mean wind speed for one fire
def get_fire_windspeed(windsp,rect,firedate):
    """Average wind speed inside one fire's bounding rectangle.

    windsp   -- DataArray selectable by ``time``, ``latitude`` and ``longitude``
    rect     -- dict with 'lon_min', 'lon_max', 'lat_min', 'lat_max'
    firedate -- dict with 'idateMstr' and 'fdateMstr' (first day of the
                start/end month), used as a label slice on ``time``

    Latitude is sliced from max to min (reversed order). If the rectangle
    selects no cells along latitude or longitude, that axis is widened by
    one unit on both sides and the selection is repeated.

    Returns ``(mean over all dimensions as a scalar, mean over time only)``.
    Diagnostic output is printed/displayed when the overall mean is NaN.
    """
    south, north = rect['lat_min'], rect['lat_max']
    west, east = rect['lon_min'], rect['lon_max']
    timeslice = slice(firedate['idateMstr'], firedate['fdateMstr'])
    onefirelat = slice(north, south)
    onefirelon = slice(west, east)

    probe = windsp.sel(time=timeslice, latitude=onefirelat, longitude=onefirelon)
    # axes 1 and 2 are treated as latitude and longitude
    if probe.shape[1] == 0:
        onefirelat = slice(north + 1, south - 1)
    if probe.shape[2] == 0:
        onefirelon = slice(west - 1, east + 1)
    windspFire = windsp.sel(time=timeslice, latitude=onefirelat, longitude=onefirelon)

    time_mean = windspFire.mean(dim="time")
    # compute() materialises a lazily evaluated mean so that .item() can be called
    overall_mean = windspFire.mean().compute()
    if pd.isna(overall_mean):
        print(f"Longitude from: {rect['lon_min']}, to: {rect['lon_max']}")
        print(onefirelon)
        print(f"Latitude from: {rect['lat_min']}, to: {rect['lat_max']}")
        print(onefirelat)
        display(windspFire)
    return overall_mean.item(), time_mean

## NDVI and dNBR

In [None]:
# Load the pre-fire NDVI and the dNBR arrays ("og" variant).
# Each quantity is stored per vegetation type in two files (part 1 and "pt2"),
# which are joined with np.hstack. File paths are relative to the working directory.
#1. OG
# NDVI, part 1
ndvi_pre_forest_1 = np.load('ndvi_pre_og_largest_fires_redone_Forest.npy')
ndvi_pre_shrubs_1 = np.load('ndvi_pre_og_largest_fires_redone_Shrubs.npy')
ndvi_pre_herb_1 = np.load('ndvi_pre_og_largest_fires_redone_Herbaceous Vegetation.npy')

# NDVI, part 2
ndvi_pre_forest_2 = np.load('ndvi_pre_og_largest_fires_redone_pt2Forest.npy')
ndvi_pre_shrubs_2 = np.load('ndvi_pre_og_largest_fires_redone_pt2Shrubs.npy')
ndvi_pre_herb_2 = np.load('ndvi_pre_og_largest_fires_redone_pt2Herbaceous Vegetation.npy')

# NDVI: join part 1 and part 2; the printed shapes are a sanity check
ndvi_pre_forest = np.hstack((ndvi_pre_forest_1,ndvi_pre_forest_2))
print(ndvi_pre_forest.shape)
ndvi_pre_shrubs = np.hstack((ndvi_pre_shrubs_1,ndvi_pre_shrubs_2))
print(ndvi_pre_shrubs.shape)
ndvi_pre_herb = np.hstack((ndvi_pre_herb_1,ndvi_pre_herb_2))
print(ndvi_pre_herb.shape)

# dNBR, part 1
dnbr_forest_1 = np.load('dnbr_og_largest_fires_redone_Forest.npy')
dnbr_shrubs_1 = np.load('dnbr_og_largest_fires_redone_Shrubs.npy')
dnbr_herb_1 = np.load('dnbr_og_largest_fires_redone_Herbaceous Vegetation.npy')

# dNBR, part 2
dnbr_forest_2 = np.load('dnbr_og_largest_fires_redone_pt2Forest.npy')
dnbr_shrubs_2 = np.load('dnbr_og_largest_fires_redone_pt2Shrubs.npy')
dnbr_herb_2 = np.load('dnbr_og_largest_fires_redone_pt2Herbaceous Vegetation.npy')

# dNBR: join part 1 and part 2; the printed shapes are a sanity check
dnbr_forest = np.hstack((dnbr_forest_1,dnbr_forest_2))
print(dnbr_forest.shape)
dnbr_shrubs = np.hstack((dnbr_shrubs_1,dnbr_shrubs_2))
print(dnbr_shrubs.shape)
dnbr_herb = np.hstack((dnbr_herb_1,dnbr_herb_2))
print(dnbr_herb.shape)

In [None]:
# Load the "remapped" variant of the pre-fire NDVI and dNBR arrays.
# Same layout as the "og" files: two parts per vegetation type, joined with np.hstack.
#2. Remapped
# NDVI, part 1
ndvi_pre_forest_1_remapped = np.load('ndvi_pre_remapped_largest_fires_redone_Forest.npy')
ndvi_pre_shrubs_1_remapped = np.load('ndvi_pre_remapped_largest_fires_redone_Shrubs.npy')
ndvi_pre_herb_1_remapped = np.load('ndvi_pre_remapped_largest_fires_redone_Herbaceous Vegetation.npy')

# NDVI, part 2
ndvi_pre_forest_2_remapped = np.load('ndvi_pre_remapped_largest_fires_redone_pt2Forest.npy')
# NOTE(review): this is the only file name with an underscore after "pt2" ("pt2_Shrubs");
# confirm it matches the file on disk and is not a typo.
ndvi_pre_shrubs_2_remapped = np.load('ndvi_pre_remapped_largest_fires_redone_pt2_Shrubs.npy')
ndvi_pre_herb_2_remapped = np.load('ndvi_pre_remapped_largest_fires_redone_pt2Herbaceous Vegetation.npy')

# NDVI: join part 1 and part 2; the printed shapes are a sanity check
ndvi_pre_forest_remapped = np.hstack((ndvi_pre_forest_1_remapped,ndvi_pre_forest_2_remapped))
print(ndvi_pre_forest_remapped.shape)
ndvi_pre_shrubs_remapped = np.hstack((ndvi_pre_shrubs_1_remapped,ndvi_pre_shrubs_2_remapped))
print(ndvi_pre_shrubs_remapped.shape)
ndvi_pre_herb_remapped = np.hstack((ndvi_pre_herb_1_remapped,ndvi_pre_herb_2_remapped))
print(ndvi_pre_herb_remapped.shape)

# dNBR, part 1
dnbr_forest_1_remapped = np.load('dnbr_remapped_largest_fires_redone_Forest.npy')
dnbr_shrubs_1_remapped = np.load('dnbr_remapped_largest_fires_redone_Shrubs.npy')
dnbr_herb_1_remapped = np.load('dnbr_remapped_largest_fires_redone_Herbaceous Vegetation.npy')

# dNBR, part 2
dnbr_forest_2_remapped = np.load('dnbr_remapped_largest_fires_redone_pt2Forest.npy')
dnbr_shrubs_2_remapped = np.load('dnbr_remapped_largest_fires_redone_pt2Shrubs.npy')
dnbr_herb_2_remapped = np.load('dnbr_remapped_largest_fires_redone_pt2Herbaceous Vegetation.npy')

# dNBR: join part 1 and part 2; the printed shapes are a sanity check
dnbr_forest_remapped = np.hstack((dnbr_forest_1_remapped,dnbr_forest_2_remapped))
print(dnbr_forest_remapped.shape)
dnbr_shrubs_remapped = np.hstack((dnbr_shrubs_1_remapped,dnbr_shrubs_2_remapped))
print(dnbr_shrubs_remapped.shape)
dnbr_herb_remapped = np.hstack((dnbr_herb_1_remapped,dnbr_herb_2_remapped))
print(dnbr_herb_remapped.shape)

In [None]:
# Reference list of the arrays produced by the two loading cells.
# Both lines are bare tuple expressions with no assignment; in a notebook only
# the last one is shown as the cell output.
ndvi_pre_forest, ndvi_pre_shrubs, ndvi_pre_herb, dnbr_forest, dnbr_shrubs, dnbr_herb
ndvi_pre_forest_remapped, ndvi_pre_shrubs_remapped, ndvi_pre_herb_remapped, dnbr_forest_remapped, dnbr_shrubs_remapped, dnbr_herb_remapped

In [None]:
## Load NDVI and DNBR
#ndvi_pre_forest_ = np.load('ndvi_pre_remapped_largest_fires_Forest_good.npy')
#ndvi_pre_shrubs = np.load('ndvi_pre_remapped_largest_fires_Shrubs.npy')
#ndvi_pre_herb = np.load('ndvi_pre_remapped_largest_fires_Herbaceous Vegetation.npy')

#dnbr_forest = np.load('dnbr_remapped_largest_fires_Forest_good.npy')
#dnbr_shrubs = np.load('dnbr_remapped_largest_fires_Shrubs.npy')
#dnbr_herb = np.load('dnbr_remapped_largest_fires_Herbaceous Vegetation.npy')
#ndvi_pre_herb.shape

### compute correlations matrix

In [None]:
from itertools import combinations

from scipy.stats import pearsonr

def get_correlations(df, alpha=0.05):
    """Pearson correlation matrix with non-significant entries masked out.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric columns to correlate pairwise.
    alpha : float, optional
        Significance threshold; a pair is significant when its p-value is
        ``<= alpha``. Defaults to 0.05.

    Returns
    -------
    corr : pandas.DataFrame
        ``df.corr()`` with the diagonal and every non-significant pair set
        to NaN.
    p_values : pandas.DataFrame
        Symmetric matrix of two-sided p-values from ``scipy.stats.pearsonr``;
        the diagonal is left at 0.
    nsig : int
        Number of significant column pairs (each unordered pair counted once).

    A pair whose p-value is NaN (for example a constant column) is treated as
    not significant. Previously such pairs failed the ``p > 0.05`` test and
    were counted as significant.
    """
    corr = df.corr()
    p_values = pd.DataFrame(np.zeros(corr.shape), columns=corr.columns, index=corr.index)

    nsig = 0
    # pearsonr is symmetric, so each unordered pair is evaluated once and mirrored
    for col1, col2 in combinations(df.columns, 2):
        p = pearsonr(df[col1], df[col2])[1]
        p_values.loc[col1, col2] = p
        p_values.loc[col2, col1] = p
        if p <= alpha:  # False for NaN, so undefined correlations are masked too
            nsig += 1
        else:
            corr.loc[col1, col2] = np.nan
            corr.loc[col2, col1] = np.nan

    # a variable's correlation with itself carries no information
    for col in df.columns:
        corr.loc[col, col] = np.nan
    return corr, p_values, nsig

# 2. COMPUTE

### select, compute and plot wildfires

In [None]:
# Pick the vegetation type and the selection strategy for the analysed sample
vegetationTypes = ['Forest', 'Shrubs', 'Herbaceous Vegetation', 'Agriculture']
vegetation = vegetationTypes[1]

selectionType = ['Random', 'Largest']
fireselection = selectionType[1]  # random or largest?

# upper bound on the number of fires in the sample
num_fires = 50

if fireselection == 'Random':
    # reproducible random draw (fixed seed), capped at the number of fires available
    angolaYear_vegetation = angolaYear.loc[angolaYear["LC_descrip"] == vegetation]
    angolaYear_sample = angolaYear_vegetation.sample(
        min(num_fires, len(angolaYear_vegetation)), random_state=42
    )
elif fireselection == 'Largest':
    # angola_sorted is ordered by burnt area (descending), so the first rows are the largest fires
    angolaYear_vegetation = angola_sorted.loc[angola_sorted["LC_descrip"] == vegetation]
    angolaYear_sample = angolaYear_vegetation.iloc[:num_fires]

In [None]:
# Build one sample that covers several vegetation types (used when combined=True)
vegetationTypes=['Forest','Shrubs','Herbaceous Vegetation']

# The per-type frames use their own names so that this cell does not overwrite
# angolaYear_sample / angolaYear_vegetation from the single-vegetation cell.
# Previously the loop left angolaYear_sample pointing at the last vegetation
# type, while `vegetation` still named the type chosen earlier.
samples_by_type = []
for vegetation2 in vegetationTypes:
    if fireselection=='Random':
        # reproducible random draw, capped at the number of fires available
        fires_of_type = angolaYear[angolaYear["LC_descrip"] == vegetation2]
        sample_of_type = fires_of_type.sample(min(num_fires,fires_of_type.shape[0]), random_state = 42)
    elif fireselection=='Largest':
        # angola_sorted is ordered by burnt area, so the first rows are the largest fires
        fires_of_type = angola_sorted[angola_sorted["LC_descrip"] == vegetation2]
        sample_of_type = fires_of_type[0:num_fires]
    else:
        raise ValueError(f"Unknown fire selection: {fireselection!r}")
    samples_by_type.append(sample_of_type)

# A single concat of the GeoDataFrame slices, instead of growing an empty frame
# that has no geometry dtype or CRS.
angolaYear_sample_combined = pd.concat(samples_by_type)

# The NDVI/dNBR arrays must be stacked in the same order as vegetationTypes
ndvi_combined = np.concatenate((ndvi_pre_forest, ndvi_pre_shrubs, ndvi_pre_herb))
dnbr_combined = np.concatenate((dnbr_forest, dnbr_shrubs, dnbr_herb))

# sanity check: the number of fires should match the number of NDVI values
print (angolaYear_sample_combined.shape, ndvi_combined.shape)


In [None]:
# Switch between the single-vegetation analysis (False) and the combined one (True).
combined = False # set to True to correlate over all vegetation types together
if combined:
    # analyse the multi-vegetation sample and label the outputs accordingly
    angolaYear_sample = angolaYear_sample_combined
    vegetation = 'Combined'
# Bare expression: shows the current setting as the cell output.
combined    

In [None]:
# Open the LAI GRIB file with the cfgrib engine; "~" is expanded explicitly first.
# grib_ds is read as a global by get_fire_lai (variables lai_hv / lai_lv).
gribpath = os.path.expanduser("~/shared-public/Jintasaurus_Skip_Energico/era_data/monthlylai_ang2020.grib")
grib_ds = xr.open_dataset(gribpath, engine='cfgrib')

In [None]:
# Compute the per-fire environmental values and attach them to the sample

# angolaYear_sample is a slice of another frame; work on an independent copy so
# the column assignments below neither raise SettingWithCopyWarning nor depend
# on whether pandas returned a view.
angolaYear_sample = angolaYear_sample.copy()

precipvals = []
tempvals = []
rectvals = []
laivals = []
windspvals = []
middledays = []
firelengths = []
for j in range(len(angolaYear_sample)):
    OneFire = angolaYear_sample.iloc[j:j+1]  # one-row frame: the helpers expect a frame, not a Series
    rect = find_fire_rect(OneFire)
    firedate = find_fire_dates(OneFire)
    precipFireMean, precipFireTime = get_fire_precipitation(chirps_data,rect,firedate)
    CTempFireMean, _ = get_fire_temperature(CTemp,rect,firedate)
    # pass this fire's own land cover, not the sample-wide label (which can be 'Combined')
    laiFireMean, _ = get_fire_lai(OneFire['LC_descrip'],rect,firedate)
    windspFireMean, _ = get_fire_windspeed(windsp,rect,firedate)
    rectvals.append(rect)
    precipvals.append(precipFireMean)
    tempvals.append(CTempFireMean)
    laivals.append(laiFireMean)
    windspvals.append(windspFireMean)
    middledays.append(firedate['middle_day'])
    firelengths.append(firedate['flength'])

# add these computed values to the dataframe of selected wildfires
angolaYear_sample['precip'] = precipvals
angolaYear_sample['t2mC']=tempvals
angolaYear_sample['rectangle']=rectvals
angolaYear_sample['lai']=laivals
angolaYear_sample['windspeed']=windspvals
angolaYear_sample['middleday']=middledays
angolaYear_sample['firelength']=firelengths

# Attach the NDVI/dNBR arrays that belong to the analysed vegetation.
# The arrays must have one value per fire, in the same order as the sample rows.
if combined: #if we want combined correlations
    angolaYear_sample['ndvi'] = ndvi_combined
    angolaYear_sample['dnbr'] = dnbr_combined
elif vegetation == 'Forest':
    angolaYear_sample['ndvi'] = ndvi_pre_forest
    angolaYear_sample['dnbr'] = dnbr_forest
elif vegetation == 'Shrubs':
    angolaYear_sample['ndvi'] = ndvi_pre_shrubs
    angolaYear_sample['dnbr'] = dnbr_shrubs
elif vegetation == 'Herbaceous Vegetation':
    angolaYear_sample['ndvi'] = ndvi_pre_herb
    angolaYear_sample['dnbr'] = dnbr_herb

angolaYear_sample.shape


In [None]:
import math

#TODO  - add month of the year, add vegetation type

# New dataframe holding only the variables to be correlated
df=pd.DataFrame({"dNBR":angolaYear_sample['dnbr'], "Burnt Area":angolaYear_sample['Area_Acres'],"Fire Duration":angolaYear_sample['firelength'], "Pre-fire NDVI":angolaYear_sample['ndvi'],
                 "Temperature":angolaYear_sample['t2mC'],"Precipitation":angolaYear_sample['precip'],
                 "Wind Speed":angolaYear_sample['windspeed'], "Day of Year":angolaYear_sample['middleday'],
                 }) #,"LeafAreaIndex":angolaYear_sample['lai']
# Drop fires with a missing value in any of these four variables
df.dropna(subset=['Temperature', 'Wind Speed', 'Pre-fire NDVI', 'dNBR'], inplace=True)
print(df.shape)
corr,p_values,nsig = get_correlations(df)
print(corr.shape, p_values.shape)

NN = df.shape[1] * (df.shape[1]-1) / 2 # number of column pairs
# Grid sized for the significant pairs only; at least one row so that
# plt.subplots does not fail when nothing is significant.
cols = int(math.sqrt(nsig)+1)
rows = max(1, round(math.sqrt(nsig)))
print (str(NN) + ' ' + str(nsig) + ' ' + str(cols) + ' '+ str(rows))

# squeeze=False keeps axs two-dimensional even for a single row or column,
# so axs[r, c] below is always valid.
fig, axs = plt.subplots(nrows=rows, ncols=cols, figsize=(16, 12.5), squeeze=False)
fig.suptitle(f"Vegetation: {vegetation}    Wildfires: {fireselection} ")
n = 0 # index of the next free subplot
for index1, column_name1 in enumerate(df.columns[0:df.shape[1]-1]):
    for index2, column_name2 in enumerate(df.columns[index1+1:df.shape[1]]):
        # only pairs whose x variable is one of these three are plotted
        if column_name2 == 'Burnt Area' or column_name2 == 'Fire Duration' or column_name2 == 'Pre-fire NDVI':
            # index1+index2+1 is column_name2's position in df.columns
            if p_values.iloc[index1, index1+index2+1]<0.05:
                c = n % cols # column number
                r = n // cols # row number
                n=n+1
                df.plot(ax=axs[r,c], kind='scatter', x=column_name2, y=column_name1, fontsize=12)
# plt.savefig('figures/scatterplots_'+vegetation+'_'+fireselection+'.svg')

In [None]:
import math

# NOTE(review): the label is hard-coded to 'Forest' here, while the data still comes
# from whatever angolaYear_sample currently holds - confirm they match before saving.
vegetation = 'Forest'

# New dataframe holding only the variables to be correlated
df = pd.DataFrame({
    "dNBR": angolaYear_sample['dnbr'],
    "Burnt Area": angolaYear_sample['Area_Acres'],
    "Fire Duration": angolaYear_sample['firelength'],
    "Pre-fire NDVI": angolaYear_sample['ndvi'],
    "Temperature": angolaYear_sample['t2mC'],
    "Precipitation": angolaYear_sample['precip'],
    "Wind Speed": angolaYear_sample['windspeed'],
    "Day of Year": angolaYear_sample['middleday'],
})  # ,"LeafAreaIndex":angolaYear_sample['lai']
# Drop fires with a missing value in any of these four variables
df.dropna(subset=['Temperature', 'Wind Speed', 'Pre-fire NDVI', 'dNBR'], inplace=True)
print(df.shape)
corr, p_values, nsig = get_correlations(df)
print(corr.shape, p_values.shape)

# Pair counts and grid size are only printed here; the plots below are individual figures
NN = df.shape[1] * (df.shape[1] - 1) / 2
cols = int(math.sqrt(nsig) + 1)
rows = round(math.sqrt(nsig))
print(f"{NN} {nsig} {cols} {rows}")

# (y column, x column, x label, y label, file suffix) for each saved scatter plot
plot_specs = [
    ('Burnt Area', 'Fire Duration', 'Fire Duration (Days)', 'Burnt Area (Acres)', 'plot1'),
    ('dNBR', 'Burnt Area', 'Burnt Area (Acres)', 'dNBR', 'plot2'),
    ('dNBR', 'Pre-fire NDVI', 'Pre-fire NDVI', 'dNBR', 'plot3'),
]
for column_name1, column_name2, xlabel, ylabel, suffix in plot_specs:
    fig, ax = plt.subplots()
    df.plot(ax=ax, kind='scatter', x=column_name2, y=column_name1)
    ax.set_title('Vegetation: Forest', fontsize=12)
    ax.set_xlabel(xlabel, fontsize=12)
    ax.set_ylabel(ylabel, fontsize=12)
    plt.savefig('figures/scatterplots_' + vegetation + '_' + fireselection + '_redone_' + suffix + '.png')


In [None]:
# Plot the correlation heatmaps; only significant correlations are annotated
import seaborn as sns

vegetation_all = ['Forest', 'Shrubs', 'Herbaceous Vegetation']

# 'Day of Year' is left out of the heatmap; errors='ignore' makes re-running the cell safe
df = df.drop('Day of Year', axis=1,errors='ignore')

for v in vegetation_all:
    # NOTE(review): the same `df` is used for every vegetation type, so the three
    # figures differ only in title and file name. Rebuild `df` per vegetation
    # type here if separate heatmaps are intended.
    corr,p_values,nsig = get_correlations(df)
    fig, ax = plt.subplots(nrows=1, ncols=1,figsize=(10, 8))
    fig.suptitle(f"Vegetation: {v}", fontsize=16)
    # draw on the axes created above explicitly instead of relying on the current axes
    ax_s=sns.heatmap(corr,annot=False, cmap="PiYG", vmin=-0.7, vmax=0.7, ax=ax)
    # the last axes of the figure is the colour bar;
    # the old literal 'Pearson''s R' concatenated to "Pearsons R"
    ax_s.figure.axes[-1].axes.set_ylabel("Pearson's R",size=16)
    ax_s.figure.axes[-1].tick_params(labelsize=16)

    # Increase the size of labels on both x-axis and y-axis
    ax.set_xticklabels(ax.get_xticklabels(), fontsize=16)
    ax.set_yticklabels(ax.get_yticklabels(), fontsize=16)

    # Annotate significant cells with the correlation coefficient and the p-value
    for i in range(corr.shape[0]):
        for j in range(corr.shape[1]):
            if i != j and p_values.iloc[i, j]<0.05:
                corrtxt = '{:.2f}'.format(corr.iloc[i, j]) + ';\np=' + '{:.2f}'.format(p_values.iloc[i, j])
                # heatmap cells are centred on half-integer coordinates
                ax.text(j+0.5, i+0.5, corrtxt, ha='center', va='center', fontsize=16, color='black')
    plt.subplots_adjust(bottom=0.15)
    plt.savefig('figures/correlations_'+v+'_'+fireselection+'_redone.svg',dpi=300, bbox_inches = "tight")


# PLOT individual fires

In [None]:
# Function to format the text to show counts
def absolute_value(val):
    """Turn a wedge percentage (as passed by ``autopct``) back into a count.

    Reads the module-level ``vegetation_counts`` at call time.
    """
    a = int(round(val/100.*vegetation_counts.sum()))
    return a

# Colour assigned to each land-cover label
color_dict = {
    'Forest':  'darkgreen',
    'Herbaceous Vegetation':  'salmon',
    'Shrubs': 'cornflowerblue',
    'Agriculture': 'gold',
    'Herbaceous Wetland': 'purple'
}

# Kept for other cells that may use a colormap built from the same colours
aYu = ['Forest', 'Herbaceous Vegetation', 'Shrubs', 'Agriculture', 'Herbaceous Wetland']# aYuangolaYear['LC_descrip'].unique()
mycmap = matplotlib.colors.ListedColormap([color_dict[x] for x in aYu])

vegetation_counts = angolaYear['LC_descrip'].value_counts()

# Look colours up by label. A colormap is sampled in wedge order (value_counts
# sorts by frequency), so it matches color_dict only if that order happens to
# equal the order of aYu. Labels missing from color_dict fall back to light grey.
wedge_colors = [color_dict.get(label, 'lightgrey') for label in vegetation_counts.index]

# Create a pie chart annotated with absolute counts instead of percentages
vegetation_counts.plot(kind='pie', autopct=absolute_value, colors=wedge_colors)

# Equal aspect ratio so the pie is drawn as a circle
plt.axis('equal')
plt.savefig('figures/piechart.svg')
plt.savefig('figures/piechart.png',dpi=300,bbox_inches = "tight")

In [None]:
# Map of the combined fire sample, coloured by land-cover class.

# Define a color for each category
color_dict = {    
    'Forest':  'darkgreen',
    'Herbaceous Vegetation':  'salmon',
    'Shrubs': 'cornflowerblue',
}

# Create a custom color map with one colour per class, in the order given by aYu
aYu = ['Forest', 'Herbaceous Vegetation', 'Shrubs']# angolaYear['LC_descrip'].unique()
mycmap = matplotlib.colors.ListedColormap([color_dict[x] for x in aYu])

# Plot the sample dataset on a PlateCarree (lon/lat) map
crs = ccrs.PlateCarree()
fig, axis = plt.subplots(
    ncols=1, nrows=1, figsize=[12, 6], subplot_kw={"projection":crs}
)

axis.set_extent(angolaextent,crs = crs) #limit the figure to the Angola size
angolaYear_sample_combined.plot(column='LC_descrip', legend=True,cmap=mycmap, ax=axis) #before cmap='OrRd'
# move the legend that the plot call created
legend = axis.get_legend()
legend.set_bbox_to_anchor((0.75, 0.18)) 

axis.gridlines(draw_labels=True, crs = crs)
axis.coastlines()
axis.add_feature(cfeature.BORDERS, edgecolor='blue')
# NOTE(review): the title hard-codes "50" although the sample size comes from num_fires
# and is taken per vegetation type - confirm the wording; no title is set for 'Random'.
if fireselection =='Largest':
    axis.set_title('50 largest wildfires in Angola (2020)')
plt.savefig('figures/firemap_'+fireselection+'.svg')
plt.savefig('figures/firemap_'+fireselection+'.png',dpi=300,bbox_inches = "tight")

In [None]:
# Pick a single fire for the example plots: row 1 of angola_sorted,
# i.e. the second-largest fire by burnt area (kept as a one-row frame).
OneFire = angola_sorted[1:2]
# compute fire rectangle and dates
rect = find_fire_rect(OneFire)
print(f"Longitude from: {rect['lon_min']}, to: {rect['lon_max']}")
print(f"Latitude from : {rect['lat_min']}, Maximum y: {rect['lat_max']}")  

firedate = find_fire_dates(OneFire)
print(f"Fire from: {firedate['idatestr']} {firedate['idateMstr']}, to: {firedate['fdatestr']} {firedate['fdateMstr']}")

# Scalar means and time-mean fields for this fire; the *MeanTime fields are used by the plotting cells
precipFireMean, precipFireMeanTime = get_fire_precipitation(chirps_data,rect,firedate)
CTempFireMean, CTempFireMeanTime = get_fire_temperature(CTemp,rect,firedate)

In [None]:
# Draw the fire stored in OneFire on a map limited to the Angola extent

#crs = ccrs.Robinson() #does not work here for gridlines
crs = ccrs.PlateCarree()
fig, axis = plt.subplots(
    nrows=1,
    ncols=1,
    figsize=(12, 6),
    subplot_kw=dict(projection=crs),
)

# Limit the figure to the Angola size
axis.set_extent(angolaextent, crs=crs)
# The selected fire, in red
OneFire.plot(ax=axis, color="red")
axis.gridlines(draw_labels=True, crs=crs)
axis.coastlines()
axis.add_feature(cfeature.BORDERS, edgecolor='blue')

axis.set_title("Selected wildfire of 2020 in Angola")
#plt.show()
plt.savefig('figures/firePolygon.svg')

### plot precipitation

In [None]:
#precipitation
# Only the second return value is plotted below (presumably the mean over the
# fire period, going by its name). It is bound to precipFireMeanTime so the
# plot uses the value computed in this cell and precipFireMean is not
# overwritten with a different quantity.
_, precipFireMeanTime = get_fire_precipitation(chirps_data,rect,firedate)

crs = ccrs.PlateCarree()

fig, axis = plt.subplots(
    ncols=1, nrows=1, figsize=[12, 6], subplot_kw={"projection": crs}
)
timestring = f"Precipitation mean (fire duration {firedate['idateMstr']} to {firedate['fdateMstr']})  "
precipFireMeanTime.plot( #plot mean over time
    ax=axis,
    x="longitude",
    y="latitude",
    transform=crs,
    cmap="magma",
    robust=True,
)
axis.set_extent(angolaextent,crs = crs) #limit the figure to the Angola size
axis.coastlines()
axis.set_title(timestring)
axis.gridlines(draw_labels=True, crs = crs)
axis.add_feature(cfeature.BORDERS, edgecolor='red')
plt.savefig('figures/precipitationSquare.svg')

## Temperature ERA

### plot temperature for one fire

In [None]:
# Only the second return value is plotted below (presumably the mean over the
# fire period, going by its name)
_, CTempFireMeanTime = get_fire_temperature(CTemp,rect,firedate)

crs = ccrs.PlateCarree()
fig, axs = plt.subplots(
    ncols=1, nrows=1, figsize=[12, 6], subplot_kw={"projection": crs}
)
CTempFireMeanTime.plot( #.mean(dim="time") #plot mean over time
    ax=axs,
    x="longitude",
    y="latitude",
    transform=crs,
    cmap="magma",
    robust=True
)
timestring = f"Temperature mean (fire duration {firedate['idateMstr']} to {firedate['fdateMstr']})"
axs.set_extent(angolaextent,crs = crs) #limit the figure to the Angola size
axs.coastlines()
axs.set_title(timestring)
# Gridlines belong on this figure's axes (axs); `axis` is the axes object of
# an earlier cell's figure
axs.gridlines(draw_labels=True, crs = crs)
axs.add_feature(cfeature.BORDERS, edgecolor='red')

# NDVI MODIS

In [None]:
import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import os
import matplotlib
from osgeo import gdal
import rioxarray as rxr
import xarray as xr
from datetime import datetime, timedelta

In [None]:
#ChatGPT4
def extract_date_from_filename(filename):
    """Return the date encoded in a dot-separated file name as 'YYYY-MM-DD'.

    The second dot-separated field is expected to look like 'A2020353':
    one leading character, a four-digit year, then the day of the year.

    Args:
        filename: File name such as
            'MOD09A1.A2020353.h19v09.061.2021006075202.hdf'.

    Returns:
        The calendar date as a string, e.g. '2020-12-18' for 'A2020353'.

    Raises:
        IndexError: If the name contains no '.' separator.
        ValueError: If the year or day-of-year part is not an integer.
    """
    token = filename.split('.')[1]
    year_part, doy_part = token[1:5], token[5:]

    # Day 1 of the year is 1 January, hence the offset of (day_of_year - 1)
    first_of_year = datetime(int(year_part), 1, 1)
    offset = timedelta(days=int(doy_part) - 1)

    return (first_of_year + offset).strftime('%Y-%m-%d')

# Example usage
#filename = 'MOD09A1.A2020001.h19v09.061.2020324110240.hdf'
#print(extract_date_from_filename(filename))  # Output: '2020-01-01'


In [None]:
# MOD09A1 files for tile h19v09, year 2020: 46 files whose day-of-year field
# runs from 001 to 361 in steps of 8. The trailing number of each name differs
# per file, so the names are listed explicitly.
filenames = ['MOD09A1.A2020001.h19v09.061.2020324110240.hdf','MOD09A1.A2020009.h19v09.061.2020326083028.hdf',
'MOD09A1.A2020017.h19v09.061.2020328150953.hdf','MOD09A1.A2020025.h19v09.061.2020328182654.hdf',
'MOD09A1.A2020033.h19v09.061.2020329022052.hdf','MOD09A1.A2020041.h19v09.061.2020329105652.hdf',
'MOD09A1.A2020049.h19v09.061.2020335034956.hdf','MOD09A1.A2020057.h19v09.061.2020335045254.hdf',
'MOD09A1.A2020065.h19v09.061.2020330102458.hdf','MOD09A1.A2020073.h19v09.061.2020330201948.hdf',
'MOD09A1.A2020081.h19v09.061.2020331132917.hdf','MOD09A1.A2020089.h19v09.061.2020335062149.hdf',
'MOD09A1.A2020097.h19v09.061.2020332133649.hdf','MOD09A1.A2020105.h19v09.061.2020332144730.hdf',
'MOD09A1.A2020113.h19v09.061.2020333054255.hdf','MOD09A1.A2020121.h19v09.061.2020333100613.hdf',
'MOD09A1.A2020129.h19v09.061.2020334142930.hdf','MOD09A1.A2020137.h19v09.061.2020335005033.hdf',
'MOD09A1.A2020145.h19v09.061.2020335093624.hdf','MOD09A1.A2020153.h19v09.061.2020336020656.hdf',
'MOD09A1.A2020161.h19v09.061.2020336133433.hdf','MOD09A1.A2020169.h19v09.061.2020336203227.hdf',
'MOD09A1.A2020177.h19v09.061.2020340130036.hdf','MOD09A1.A2020185.h19v09.061.2020340145230.hdf',
'MOD09A1.A2020193.h19v09.061.2020340161042.hdf','MOD09A1.A2020201.h19v09.061.2020340173059.hdf',
'MOD09A1.A2020209.h19v09.061.2020341064720.hdf','MOD09A1.A2020217.h19v09.061.2020342044732.hdf',
'MOD09A1.A2020225.h19v09.061.2020343012632.hdf','MOD09A1.A2020233.h19v09.061.2020344232309.hdf',
'MOD09A1.A2020241.h19v09.061.2020345162625.hdf','MOD09A1.A2020249.h19v09.061.2020346143213.hdf',
'MOD09A1.A2020257.h19v09.061.2020347014143.hdf','MOD09A1.A2020265.h19v09.061.2020347103225.hdf',
'MOD09A1.A2020273.h19v09.061.2020350000613.hdf','MOD09A1.A2020281.h19v09.061.2020348055523.hdf',
'MOD09A1.A2020289.h19v09.061.2020349214809.hdf','MOD09A1.A2020297.h19v09.061.2020353000648.hdf',
'MOD09A1.A2020305.h19v09.061.2020353065147.hdf','MOD09A1.A2020313.h19v09.061.2020353135617.hdf',
'MOD09A1.A2020321.h19v09.061.2020354002727.hdf','MOD09A1.A2020329.h19v09.061.2020357074335.hdf',
'MOD09A1.A2020337.h19v09.061.2020363141001.hdf','MOD09A1.A2020345.h19v09.061.2021011215413.hdf',
'MOD09A1.A2020353.h19v09.061.2021006075202.hdf','MOD09A1.A2020361.h19v09.061.2021012071524.hdf'
]
# Directory holding the files above; the trailing slash is required because
# the path below is built by plain string concatenation
modis_dir="/home/jovyan/shared-public/Jintasaurus_Skip_Energico/modis_images/"
# Open only the first file to inspect its structure
ff = modis_dir + filenames[0]
ds  = rxr.open_rasterio(ff,  masked = True)
# Last expression of the cell: shows the opened object in the notebook
ds

In [None]:
#create combined MODIS Dataset from all 46 2020 files
#this script always crashes the server, without saving the whole dataset, it is probably too large
datasets = []
for filename in filenames:    
    # Read the file into a Dataset
    ff = os.path.expanduser("~/shared-public/Jintasaurus_Skip_Energico/modis_images/" + filename)
    try:
        ds  = rxr.open_rasterio(ff,  masked = True)
        #display(ds)
        #break
    except Exception as exc:
        # Report why the file was skipped. A bare `except:` would hide the
        # cause and would also swallow KeyboardInterrupt, so a long-running
        # load could not be interrupted.
        print("An exception for " + filename + ": " + repr(exc))
    else:
        print("OK: " + filename) 
        # The date comes from the day-of-year field of the file name
        date = pd.Timestamp(extract_date_from_filename(filename))    
        # Assign the 'Date' coordinate
        ds = ds.assign_coords(date=date)
        datasets.append(ds)
        
## Concatenate all datasets along the 'Date' dimension
#combined_ds = xr.concat(datasets, dim='date')
#print("Combined")  
## Save the Dataset to a NetCDF file
#combined_ds.to_netcdf('MODIS2020.nc')
#print("Saved MODIS2020.nc")   
#combined_ds


In [None]:
modis_dir="/home/jovyan/shared-public/Jintasaurus_Skip_Energico/modis_images/modis_10/"

# Start from an empty list so this cell works on its own (no NameError when
# the full-year cell was skipped) and does not append the same files again
# when that cell has already filled `datasets`.
datasets = []

# os.listdir returns entries in arbitrary order; sorting makes the load order
# reproducible (for names that differ only in the day-of-year field this is
# also date order).
for filename in sorted(os.listdir(modis_dir)):
    ff = os.path.join(modis_dir, filename)
    if os.path.isfile(ff):
        try:
            ds  = rxr.open_rasterio(ff,  masked = True)
        except Exception as exc:
            # Report why the file was skipped. A bare `except:` would hide the
            # cause and would also swallow KeyboardInterrupt.
            print("An exception for " + filename + ": " + repr(exc))
        else:
            print("OK: " + filename) 
            # The date comes from the day-of-year field of the file name
            date = pd.Timestamp(extract_date_from_filename(filename))    
            # Assign the 'Date' coordinate
            ds = ds.assign_coords(date=date)
            datasets.append(ds)

In [None]:
# Open the combined MODIS dataset from its NetCDF file
combined_ds = xr.open_dataset('MODIS2020.nc')

# Show the entry whose date is nearest to 2020-02-07, allowing a gap of at
# most 7 days
combined_ds.sel(
    date='2020-02-07',
    method='nearest',
    tolerance=timedelta(days=7),
)