# Extract district stats

This notebook extracts statistics per district. You can specify which districts you are interested in.

In [1]:
from pathlib import Path
import datetime
import pandas as pd
import xarray as xr
import rioxarray as rioxr
import dask
import zarr
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np

## What do you want to extract

Parameters

In [2]:
# Districts to process: either a list of district names, or the string 'All'
districtsToExtract = [
    'Bagerhat',
    'Bandarban',
    'Sylhet',
]

In [3]:
# Period to process: either [start, stop] as 'YYYY-MM-DD' strings, or the string 'All'
timeRange = [
    '2017-01-01',
    '2020-12-31',
]

In [4]:
# Destination of the CSV file written at the end of the notebook
outputCSV = 'DistrictInundationValues.csv'

Inputs

In [5]:
# Location of the Zarr store holding the time-series data
datapath = Path(
    'Data/BangladeshTimeSeriesZarr/'
)

In [None]:
# Location of the shapefile with the district boundaries
shapefilepath = Path(
    'Data/ShapeFiles/bgd_adm_bbs_20201113_SHP/bgd_admbnda_adm2_bbs_20201113.shp'
)

## Load data and modify according to setup

In [6]:
# Open the Zarr store at `datapath` as an xarray dataset
dataset = xr.open_zarr(datapath)

In [7]:
# Replace the time coordinate with a pandas DatetimeIndex built from its current values
timeValues = dataset['time'].values
dataset['time'] = pd.DatetimeIndex(timeValues)

In [8]:
# Keep only the time steps between timeRange[0] and timeRange[1] (both bounds
# included); when timeRange is 'All' the dataset is left untouched.
if timeRange != 'All':
    rangeStart = pd.to_datetime(timeRange[0], format='%Y-%m-%d')
    rangeEnd = pd.to_datetime(timeRange[1], format='%Y-%m-%d')
    timestamps = dataset.time.values
    # boolean mask over the time axis, True for every step inside the range
    index = (timestamps >= rangeStart) & (timestamps <= rangeEnd)
    dataset = dataset.sel(time=index)

In [10]:
# Read the district shapefile at `shapefilepath` with geopandas
districts = gpd.read_file(shapefilepath)

In [11]:
# Restrict the district table to the requested names (matched against the
# ADM2_EN column); when districtsToExtract is 'All' every district is kept.
if districtsToExtract != 'All':
    # A bare string would turn `name in districtsToExtract` into a substring
    # test, so a single name is wrapped into a one-element list first.
    if isinstance(districtsToExtract, str):
        wantedDistricts = [districtsToExtract]
    else:
        wantedDistricts = list(districtsToExtract)

    # Misspelled names would otherwise vanish silently from the output.
    missingDistricts = sorted(set(wantedDistricts) - set(districts['ADM2_EN']))
    if missingDistricts:
        print(f"Warning: requested districts not found in ADM2_EN and skipped: {missingDistricts}")

    districts = districts.loc[districts['ADM2_EN'].isin(wantedDistricts)]

In [12]:
# Keep the CRS of the district geometries; it is reused below for the dataset and for clipping
crs = districts.crs

In [13]:
# Re-apply the CRS that was just read from `districts` itself.
# NOTE(review): this looks like a no-op when `districts.crs` is already set — confirm it is still needed.
districts = districts.set_crs(crs)

In [14]:
# Tag the dataset with the districts' CRS through the rioxarray accessor.
# NOTE(review): presumably the Zarr data is already expressed in the same CRS as the shapefile — confirm.
dataset = dataset.rio.set_crs(crs)

In [15]:
# Write the CRS onto the dataset with rioxarray; no CRS argument is passed here
# (presumably it falls back to the CRS set on the dataset beforehand — confirm against the rioxarray docs).
dataset = dataset.rio.write_crs()

## Extract values

In [16]:
# Build one table of summary statistics per selected district and collect the
# tables in `dfs`. Each table has one row per time step with the quantile,
# median, mean, std, min and max columns plus Region/District/Day/Month/Year.

# (reduction method called on the clipped dataset, output column name),
# listed in the order the columns appear in the merged table
STAT_COLUMNS = [
    ('median', 'Median'),
    ('mean', 'Mean'),
    ('std', 'Std'),
    ('min', 'Min'),
    ('max', 'Max'),
]


def _spatial_stat(clipped, reducer, label):
    """Reduce `clipped` over x/y with the named method and return a flat table.

    Parameters
    ----------
    clipped : the clipped dataset of one district.
    reducer : name of the reduction method to call on it (e.g. 'mean').
    label : name given to the resulting value column.

    Returns
    -------
    A DataFrame with the former index levels as columns (minus 'band') and
    the reduced values in a column called `label`.
    """
    frame = getattr(clipped, reducer)(dim=['x', 'y'], skipna=True).to_dataframe()
    frame = frame.drop(columns='spatial_ref')
    # exactly one value column is expected to remain at this point
    frame.columns = [label]
    return frame.reset_index().drop(columns=['band'])


dfs = []

# iterate over selected districts
for _, row in districts.iterrows():
    geometry = row['geometry']
    # clip dataset to the current district
    clipped_dataset = dataset.rio.clip([geometry], crs, drop=True, invert=False, all_touched=True)

    # calculate quantiles: one column per quantile, named 'Quantile_<q>'
    quantinundation = clipped_dataset.quantile([.025, .25, .5, .75, .975], dim=['x', 'y'], skipna=True)
    dfquants = quantinundation.to_dataframe()
    dfquants.columns = ['Quantile']
    dfquants = dfquants.unstack().reset_index().drop(columns=['band'])
    # flatten the two-level column labels; the first column is the time axis
    names = [f"{pair[0]}_{pair[1]}" for pair in dfquants.columns]
    names[0] = 'time'
    dfquants.columns = names
    # the 0.5 quantile duplicates the 'Median' column computed below; the
    # result of drop() has to be assigned, otherwise nothing is removed
    dfquants = dfquants.drop(columns=['Quantile_0.5'])

    # merge all together: quantiles first, then each statistic joined on time
    dfMerged = dfquants
    for reducer, label in STAT_COLUMNS:
        dfMerged = dfMerged.merge(_spatial_stat(clipped_dataset, reducer, label), on='time')
    dfMerged = dfMerged.rename(columns={'time': 'Date'})

    dfMerged['Region'] = row.ADM1_EN
    dfMerged['District'] = row.ADM2_EN
    dfMerged['Day'] = dfMerged['Date'].dt.day
    dfMerged['Month'] = dfMerged['Date'].dt.month
    dfMerged['Year'] = dfMerged['Date'].dt.year

    dfs.append(dfMerged)

  dfquants = dfquants.unstack().reset_index().drop(columns=['band'])
  dfquants = dfquants.unstack().reset_index().drop(columns=['band'])
  dfquants = dfquants.unstack().reset_index().drop(columns=['band'])


In [17]:
# Stack the per-district tables and write them to `outputCSV` without the index column
allDistricts = pd.concat(dfs)
allDistricts.to_csv(outputCSV, index=False)