In [None]:
import geopandas as gpd
import pandas as pd
import os

In [None]:
# The data root comes from the AA_DATA_DIR environment variable (a KeyError
# is raised if it is not set); every input below is read from
# <AA_DATA_DIR>/exploration/bangladesh.
data_dir = os.environ['AA_DATA_DIR']
bgd_dir = os.path.join(data_dir, 'exploration', 'bangladesh')

# Mauza shapefile, reprojected to EPSG:4326 on load.
gdf_adm = gpd.read_file(
    os.path.join(bgd_dir, 'ADM_Shp/selected_distict_mauza.shp')
).to_crs('EPSG:4326')

# Flood summary table with QA columns.
df_summary = pd.read_csv(
    os.path.join(bgd_dir, 'FE_Results/June_Aug/MAUZ_flood_summary_QA.csv')
)

# Household survey table; its OBJECTID column is used later to select mauzas.
df_survey = pd.read_csv(
    os.path.join(bgd_dir, 'CDP_Survey/household_locations_impactevaluation_landtype.csv')
)

# Flood-extent time series: the Sentinel file and the interpolated file.
df_ts_sent = pd.read_csv(
    os.path.join(bgd_dir, 'FE_Results/June_Aug/MAUZ_flood_extent_sentinel.csv')
)
df_ts_interp = pd.read_csv(
    os.path.join(bgd_dir, 'FE_Results/June_Aug/MAUZ_flood_extent_interpolated.csv')
)

Let's rename some of the columns to standardize them across datasets.

In [None]:
# Map each table's own column names onto the shared names used in the rest
# of the notebook (OBJECTID, FLOOD_FRACTION, DATE, ERR). `rename` ignores
# keys that are not present, so re-running this cell is harmless.
df_summary = df_summary.rename(
    columns={
        'COV': 'ERR',
        'PCODE': 'OBJECTID',
    }
)
df_ts_interp = df_ts_interp.rename(
    columns={
        'PCODE': 'OBJECTID',
        'FLOOD_EXTENT': 'FLOOD_FRACTION',
        'date': 'DATE',
    }
)
df_ts_sent = df_ts_sent.rename(
    columns={
        'MAUZ_PCODE': 'OBJECTID',
        'flooded_fraction': 'FLOOD_FRACTION',
        'date': 'DATE',
    }
)

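Let's add two additional QA flag columns: `PEAK_DIFF` is true where the absolute value of `DIFF_SAT` exceeds 20, and `ERR_ERR` is true where `ERR` exceeds 20.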

In [None]:
# Boolean QA flags, added in place to df_summary:
#   PEAK_DIFF - |DIFF_SAT| is greater than 20
#   ERR_ERR   - ERR is greater than 20
df_summary['PEAK_DIFF'] = df_summary['DIFF_SAT'].abs().gt(20)
df_summary['ERR_ERR'] = df_summary['ERR'].gt(20)

We'll subset the results to just the mauzas that have survey data.

In [None]:
# Distinct mauza identifiers that appear in the household survey.
survey_mauz = set(df_survey['OBJECTID'])

# Keep only the summary rows whose mauza is in the survey.
df_summary_survey = df_summary.loc[df_summary['OBJECTID'].isin(survey_mauz)]

# Sanity check: the number of summary rows kept must equal the number of
# distinct surveyed mauzas.
assert df_summary_survey.shape[0] == len(survey_mauz)

Let's also subset both of the time series datasets by the mauzas in the survey.

In [None]:
# Sentinel time series: keep surveyed mauzas, check that every surveyed mauza
# is represented, then write the subset next to the original results.
df_ts_sent_survey = df_ts_sent.loc[df_ts_sent['OBJECTID'].isin(survey_mauz)]
assert len(set(df_ts_sent_survey['OBJECTID'])) == len(survey_mauz)
df_ts_sent_survey.to_csv(
    os.path.join(bgd_dir, 'FE_Results/June_Aug/MAUZ_flood_extent_sentinel_survey.csv'),
    index=False,
)

# Interpolated time series: same filter, same check, same export.
df_ts_interp_survey = df_ts_interp.loc[df_ts_interp['OBJECTID'].isin(survey_mauz)]
assert len(set(df_ts_interp_survey['OBJECTID'])) == len(survey_mauz)
df_ts_interp_survey.to_csv(
    os.path.join(bgd_dir, 'FE_Results/June_Aug/MAUZ_flood_extent_interpolated_survey.csv'),
    index=False,
)

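Let's count the number of surveyed mauzas flagged by each QA check on the Gaussian fitting. The counts are printed in this order: `NO_FIT`, `NEG`, `RIVER`, `FWHM_ERR`, `MAX_DIFF`, `PEAK_DIFF`, `ERR_ERR`.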

In [None]:
# Print the column sum of each QA flag for the surveyed mauzas, one value
# per line, in the order listed here.
for flag in ('NO_FIT', 'NEG', 'RIVER', 'FWHM_ERR', 'MAX_DIFF', 'PEAK_DIFF', 'ERR_ERR'):
    print(df_summary_survey[flag].sum())

Let's calculate the centroid of each mauza and join its latitude and longitude into the flood summary for the surveyed mauzas.

In [None]:
# Compute centroids in EPSG:32646 rather than in EPSG:4326 - presumably so the
# centroid is calculated on planar coordinates instead of lat/lon degrees.
gdf_adm = gdf_adm.to_crs('EPSG:32646')
gdf_adm['centroid'] = gdf_adm.geometry.centroid

# Make the centroid column the active geometry and convert it back to
# EPSG:4326 so that x/y can be read off as longitude/latitude.
gdf_adm = gdf_adm.set_geometry('centroid').to_crs('EPSG:4326')
gdf_adm['LAT'] = gdf_adm['centroid'].y
gdf_adm['LON'] = gdf_adm['centroid'].x

# Attach the centroid coordinates to the surveyed-mauza summary. A left join
# keeps every summary row even if a mauza is missing from the shapefile.
centroid_cols = gdf_adm[['OBJECTID', 'LAT', 'LON']]
df_summary_survey = df_summary_survey.merge(centroid_cols, on='OBJECTID', how='left')

In [None]:
# Preview the first five rows of the surveyed-mauza summary.
df_summary_survey.head()

In [None]:
# Write the surveyed-mauza summary to CSV without the DataFrame index.
# `index=False` is an argument of `to_csv`; it previously sat inside the
# `os.path.join(...)` call, which accepts no keyword arguments and raised
# TypeError before anything was written.
df_summary_survey.to_csv(
    os.path.join(bgd_dir, 'FE_Results/June_Aug/MAUZ_flood_summary_QA_survey.csv'),
    index=False,
)