In [1]:
import geopandas as gpd
import pandas as pd
import os
import matplotlib.pyplot as plt
# The bundled seaborn styles were renamed to 'seaborn-v0_8*' in matplotlib 3.6 and
# the bare 'seaborn' name was later removed, so prefer the new name when the
# installed matplotlib provides it and fall back to the legacy one otherwise.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

In [3]:
# Administrative (mauza) boundaries, reprojected to EPSG:4326.
gdf_adm = gpd.read_file('../data/ADM_Shp/selected_distict_mauza.shp')
gdf_adm = gdf_adm.to_crs('EPSG:4326')

# Per-mauza flood summary with QA flags.
df_summary = pd.read_csv(
    filepath_or_buffer='../data/FE_Results/MAUZ_flood_summary_QA.csv',
)

# Household survey locations; its OBJECTID column is used later to select mauzas.
df_survey = pd.read_csv(
    filepath_or_buffer='../data/CDP_Survey/household_locations_impactevaluation_matched_floodlevel_exposure.csv',
)

# Flood extent time series: Sentinel-derived and interpolated.
df_ts_sent = pd.read_csv(
    filepath_or_buffer='../data/FE_Results/MAUZ_flood_extent_sentinel.csv',
)
df_ts_interp = pd.read_csv(
    filepath_or_buffer='../data/FE_Results/MAUZ_flood_extent_interpolated.csv',
)

We'll subset the results to just the mauzas that have survey data.

In [10]:
# Mauza identifiers present in the survey; the survey's OBJECTID is matched
# against the summary table's PCODE column.
survey_mauz = set(df_survey['OBJECTID'])
df_summary_survey = df_summary[df_summary['PCODE'].isin(survey_mauz)]

# Every surveyed mauza must appear exactly once in the summary. Comparing only the
# row count against the set size would also pass if duplicated PCODE rows happened
# to offset missing mauzas, so check both conditions explicitly.
missing_mauz = survey_mauz - set(df_summary_survey['PCODE'])
assert not missing_mauz, f'Surveyed mauzas missing from the flood summary: {missing_mauz}'
assert df_summary_survey['PCODE'].is_unique, 'Duplicate PCODE rows in the flood summary'

Let's also subset both of the time series datasets by the mauzas in the survey.

In [11]:
def _subset_to_survey(df_ts, pcode_col, mauza_ids, out_csv):
    """Keep the rows of ``df_ts`` that belong to the given mauzas and save them.

    Parameters
    ----------
    df_ts : pandas.DataFrame
        Time series table containing a mauza code column.
    pcode_col : str
        Name of the column in ``df_ts`` that holds the mauza code.
    mauza_ids : set
        Mauza codes to keep.
    out_csv : str
        Path of the CSV file to write (the index is not written).

    Returns
    -------
    pandas.DataFrame
        The filtered rows of ``df_ts``.

    Raises
    ------
    AssertionError
        If the number of distinct codes kept differs from ``len(mauza_ids)``.
        Nothing is written in that case.
    """
    df_sub = df_ts[df_ts[pcode_col].isin(mauza_ids)]
    n_found = len(set(df_sub[pcode_col]))
    assert n_found == len(mauza_ids), (
        f'{pcode_col}: found {n_found} of {len(mauza_ids)} surveyed mauzas'
    )
    df_sub.to_csv(out_csv, index=False)
    return df_sub


# NOTE(review): the inputs are read from '../data/...' but these outputs go to
# '../../data/...' -- confirm that the differing prefix is intentional.
df_ts_sent_survey = _subset_to_survey(
    df_ts_sent,
    'MAUZ_PCODE',
    survey_mauz,
    '../../data/FE_Results/MAUZ_flood_extent_sentinel_survey.csv',
)

df_ts_interp_survey = _subset_to_survey(
    df_ts_interp,
    'PCODE',
    survey_mauz,
    '../../data/FE_Results/MAUZ_flood_extent_interpolated_survey.csv',
)

Let's count the number of mauzas flagged with each problem in the Gaussian fitting. The counts are printed in this order: `NO_FIT`, `OOR`, `NEG`, `RIVER`, `FWHM_ERR`.

In [12]:
# Print one count per QA flag column, in a fixed order.
for qa_flag in ('NO_FIT', 'OOR', 'NEG', 'RIVER', 'FWHM_ERR'):
    flagged_count = df_summary_survey[qa_flag].sum()
    print(flagged_count)

7
0
0
1
1


Let's calculate the centroid of each mauza and join its latitude and longitude into the survey-subset summary table.

In [13]:
# Compute centroids in a projected CRS (EPSG:32646, UTM zone 46N) rather than in
# geographic degrees, then convert the centroid points back to EPSG:4326 so that
# their x/y coordinates are longitude/latitude.
gdf_adm = gdf_adm.to_crs('EPSG:32646')
gdf_adm['centroid'] = gdf_adm.centroid
gdf_adm = gdf_adm.set_geometry('centroid')
gdf_adm = gdf_adm.to_crs('EPSG:4326')
gdf_adm['LAT'] = gdf_adm['centroid'].y
gdf_adm['LON'] = gdf_adm['centroid'].x

# Attach the centroid coordinates to the summary table (PCODE <-> OBJECTID).
# LAT/LON left over from an earlier run of this cell are dropped first, otherwise
# re-running the cell would produce LAT_x/LAT_y and LON_x/LON_y columns.
n_rows_before_merge = len(df_summary_survey)
df_summary_survey = (
    df_summary_survey
    .drop(columns=['LAT', 'LON'], errors='ignore')
    .merge(gdf_adm[['OBJECTID', 'LAT', 'LON']], left_on='PCODE', right_on='OBJECTID', how='left')
    .drop(columns=['OBJECTID'])
)

# A left join keeps the row count only if each PCODE matches at most one OBJECTID.
assert len(df_summary_survey) == n_rows_before_merge, (
    'Merge changed the number of summary rows; OBJECTID is not unique in gdf_adm'
)

In [14]:
# Preview the first five rows (the default for head()) of the merged summary.
df_summary_survey.head()

Unnamed: 0,COV,DIFF_SAT,FWHM,MAX_SAT,PCODE,PEAK_G,PEAK_SAT,RMSE,MAX_G,NO_FIT,OOR,NEG,RIVER,FWHM_ERR,LAT,LON
0,1.629472,7.0,25.0,0.1145,425.0,2020-07-14,2020-07-21,0.017392,0.088176,False,False,False,False,False,26.21427,89.65501
1,3.687077,0.0,27.0,0.0653,489.0,2020-07-21,2020-07-21,0.016414,0.041429,False,False,False,False,False,26.195581,89.65772
2,2.896798,2.0,33.0,0.1253,533.0,2020-07-19,2020-07-21,0.023903,0.085048,False,False,False,False,False,26.181015,89.68044
3,2.04737,1.0,23.0,0.0698,546.0,2020-07-26,2020-07-27,0.011198,0.042605,False,False,False,False,False,26.185698,89.640361
4,2.294267,3.0,31.0,0.1427,605.0,2020-07-18,2020-07-21,0.026578,0.114291,False,False,False,False,False,26.171229,89.66635


In [15]:
# Save the survey-only QA summary without the row index.
# NOTE(review): the inputs are read from '../data/...' while this writes to
# '../../data/...' -- confirm that the differing prefix is intentional.
df_summary_survey.to_csv(
    path_or_buf='../../data/FE_Results/MAUZ_flood_summary_QA_survey.csv',
    index=False,
)