In [42]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import glob
import geopandas as gpd
import cartopy.feature as cfeature
from datetime import datetime, timedelta
from cartopy import crs as ccrs
from shapely import geometry

The ground truth data was manually collected for selected sample areas by analysing satellite images from the following sources:

* TERRA
* AQUA
* NPP
* Sentinel
* Planet

In [97]:
# Load the raw ground-truth shapefiles and stack them into a single frame.
data_dir = '../../references/ground_truth/raw/'
shapefile_names = ['1HSep2019_groundtruth.shp', '2HSep2019_groundtruth.shp', 'April2019_groundtruth.shp']

# Full (relative) path of every shapefile; later cells read the CRS from file_list[0].
file_list = [data_dir + name for name in shapefile_names]

# One frame per shapefile, concatenated with a fresh 0..n-1 index.
shapefile_frames = [gpd.read_file(path) for path in file_list]
hs_pdf = pd.concat(shapefile_frames, ignore_index=True)

In [45]:
# Convert to a geopandas GeoDataFrame, tagging it with the CRS declared by the first shapefile.
source_crs = gpd.read_file(file_list[0]).crs
ground_truth_df = gpd.GeoDataFrame(hs_pdf, crs=source_crs)

In [46]:
# Expose the point coordinates as plain columns (y -> lat, x -> lon).
points = ground_truth_df.geometry
ground_truth_df['lat'] = points.y
ground_truth_df['lon'] = points.x

In [47]:
# Use 'datestamp' as the column name for the observation date.
ground_truth_df = ground_truth_df.rename(columns={"Date": "datestamp"})

In [75]:
# Clean up the 'datestamp' field: show the distinct values, then fix the known bad entries.
print (ground_truth_df['datestamp'].unique())

# Malformed value -> corrected value. Only exact, whole-string matches are replaced.
error_mapping = {'2019/09/18v': '2019/09/18', 
                 'NULL2019/04/24': '2019/04/24', 
                 'NULL2019/04/23': '2019/04/23', 
                 '2019/04/03\n': '2019/04/03'}
ground_truth_df['datestamp'] = ground_truth_df['datestamp'].replace(error_mapping)

['2019/09/01' '2019/09/02' '2019/09/03' '2019/09/04' '2019/09/05'
 '2019/09/06' '2019/09/07' '2019/09/08' '2019/09/09' '2019/09/10'
 '2019/09/11' '2019/09/12' '2019/09/13' '2019/09/14' '2019/09/15'
 '2019/09/30' '2019/09/29' '2019/09/28' '2019/09/27' '2019/09/26'
 '2019/09/25' '2019/09/24' '2019/09/23' '2019/09/22' '2019/09/21'
 '2019/09/20' '2019/09/19' '2019/09/18' '2019/09/17' '2019/09/16'
 '2019/04/01' '2019/04/02' '2019/04/03' '2019/04/04' '2019/04/05'
 '2019/04/06' '2019/04/07' '2019/04/08' '2019/04/09' '2019/04/10'
 '2019/04/11' '2019/04/12' '2019/04/13' '2019/04/14' '2019/04/15'
 '2019/04/16' '2019/04/17' '2019/04/18' '2019/04/19' '2019/04/20'
 '2019/04/21' '2019/04/22' '2019/04/23' '2019/04/24' '2019/04/25'
 '2019/04/26' '2019/04/27' '2019/04/28' '2019/04/29' '2019/04/30']


In [90]:
# After cleaning up, check that every unique entry fits the 2019/MM/DD format exactly.
import re

# Raw string so `\d` is the regex digit class rather than a (deprecated) string escape.
pattern = r'2019/[0-1]\d/[0-3]\d'
for entry in ground_truth_df['datestamp'].unique():
    # fullmatch, not match: match only anchors the start, so values with trailing
    # junk such as '2019/09/18v' or '2019/04/03\n' would wrongly pass.
    # Non-string entries (e.g. missing values) are reported instead of raising.
    if not isinstance(entry, str) or re.fullmatch(pattern, entry) is None:
        # Show the offending value so it can be added to the clean-up mapping.
        print ('Error encountered!', repr(entry))

The step below is not strictly necessary; it is kept for legacy handling, as some ground truth data may have dates that span multiple days.

In [91]:
# Derive datetime columns 'end_date' and 'start_date' from the cleaned date strings.
# 'startdate' == '0' is the sentinel for "no separate start date": the observation
# then starts and ends on the same day.
ground_truth_df['startdate'] = '0'
ground_truth_df['enddate'] = ground_truth_df['datestamp']

# Parse all end dates in one vectorised call instead of splitting each string row by row.
# Like the original int()/datetime() parsing, this raises on a malformed entry.
date_format = '%Y/%m/%d'
ground_truth_df['end_date'] = pd.to_datetime(ground_truth_df['enddate'], format=date_format)

# Default: the start date equals the end date.
ground_truth_df['start_date'] = ground_truth_df['end_date']

# Legacy multi-day records: rows carrying a real start date instead of the sentinel.
# The original branch referenced undefined start_month/start_day and would have raised
# NameError; with 'startdate' set to '0' above it is never taken in this notebook.
# NOTE(review): assumes legacy 'startdate' values use the same YYYY/MM/DD format - confirm.
has_start = ground_truth_df['startdate'] != '0'
if has_start.any():
    ground_truth_df.loc[has_start, 'start_date'] = pd.to_datetime(
        ground_truth_df.loc[has_start, 'startdate'], format=date_format
    )

In [92]:
# Length of each observation window (end minus start).
ground_truth_df['period'] = ground_truth_df['end_date'].sub(ground_truth_df['start_date'])

In [93]:
# Work on an independent (deep) copy so the export steps do not touch ground_truth_df.
ground_truth_gjson = ground_truth_df.copy(deep=True)
ground_truth_gjson

Unnamed: 0,datestamp,geometry,lat,lon,startdate,enddate,end_date,start_date,period
0,2019/09/01,POINT (102.15084 0.42880),0.428803,102.150839,0,2019/09/01,2019-09-01,2019-09-01,0 days
1,2019/09/01,POINT (102.15204 0.42568),0.425679,102.152041,0,2019/09/01,2019-09-01,2019-09-01,0 days
2,2019/09/01,POINT (102.40703 -0.12635),-0.126350,102.407027,0,2019/09/01,2019-09-01,2019-09-01,0 days
3,2019/09/01,POINT (102.39958 -0.12563),-0.125629,102.399577,0,2019/09/01,2019-09-01,2019-09-01,0 days
4,2019/09/01,POINT (102.41183 -0.12611),-0.126109,102.411833,0,2019/09/01,2019-09-01,2019-09-01,0 days
5,2019/09/01,POINT (102.49859 0.88686),0.886864,102.498591,0,2019/09/01,2019-09-01,2019-09-01,0 days
6,2019/09/01,POINT (102.61154 -0.36307),-0.363071,102.611544,0,2019/09/01,2019-09-01,2019-09-01,0 days
7,2019/09/01,POINT (102.65576 -0.39576),-0.395755,102.655764,0,2019/09/01,2019-09-01,2019-09-01,0 days
8,2019/09/01,POINT (103.94026 -1.25146),-1.251460,103.940258,0,2019/09/01,2019-09-01,2019-09-01,0 days
9,2019/09/01,POINT (103.98770 -1.28780),-1.287797,103.987699,0,2019/09/01,2019-09-01,2019-09-01,0 days


In [94]:
# Reproject the geometry to Web Mercator (EPSG:3857).
# The EPSG code is passed directly because the {'init': 'epsg:...'} dict form is
# deprecated proj4 syntax and triggers a FutureWarning in current pyproj/geopandas.
# The 'lat'/'lon' columns were taken from the geometry before this reprojection
# and are left unchanged.
ground_truth_gjson = ground_truth_gjson.to_crs(epsg=3857)

# Drop the intermediate string columns; the parsed 'start_date'/'end_date' remain.
ground_truth_gjson = ground_truth_gjson.drop(columns=['startdate', 'enddate'])

In [96]:
# Write the processed daily ground truth to CSV (the index is written as the first column).
processed_csv_path = '../../references/ground_truth/processed/daily_ground_truth_Apr_Sep_2019.csv'
ground_truth_gjson.to_csv(processed_csv_path)