# Appendix to Geo4Dev Satellite Crop Type Mapping Learning Module
## Create sample survey dataset with plot polygons

In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon

# define 3 random plots
plot_a = gpd.GeoDataFrame(pd.DataFrame({'plot':'plot_a'}, index=[0]), geometry = [Polygon([(34.557480,-14.305434),(34.556651,-14.305580),(34.556233,-14.305096),(34.557424,-14.304969)])], crs=4326)
plot_b = gpd.GeoDataFrame(pd.DataFrame({'plot':'plot_b'}, index=[0]), geometry = [Polygon([(33.932340,-11.566691),(33.931471,-11.567127),(33.931160,-11.566817),(33.931981,-11.566139)])], crs=4326)
plot_c = gpd.GeoDataFrame(pd.DataFrame({'plot':'plot_c'}, index=[0]), geometry = [Polygon([(33.340902,-13.601511),(33.339132,-13.602178),(33.338096,-13.600041, ),(33.340001,-13.599425)])], crs=4326)
# Combine sample plots into single geodataframe
plots_gdf = pd.concat([plot_a,plot_b,plot_c]).reset_index()
# Visualize data at intermediate stage
plots_gdf.head()



Unnamed: 0,index,plot,geometry
0,0,plot_a,"POLYGON ((34.55748 -14.30543, 34.55665 -14.305..."
1,0,plot_b,"POLYGON ((33.93234 -11.56669, 33.93147 -11.567..."
2,0,plot_c,"POLYGON ((33.34090 -13.60151, 33.33913 -13.602..."


In [2]:
# Create sample dataframe with random values
survey_df = pd.DataFrame({'hh_id':['0002-001','0049-005','0087-003'],'garden_id':['RG04','RG02','RG03'],'plot_id':['R01','R01','R02'],
    'crop_code_a':[1,12,1],'crop_code_b':[34,38,28],'crop_code_c':[12,None,38],
    'crop_code_d':[None,None,42],'crop_code_e':[None,28,None]})
survey_df['geometry'] = plots_gdf['geometry']
survey_df['plot'] = plots_gdf['plot']

# Convert to GeoPandas GeoDataFrame type (identify as geographic data)
survey_gdf = gpd.GeoDataFrame(survey_df)

# Visualize final dataset
survey_gdf.head()

Unnamed: 0,hh_id,garden_id,plot_id,crop_code_a,crop_code_b,crop_code_c,crop_code_d,crop_code_e,geometry,plot
0,0002-001,RG04,R01,1,34,12.0,,,"POLYGON ((34.55748 -14.30543, 34.55665 -14.305...",plot_a
1,0049-005,RG02,R01,12,38,,,28.0,"POLYGON ((33.93234 -11.56669, 33.93147 -11.567...",plot_b
2,0087-003,RG03,R02,1,28,38.0,42.0,,"POLYGON ((33.34090 -13.60151, 33.33913 -13.602...",plot_c


We'll also create our unique identifier by **hashing** the hh_id, garden_id, and plot_id fields:

In [3]:
# combine fields which can uniquely identify a plot for hashing
survey_gdf['to_hash'] = survey_gdf['hh_id'] + "-" + survey_gdf['garden_id'] + \
    "-" + survey_gdf['plot_id']
# create unique id
survey_gdf['unique_id'] = survey_gdf['to_hash'].apply(hash)
survey_gdf.head()

Unnamed: 0,hh_id,garden_id,plot_id,crop_code_a,crop_code_b,crop_code_c,crop_code_d,crop_code_e,geometry,plot,to_hash,unique_id
0,0002-001,RG04,R01,1,34,12.0,,,"POLYGON ((34.55748 -14.30543, 34.55665 -14.305...",plot_a,0002-001-RG04-R01,-6645496217314902902
1,0049-005,RG02,R01,12,38,,,28.0,"POLYGON ((33.93234 -11.56669, 33.93147 -11.567...",plot_b,0049-005-RG02-R01,-8821404992437737508
2,0087-003,RG03,R02,1,28,38.0,42.0,,"POLYGON ((33.34090 -13.60151, 33.33913 -13.602...",plot_c,0087-003-RG03-R02,2659752206514122228


In [5]:
# drop fields unnecessary for our model
survey_gdf.drop(columns=['hh_id','garden_id','plot_id','to_hash'], inplace=True)
survey_gdf.head()

Unnamed: 0,crop_code_a,crop_code_b,crop_code_c,crop_code_d,crop_code_e,geometry,plot,unique_id
0,1,34,12.0,,,"POLYGON ((34.55748 -14.30543, 34.55665 -14.305...",plot_a,-6645496217314902902
1,12,38,,,28.0,"POLYGON ((33.93234 -11.56669, 33.93147 -11.567...",plot_b,-8821404992437737508
2,1,28,38.0,42.0,,"POLYGON ((33.34090 -13.60151, 33.33913 -13.602...",plot_c,2659752206514122228


In [6]:
# Save sample data as GeoJSON file for use in module
survey_gdf.to_file('sample_survey_fields_geo.geojson', driver='GeoJSON')