In [1]:
# Libraries
import json, pandas as pd, pydeck as pdk, ee, ipyfilechooser, ipywidgets, datetime, sys
sys.path.append("./utils/")
import sentinel_satellites

In [2]:
# Initializes the Google Earth Engine APIs
# Authorization step first: the saved output of this cell reports that an
# authorization token was stored.
# NOTE(review): this presumably requires user interaction on a fresh machine — confirm
# before relying on an unattended "Restart & Run All".
ee.Authenticate()
# Then set up the Earth Engine client used by the rest of the notebook.
ee.Initialize()


Successfully saved authorization token.


# Exploratory analysis

The aim of the project is to create a Machine Learning model capable of detecting the dates on which a crop field has been manured, using satellite data. <br>
Before even starting to consider models, it is useful to perform an exploratory analysis of the crop fields dataset that was obtained.

## Import a JSON file containing crop fields details

In [3]:
# Let the user pick the crop fields file (only *.json files are listed);
# "main-crops.json" inside ../Datasets/main/ is pre-selected by default
file_chooser = ipyfilechooser.FileChooser(
    path='../Datasets/main/',
    filename="main-crops.json",
    select_default=True,
    use_dir_icons=True,
    filter_pattern='*.json',
)
display(file_chooser)

FileChooser(path='/Users/francesco/Documents/Documents/Università (Università degli Studi, PV)/2_ MAGISTRALE…

In [4]:
# Load JSON data from the selected file (JSON text is read explicitly as UTF-8 so the
# result does not depend on the platform's default encoding)
with open(file_chooser.selected, encoding="utf-8") as f:
    data = json.load(f)

# Create DataFrame with one row per feature, holding its properties except "manure_dates"
fields_df = pd.DataFrame([
    {key: value for key, value in feature["properties"].items() if key != "manure_dates"}
    for feature in data["features"]
])

# Add column with the polygon coordinates of each field, as a list of tuples.
# Exactly one entry is produced per feature (its first ring), so the column always
# lines up with the rows of fields_df even when a polygon carries additional rings.
# NOTE(review): assumes every geometry is a GeoJSON "Polygon" — confirm for new datasets.
fields_df["polygon_coordinates"] = [
    [tuple(vertex) for vertex in feature["geometry"]["coordinates"][0]]
    for feature in data["features"]
]

# Create a dataframe that just has the columns crop_field_name and manure_dates.
# Only these two properties are kept, so merging y_df back onto the fields or onto the
# extracted features never duplicates other property columns.
y_df = pd.DataFrame([
    {key: value for key, value in feature["properties"].items()
     if key in ("crop_field_name", "manure_dates")}
    for feature in data["features"]
])

In [5]:
# Join the field geometries with their manure dates on the field name, then display the result
entire_df = pd.merge(fields_df, y_df, on="crop_field_name")
entire_df

Unnamed: 0,crop_field_name,polygon_coordinates,manure_dates
0,P-BLD,"[(-4.202723286616649, 43.39683579015289), (-4....",[2022-05-26]
1,P-BLLT1,"[(-4.085622203603083, 43.429605845026266), (-4...",[2022-05-16]
2,P-BLLT2,"[(-4.084840437376829, 43.430826294936246), (-4...",[2022-05-26]
3,P-Cardana,"[(8.658803437240303, 45.85842753378426), (8.65...",[2022-02-24]
4,P-CBRCS1,"[(-4.200826431306206, 43.39067464298489), (-4....",[2022-05-26]
5,P-CBRCS2,"[(-4.204911872695676, 43.3876170244562), (-4.2...",[2022-05-26]
6,P-CLGT,"[(-4.111699726693341, 43.39830644556494), (-4....",[2022-05-16]
7,P-CLMBRS,"[(-4.544769098140127, 43.38040395682432), (-4....",[2022-05-26]
8,P-CMNTR,"[(-4.147208715069137, 43.40038457218137), (-4....",[2022-05-16]
9,P-DR,"[(-4.142486752802821, 43.396858931472195), (-4...",[2022-03-21]


## Show crop fields on a map

In [6]:
# Initial view of the map: centred on the first vertex of the first field's polygon
first_vertex = fields_df.polygon_coordinates[0][0]
view_state = pdk.ViewState(longitude=first_vertex[0], latitude=first_vertex[1], zoom=7.8)

# One polygon per row of entire_df, drawn with the same RGBA colour for fill and outline;
# the layer is pickable and highlights the polygon under the pointer
layer = pdk.Layer(
    "PolygonLayer",
    data=entire_df,
    get_polygon="polygon_coordinates",
    filled=True,
    get_fill_color=[255, 255, 0, 100],
    stroked=True,
    get_line_color=[255, 255, 0, 100],
    lineWidthMinPixels=3,
    pickable=True,
    auto_highlight=True,
)

# Build the deck from the layer and the initial view state, then render it
r = pdk.Deck(layers=layer, initial_view_state=view_state)
r.show()


DeckGLWidget(carto_key=None, custom_libraries=[], google_maps_key=None, json_input='{\n  "initialViewState": {…

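Note that most of our fields are located in the northern part of Spain (the table above also lists P-Cardana, whose coordinates lie much further east). Keep in mind that such a geographically concentrated dataset may limit how well a model trained on it generalizes to other regions.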

## Features extraction

The objective is to generate a dataset that contains, for each field and for each time the satellites (Sentinel-1 and Sentinel-2) passed over that field within a user-specified period, all the physical indicators that will later be used to build the final model. <br>
This procedure has been designed to run in parallel in order to exploit the computational power of the machine (since each field is independent of the others).

In [7]:
# Date pickers delimiting the period of interest (pre-set to the whole of 2022)
start_date_widget = ipywidgets.widgets.DatePicker(
    description='Start date',
    value=datetime.date(2022, 1, 1),
    disabled=False,
)
end_date_widget = ipywidgets.widgets.DatePicker(
    description='End date',
    value=datetime.date(2022, 12, 31),
    disabled=False,
)

# Show both pickers, start date first
display(start_date_widget, end_date_widget)

DatePicker(value=datetime.date(2022, 1, 1), description='Start date')

DatePicker(value=datetime.date(2022, 12, 31), description='End date')

### Sentinel 2 (optical features)

In [8]:
# Get all the mean features for the crop fields inside the dataframe, within the time period
# selected with the date pickers, using sentinel 2 satellites
fields_s2_features_extracted_df = sentinel_satellites.get_features(
    fields_df, start_date_widget.value, end_date_widget.value, sentinel=2
)
# Add manure dates. The join key is named explicitly (as in the merge that builds entire_df)
# instead of being read from y_df.columns[0], which depends on the key order of the JSON properties.
fields_s2_features_extracted_df = fields_s2_features_extracted_df.merge(y_df, on="crop_field_name")

# Show the dataframe
fields_s2_features_extracted_df

Unnamed: 0,crop_field_name,s2_acquisition_date,NDVI,EOMI1,EOMI2,EOMI3,EOMI4,NBR2,SAVI,MSAVI,manure_dates
0,P-CLGT,2022-01-06,0.890879,-0.475203,0.478283,-0.266043,0.717336,0.364730,1.272070,0.942166,[2022-05-16]
1,P-CLGT,2022-01-16,0.890132,-0.435185,0.514562,-0.223194,0.740412,0.365419,1.271003,0.941776,[2022-05-16]
2,P-CLGT,2022-01-26,0.639473,-0.354393,0.169373,0.005410,0.364515,0.208003,0.913113,0.779930,[2022-05-16]
3,P-CLGT,2022-02-05,0.423970,-0.287130,0.049286,0.166670,0.156132,0.107899,0.605364,0.590960,[2022-05-16]
4,P-CLGT,2022-02-10,0.487526,-0.318114,0.032209,0.103522,0.199279,0.168300,0.696156,0.654858,[2022-05-16]
...,...,...,...,...,...,...,...,...,...,...,...
774,P-VG2,2022-10-01,0.587973,-0.303828,0.178242,0.067826,0.359690,0.193909,0.839567,0.740201,[2022-04-13]
775,P-VG2,2022-11-10,0.389072,-0.122577,0.198866,0.282058,0.316113,0.125169,0.555542,0.556166,[2022-04-13]
776,P-VG2,2022-11-12,0.393333,-0.133609,0.163594,0.268466,0.304470,0.148289,0.561634,0.562339,[2022-04-13]
777,P-VG2,2022-11-17,0.308817,-0.158042,0.078185,0.292526,0.177674,0.100908,0.440931,0.470664,[2022-04-13]


### Sentinel 1 (radar features)

In [9]:
# Get all the mean features for the crop fields inside the dataframe, within the time period
# selected with the date pickers, using sentinel 1 satellites
fields_s1_features_extracted_df = sentinel_satellites.get_features(
    fields_df, start_date_widget.value, end_date_widget.value, sentinel=1
)
# Add manure dates. The join key is named explicitly (as in the merge that builds entire_df)
# instead of being read from y_df.columns[0], which depends on the key order of the JSON properties.
fields_s1_features_extracted_df = fields_s1_features_extracted_df.merge(y_df, on="crop_field_name")

# Show the dataframe
fields_s1_features_extracted_df

Unnamed: 0,crop_field_name,s1_acquisition_date,BSI,PBSI,CPBSI,TIRS,manure_dates
0,P-BLD,2022-01-07,-0.550754,0.550754,-0.982365,0.550754,[2022-05-26]
1,P-BLD,2022-01-08,-0.633442,0.633442,-0.994038,0.633442,[2022-05-26]
2,P-BLD,2022-01-19,-0.643399,0.643399,-0.991733,0.643399,[2022-05-26]
3,P-BLD,2022-01-20,-0.473732,0.473732,-0.988374,0.478347,[2022-05-26]
4,P-BLD,2022-01-31,-0.619382,0.619382,-0.991160,0.619382,[2022-05-26]
...,...,...,...,...,...,...,...
2005,P-VG2,2022-12-15,-0.571204,0.571204,-0.980734,0.571204,[2022-04-13]
2006,P-VG2,2022-12-21,-0.509244,0.509244,-0.988700,0.509244,[2022-04-13]
2007,P-VG2,2022-12-22,-0.473553,0.473553,-0.964357,0.473553,[2022-04-13]
2008,P-VG2,2022-12-26,-0.391835,0.391835,-0.978238,0.407867,[2022-04-13]


## Analysis

### General, simple yet useful statistics (for both sentinel 1 and sentinel 2)

In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) of the sentinel 2 feature columns
fields_s2_features_extracted_df.describe()

Unnamed: 0,NDVI,EOMI1,EOMI2,EOMI3,EOMI4,NBR2,SAVI,MSAVI
count,779.0,779.0,779.0,779.0,779.0,779.0,779.0,779.0
mean,0.444667,-0.161461,0.179976,0.221867,0.330907,0.16989,0.634937,0.58507
std,0.201061,0.141119,0.136863,0.17452,0.167721,0.065969,0.287091,0.206764
min,0.011629,-0.52831,-0.350105,-0.350925,-0.214569,0.007748,0.016606,0.022675
25%,0.304264,-0.275585,0.150263,0.097871,0.303373,0.139627,0.434457,0.462922
50%,0.458626,-0.159874,0.189342,0.241391,0.356751,0.174864,0.654873,0.625134
75%,0.584209,-0.054712,0.218365,0.3748,0.383015,0.198568,0.834198,0.736632
max,0.942142,0.160089,0.664984,0.530544,0.827342,0.376216,1.345279,0.970074


In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) of the sentinel 1 feature columns
fields_s1_features_extracted_df.describe()

Unnamed: 0,BSI,PBSI,CPBSI,TIRS
count,2002.0,2002.0,2002.0,2002.0
mean,-0.594997,0.594997,-0.988312,0.59828
std,0.087865,0.087865,0.006151,0.084193
min,-0.873915,0.145322,-0.999001,0.316674
25%,-0.646763,0.537304,-0.992486,0.542178
50%,-0.592467,0.592467,-0.989424,0.594292
75%,-0.537304,0.646763,-0.985302,0.647962
max,-0.145322,0.873915,-0.939862,0.873915


## Store the datasets containing all the extracted features, for all the fields

In [12]:
# Compressed .csv files, to take less memory space.
# The outputs are written next to the selected JSON file and share its base name.
# rsplit(".", 1) strips only the final extension, so a name such as "crops.v2.json"
# keeps its full "crops.v2" stem instead of being cut at the first dot.
filename = file_chooser.selected_path + "/" + file_chooser.selected_filename.rsplit(".", 1)[0]
fields_s2_features_extracted_df.to_csv(filename + "-s2-features-extracted.gz", header=True, index=False, compression='gzip')
fields_s1_features_extracted_df.to_csv(filename + "-s1-features-extracted.gz", header=True, index=False, compression='gzip')