In [1]:
# Libraries
import datetime
import json
import sys
from pathlib import Path

import ee
import ipyfilechooser
import ipywidgets
import pandas as pd
import pydeck as pdk

# The project-local helpers live in ./utils: extend the search path before importing them
sys.path.append('./utils')
import sentinel_satellites

In [2]:
# Initializes the Google Earth Engine APIs
# Authenticate() runs the authorization step (the cell output reports that an
# authorization token was saved); Initialize() is then called before any other `ee` usage.
# NOTE(review): presumably Initialize() relies on the credentials stored by
# Authenticate(), so keep this order — confirm against the Earth Engine docs.
ee.Authenticate()
ee.Initialize()


Successfully saved authorization token.


# Exploratory analysis

The aim of the project is to build a Machine Learning model capable of detecting the dates on which a crop field has been manured, using satellite data. <br>
Before considering any model, it is useful to perform an exploratory analysis of the available crop fields dataset.

## Import a JSON file containing crop fields details

In [3]:
# Let the user pick the crop fields file: only *.json files are listed and
# main-crops.json inside ../Datasets/main/ is pre-selected
file_chooser = ipyfilechooser.FileChooser(
    path='../Datasets/main/',
    filename="main-crops.json",
    select_default=True,
    use_dir_icons=True,
    filter_pattern='*.json',
)
display(file_chooser)

FileChooser(path='/Users/francesco/Documents/Documents/Università (Università degli Studi, PV)/2_ MAGISTRALE…

In [4]:
# Load JSON data from the file picked in the file chooser
if file_chooser.selected is None:
    raise ValueError("No file selected: pick a JSON file in the file chooser above and re-run this cell.")

# JSON text is UTF-8 encoded: do not depend on the platform default encoding
with open(file_chooser.selected, encoding="utf-8") as json_file:
    data = json.load(json_file)

# Create DataFrame with one row per feature, built from the feature "properties"
fields_df = pd.DataFrame([feature["properties"] for feature in data["features"]])

# Add column with the boundary coordinates of each field.
# Exactly one entry per feature is produced by taking only the first ring of the
# polygon (its exterior boundary): iterating over every ring would yield more
# entries than rows as soon as a polygon contains a hole, breaking the assignment.
fields_df["polygon_coordinates"] = [
    [tuple(vertex) for vertex in feature["geometry"]["coordinates"][0]]
    for feature in data["features"]
]

In [5]:
# Show the dataframe (one row per crop field: name, manure dates and polygon vertices)
fields_df

Unnamed: 0,crop_field_name,manure_dates,polygon_coordinates
0,P-BLD,[2022-05-26],"[(-4.202723286616649, 43.39683579015289), (-4...."
1,P-BLLT1,[2022-05-16],"[(-4.085622203603083, 43.429605845026266), (-4..."
2,P-BLLT2,[2022-05-26],"[(-4.084840437376829, 43.430826294936246), (-4..."
3,P-Cardana,[2022-02-24],"[(8.658803437240303, 45.85842753378426), (8.65..."
4,P-CBRCS1,[2022-05-26],"[(-4.200826431306206, 43.39067464298489), (-4...."
5,P-CBRCS2,[2022-05-26],"[(-4.204911872695676, 43.3876170244562), (-4.2..."
6,P-CLGT,[2022-05-16],"[(-4.111699726693341, 43.39830644556494), (-4...."
7,P-CLMBRS,[2022-05-26],"[(-4.544769098140127, 43.38040395682432), (-4...."
8,P-CMNTR,[2022-05-16],"[(-4.147208715069137, 43.40038457218137), (-4...."
9,P-DR,[2022-03-21],"[(-4.142486752802821, 43.396858931472195), (-4..."


## Show crop fields on a map

In [6]:
# Semi-transparent yellow (RGBA) shared by the fill and the outline of the fields
field_color = [255, 255, 0, 100]

# Center the initial view on the first vertex of the first crop field
first_vertex = fields_df.polygon_coordinates[0][0]
view_state = pdk.ViewState(
    longitude=first_vertex[0],
    latitude=first_vertex[1],
    zoom=7.8,
)

# One polygon per dataframe row, pickable and highlighted when hovered
layer = pdk.Layer(
    "PolygonLayer",
    data=fields_df,
    get_polygon="polygon_coordinates",
    get_fill_color=field_color,
    get_line_color=field_color,
    stroked=True,
    filled=True,
    lineWidthMinPixels=3,
    pickable=True,
    auto_highlight=True,
)

# Build the map from the layer and the initial view state, then render it
r = pdk.Deck(layers=layer, initial_view_state=view_state)
r.show()


DeckGLWidget(carto_key=None, custom_libraries=[], google_maps_key=None, json_input='{\n  "initialViewState": {…

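It can be noticed that most of our fields are located in the northern part of Spain (P-Cardana, at a longitude of about 8.66 and a latitude of about 45.86, is a visible exception). Since the data is geographically concentrated, please keep in mind that a model trained on it may not generalize well to other regions.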

## Features extraction

The objective is to generate a dataset that contains, for each field and for each time the satellites (Sentinel-1 and Sentinel-2) have passed over the field (within a period specified by the user), all the physical indicators that will later be used to build the final model. <br>
This procedure has been designed to run in parallel, in order to exploit the computational power of the machine (since each field is independent of the others).

In [7]:
# Date pickers delimiting the period to analyse (defaults: first and last day of 2022)
start_date_widget = ipywidgets.DatePicker(
    description='Start date',
    value=datetime.date(2022, 1, 1),
    disabled=False,
)
end_date_widget = ipywidgets.DatePicker(
    description='End date',
    value=datetime.date(2022, 12, 31),
    disabled=False,
)

# Render both pickers, start date first
display(start_date_widget, end_date_widget)

DatePicker(value=datetime.date(2022, 1, 1), description='Start date')

DatePicker(value=datetime.date(2022, 12, 31), description='End date')

### Sentinel 2 (optical features)

In [8]:
# Sentinel-2 features of the crop fields in the dataframe, extracted for the
# period currently selected in the two date pickers
fields_s2_features_extracted_df = sentinel_satellites.get_features(
    fields_df,
    start_date_widget.value,
    end_date_widget.value,
    sentinel=2,
)

# Display the extracted features
fields_s2_features_extracted_df

Unnamed: 0,crop_field_name,s2_acquisition_date,NDVI,EOMI1,EOMI2,EOMI3,EOMI4,NBR2,SAVI,MSAVI,manure_dates
0,P-BLLT2,2022-01-06,0.801121,-0.325992,0.395997,-0.059260,0.629556,0.314313,1.143909,0.888510,[2022-05-26]
1,P-BLLT2,2022-01-16,0.790695,-0.276669,0.431784,-0.004057,0.645580,0.300242,1.129021,0.881761,[2022-05-26]
2,P-BLLT2,2022-01-26,0.562788,-0.227903,0.191910,0.142593,0.375516,0.198084,0.803614,0.719048,[2022-05-26]
3,P-BLLT2,2022-02-05,0.568755,-0.236253,0.185224,0.128848,0.380589,0.210395,0.812135,0.724053,[2022-05-26]
4,P-BLLT2,2022-02-10,0.405753,-0.241306,0.016751,0.194977,0.174249,0.158007,0.579386,0.576701,[2022-05-26]
...,...,...,...,...,...,...,...,...,...,...,...
774,P-Cardana,2022-11-25,0.503090,-0.181490,0.186710,0.200354,0.353696,0.179237,0.718356,0.664659,[2022-02-24]
775,P-Cardana,2022-11-27,0.152719,-0.016941,0.138957,0.450380,0.146745,0.007748,0.218071,0.263238,[2022-02-24]
776,P-Cardana,2022-12-07,0.483323,-0.194247,0.163323,0.201698,0.317792,0.163305,0.690128,0.646390,[2022-02-24]
777,P-Cardana,2022-12-17,0.455992,-0.172851,0.151109,0.226672,0.304799,0.161497,0.651103,0.620339,[2022-02-24]


### Sentinel 1 (radar features)

In [9]:
# Sentinel-1 features of the crop fields in the dataframe, extracted for the
# period currently selected in the two date pickers
fields_s1_features_extracted_df = sentinel_satellites.get_features(
    fields_df,
    start_date_widget.value,
    end_date_widget.value,
    sentinel=1,
)

# Display the extracted features
fields_s1_features_extracted_df

Unnamed: 0,crop_field_name,s1_acquisition_date,BSI,PBSI,CPBSI,TIRS,manure_dates
0,P-BLLT1,2022-01-07,-0.615842,0.615842,-0.985381,0.618768,[2022-05-16]
1,P-BLLT1,2022-01-08,-0.621501,0.621501,-0.986264,0.621812,[2022-05-16]
2,P-BLLT1,2022-01-19,-0.657800,0.657800,-0.989426,0.659467,[2022-05-16]
3,P-BLLT1,2022-01-20,-0.593962,0.593962,-0.986463,0.595994,[2022-05-16]
4,P-BLLT1,2022-01-31,-0.555652,0.555652,-0.984867,0.557127,[2022-05-16]
...,...,...,...,...,...,...,...
2005,P-VG1,2022-12-15,-0.569286,0.569286,-0.986644,0.571793,[2022-04-09]
2006,P-VG1,2022-12-21,-0.506345,0.506345,-0.982987,0.506965,[2022-04-09]
2007,P-VG1,2022-12-22,-0.538753,0.538753,-0.984511,0.542085,[2022-04-09]
2008,P-VG1,2022-12-26,-0.492030,0.492030,-0.974037,0.498365,[2022-04-09]


## Analysis

### General, simple yet useful statistics (for both sentinel 1 and sentinel 2)

In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric Sentinel-2 features
fields_s2_features_extracted_df.describe()

Unnamed: 0,NDVI,EOMI1,EOMI2,EOMI3,EOMI4,NBR2,SAVI,MSAVI
count,779.0,779.0,779.0,779.0,779.0,779.0,779.0,779.0
mean,0.444667,-0.161461,0.179976,0.221867,0.330907,0.16989,0.634937,0.58507
std,0.201061,0.141119,0.136863,0.17452,0.167721,0.065969,0.287091,0.206764
min,0.011629,-0.52831,-0.350105,-0.350925,-0.214569,0.007748,0.016606,0.022675
25%,0.304264,-0.275585,0.150263,0.097871,0.303373,0.139627,0.434457,0.462922
50%,0.458626,-0.159874,0.189342,0.241391,0.356751,0.174864,0.654873,0.625134
75%,0.584209,-0.054712,0.218365,0.3748,0.383015,0.198568,0.834198,0.736632
max,0.942142,0.160089,0.664984,0.530544,0.827342,0.376216,1.345279,0.970074


In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric Sentinel-1 features
fields_s1_features_extracted_df.describe()

Unnamed: 0,BSI,PBSI,CPBSI,TIRS
count,2002.0,2002.0,2002.0,2002.0
mean,-0.594997,0.594997,-0.988312,0.59828
std,0.087865,0.087865,0.006151,0.084193
min,-0.873915,0.145322,-0.999001,0.316674
25%,-0.646763,0.537304,-0.992486,0.542178
50%,-0.592467,0.592467,-0.989424,0.594292
75%,-0.537304,0.646763,-0.985302,0.647962
max,-0.145322,0.873915,-0.939862,0.873915


## Store the datasets containing all the extracted features, for all the fields

In [12]:
# Store both datasets next to the selected JSON file as gzip-compressed CSV
# files, to take less disk space.
# Path.stem strips only the last extension, so a file name containing several
# dots (e.g. "crops.v2.json") keeps its full "crops.v2" prefix instead of being
# cut at the first dot.
filename = str(Path(file_chooser.selected_path) / Path(file_chooser.selected_filename).stem)
fields_s2_features_extracted_df.to_csv(filename + "-s2-features-extracted.gz", header=True, index=False, compression='gzip')
fields_s1_features_extracted_df.to_csv(filename + "-s1-features-extracted.gz", header=True, index=False, compression='gzip')