# Classification of SENTINEL 2 into Water/Not Water

In this notebook, we will use the extracted pixels data from the GEE to train and evaluate a decision tree classifier.

In [1]:
# Imports
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
import os
import numpy as np

## Data Preparation

In [2]:
# Read the data
DATA_PATH = '../../data-preparation/data/final-data/'

In [3]:
TRAIN_DATA_PATH = os.path.join(DATA_PATH, "train_data.csv")
df_train = pd.read_csv(TRAIN_DATA_PATH)
df_train.head()

Unnamed: 0,B2,B3,B4,B8,class_3,class_2,.geo,Contributor,coordinates
0,1122.0,1440.0,1754.0,1960.0,2,1,"{'geodesic': False, 'type': 'Point', 'coordina...",NK,"[146.5644056016995, -38.182756404207645]"
1,450.0,766.0,1002.0,2248.0,2,1,"{'geodesic': False, 'type': 'Point', 'coordina...",EG,"[144.29930361279213, -36.75695038525314]"
2,1323.0,1322.0,1153.0,1464.0,0,0,"{'geodesic': False, 'type': 'Point', 'coordina...",AK,"[152.34102203623007, -27.554339120147525]"
3,991.0,1278.0,1672.0,2608.0,1,1,"{'geodesic': False, 'type': 'Point', 'coordina...",AK,"[143.7278852605637, -37.910746536176255]"
4,1182.0,1596.0,2192.0,3113.0,1,1,"{'geodesic': False, 'type': 'Point', 'coordina...",AK,"[143.8925464521428, -37.95476398509811]"


In [4]:
TEST_DATA_PATH = os.path.join(DATA_PATH, "test_data.csv")
df_test = pd.read_csv(TEST_DATA_PATH)
df_test.head()

Unnamed: 0,B2,B3,B4,B8,class_3,class_2,.geo,Contributor,coordinates
0,283.0,780.0,727.0,3580.0,2,1,"{'geodesic': False, 'type': 'Point', 'coordina...",MB,"[144.25887942500674, -36.7634182552988]"
1,1180.0,1474.0,1838.0,2488.0,0,0,"{'geodesic': False, 'type': 'Point', 'coordina...",AK,"[143.59385662017306, -37.98755249296847]"
2,1014.5,1283.0,1621.0,2593.0,1,1,"{'geodesic': False, 'type': 'Point', 'coordina...",AK,"[143.85553586243708, -37.94164858194996]"
3,504.5,782.5,1049.0,2591.0,2,1,"{'geodesic': False, 'type': 'Point', 'coordina...",RJ,"[141.59806955344473, -38.130294791615064]"
4,475.0,739.0,771.5,2585.5,2,1,"{'geodesic': False, 'type': 'Point', 'coordina...",AK,"[148.49533430491442, -37.748600627392676]"


In [5]:
# Check for any missing values
print (df_train.isnull().sum())
print (df_test.isnull().sum())

B2             0
B3             0
B4             0
B8             0
class_3        0
class_2        0
.geo           0
Contributor    0
coordinates    0
dtype: int64
B2             0
B3             0
B4             0
B8             0
class_3        0
class_2        0
.geo           0
Contributor    0
coordinates    0
dtype: int64


In [6]:
train_X, train_y = df_train[['B2', 'B3', 'B4', 'B8']], df_train[['class_3']]
test_X, test_y = df_test[['B2', 'B3', 'B4', 'B8']], df_test[['class_3']]

In [7]:
train_X.shape, test_X.shape, train_X.shape, train_y.shape

((4368, 4), (1092, 4), (4368, 4), (4368, 1))

## Normalize the data

In [8]:
X, y = np.concatenate((train_X, test_X)), np.concatenate((train_y, test_y))

In [9]:
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

In [10]:
train_X, train_y, test_X, test_y = X[:4368], y[:4368], X[4368:], y[4368:]

## Train Model

In [11]:
clf = DecisionTreeClassifier(criterion='entropy', max_depth=70)

In [12]:
clf.fit(train_X, train_y)

DecisionTreeClassifier(criterion='entropy', max_depth=70)

In [13]:
pred = clf.predict(test_X)

## Evaluate Model

In [14]:
print (classification_report(pred, test_y))

              precision    recall  f1-score   support

           0       0.90      0.80      0.85       405
           1       0.71      0.80      0.75       324
           2       0.80      0.80      0.80       363

    accuracy                           0.80      1092
   macro avg       0.80      0.80      0.80      1092
weighted avg       0.81      0.80      0.80      1092



In [15]:
from sklearn.metrics import plot_confusion_matrix
import matplotlib.pyplot as plt

plot_confusion_matrix(clf, test_X, test_y) 
plt.figure(figsize=(8, 8))
plt.show()

<Figure size 640x480 with 2 Axes>

<Figure size 800x800 with 0 Axes>