# Sentinel-2 Autoencoder and Clustering

## Download the EuroSAT dataset


[EuroSAT dataset](https://github.com/phelber/EuroSAT)
![](https://raw.githubusercontent.com/phelber/EuroSAT/master/eurosat_overview_small.jpg)


Download the dataset

In [None]:
import requests

# Fetch the EuroSAT archive and store it under the current working directory.
url =  'http://madm.dfki.de/files/sentinel/EuroSAT.zip'
r = requests.get(url, allow_redirects=True)
# Fail loudly on an HTTP error instead of saving an error page as the zip.
r.raise_for_status()
# The context manager guarantees the file is flushed and closed before the
# next cell tries to unzip it.
with open('EuroSAT.zip', 'wb') as archive:
  archive.write(r.content)

Unzip the dataset

In [None]:
# Extract the downloaded archive (shell command run through IPython's `!`).
# NOTE(review): the download cell writes to a relative path, so this assumes
# the notebook's working directory is /content — confirm.
!unzip '/content/EuroSAT.zip'

Rename the folder

In [None]:
# Rename the extracted folder; later cells read images from /content/EuroSAT.
# NOTE(review): presumes the archive unpacks into a top-level directory
# named 2750 — confirm.
!mv '/content/2750' '/content/EuroSAT'

Count images in the dataset

In [None]:
import glob

# Collect every entry sitting exactly one directory below the dataset root,
# i.e. /content/EuroSAT/<folder>/<file>, and report how many were found.
paths = glob.glob('/content/EuroSAT/*/*')
n_found = len(paths)
print('Number of images {}/27000'.format(n_found))

## Image Data Generator 

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

batch_size = 16

# One generator object serves both subsets: pixel values are multiplied by
# 1/255 and 20% of the images are reserved as the validation split.
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Options shared by the training and validation iterators.
# class_mode='input' asks the iterator for targets identical to the input
# images, which is what the autoencoder is trained against.
flow_options = dict(
    directory='/content/EuroSAT',
    target_size=(64, 64),
    batch_size=batch_size,
    class_mode='input',
)

train_generator = train_datagen.flow_from_directory(subset='training', **flow_options)
val_generator = train_datagen.flow_from_directory(subset='validation', **flow_options)

In [None]:
# Invert the generator's {class name: index} mapping into {index: class name}.
classes = {index: name for name, index in train_generator.class_indices.items()}
print(classes)

Generate some examples

In [None]:
# Pull one batch from the training iterator and report the array shapes of
# its inputs (x) and targets (y).
batch_iterator = iter(train_generator)
x, y = next(batch_iterator)
for template, array in (('x shape {}', x), ('y shape {}', y)):
  print(template.format(array.shape))

Plot some examples

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Show the first 16 images of the batch `x` on a 4x4 grid.
fig, axes = plt.subplots(nrows = 4, ncols = 4, figsize = (16,16))

# axes.flat walks the grid row by row, so panel k displays image k.
for k, ax in enumerate(axes.flat):
  ax.imshow(x[k])
  ax.axis(False)

fig.tight_layout()
plt.show()


## Define the Autoencoder


In [None]:
from re import X
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Input, UpSampling2D

# NOTE(review): `X` from `re` is not used anywhere in this cell — it looks
# like an accidental auto-import; confirm before removing.

model = None
input_img = Input(shape=(64, 64, 3))

# Options that every 3x3 convolution below has in common.
conv_options = dict(kernel_size=(3, 3), padding='same')

# Encoder: three Conv2D + MaxPooling2D stages with 16, 32 and 64 filters;
# each 2x2 pooling halves height and width.
x = input_img
for n_filters in (16, 32, 64):
  x = Conv2D(filters=n_filters, activation='relu', **conv_options)(x)
  x = MaxPooling2D(pool_size=(2, 2), padding='same')(x)

# Decoder: mirrors the encoder, with each UpSampling2D doubling height and
# width.
for n_filters in (64, 32, 16):
  x = Conv2D(filters=n_filters, activation='relu', **conv_options)(x)
  x = UpSampling2D(size=(2, 2))(x)

# The final sigmoid convolution maps back to 3 channels with values in (0, 1).
x = Conv2D(filters=3, activation='sigmoid', **conv_options)(x)

model = Model(input_img, x)

Compile the model

In [None]:
from tensorflow.keras.optimizers import Adam, SGD

# Train on mean squared error and track mean absolute error as an extra
# metric. Adam is created with its default arguments.
optimizer = Adam()
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

Print model configuration

In [None]:
# Print a summary of the autoencoder defined above.
model.summary()

## Train the autoencoder

In [None]:
# Steps per epoch for each subset; floor division drops any partial batch.
train_steps = train_generator.n // batch_size
val_steps = val_generator.n // batch_size

# Keep the returned History object: its per-epoch metrics are plotted below.
history = model.fit(
    train_generator,
    epochs=15,
    steps_per_epoch=train_steps,
    validation_data=val_generator,
    validation_steps=val_steps,
)

Plot training history

In [None]:
# Plot training and validation curves side by side: the loss (MSE) on the
# left and the MAE metric on the right.
fig, axes = plt.subplots(nrows = 1, ncols = 2, figsize = (10,5))

# (history key, y-axis label, title) for the left and right panel.
panels = [
    ('loss', 'Mean Squared Error', 'Model MSE over epochs'),
    ('mae', 'Mean Absolute Error', 'Model MAE over epochs'),
]

for ax, (key, ylabel, title) in zip(axes, panels):
  ax.plot(history.history[key], '-*', label = 'Training')
  ax.plot(history.history['val_' + key], '-o', label = 'Validation')
  ax.set_ylabel(ylabel)
  ax.set_xlabel('Epochs')
  ax.set_title(title)
  # Without a legend the curve labels are never drawn and the two lines
  # cannot be told apart.
  ax.legend()

fig.tight_layout()
plt.show()

## Evaluate Results

In [None]:
# Take one validation batch and run it through the trained autoencoder.
val_batch = next(iter(val_generator))
x, y = val_batch
y_pred = model.predict(x)

In [None]:
# Show every image of the batch next to its reconstruction.
# Iterating over the arrays themselves (rather than range(batch_size)) keeps
# the cell working when the batch holds fewer than batch_size images or when
# batch_size has been rebound by the clustering cell further down.
for i, (original, reconstruction) in enumerate(zip(x, y_pred)):
  # Each iteration is one image of the batch, so label it as a sample.
  print('\t --------------------------- Sample {} ---------------------------'.format(i))

  fig, axes = plt.subplots(nrows = 1, ncols = 2, figsize = (10,5))
  axes[0].imshow(original)
  axes[0].axis(False)
  axes[0].set_title('Sentinel-2\n (Input)')

  axes[1].imshow(reconstruction)
  axes[1].axis(False)
  axes[1].set_title('Sentinel-2\n (Autoencoder)')

  fig.tight_layout()
  plt.show()
  # Release the figure so one open figure per sample does not pile up.
  plt.close()

## Get access to features

In [None]:
# layers[6] is the third MaxPooling2D layer of the autoencoder (index 0 is
# the input layer), i.e. the end of the encoder half. The sub-model built
# here therefore maps an image to its bottleneck feature maps.
bottleneck = model.layers[6]
encoder = Model(inputs=model.input, outputs=bottleneck.output)

In [None]:
# Run the current batch `x` through the encoder sub-model to get its
# feature maps.
y_encoder = encoder.predict(x)

In [None]:
# For each image, show the input followed by encoder feature maps 1-9
# (channel 0 is not displayed because column 0 holds the input image).
# Iterating over the arrays themselves (rather than range(batch_size)) keeps
# the cell working when the batch holds fewer than batch_size images or when
# batch_size has been rebound by the clustering cell further down.
for i, (image, features) in enumerate(zip(x, y_encoder)):
  # Each iteration is one image of the batch, so label it as a sample.
  print('\t\t\t\t\t\t\t --------------------------- Sample {} --------------------------- \n'.format(i))

  fig, axes = plt.subplots(nrows = 1, ncols = 10, figsize = (30,3))
  axes[0].imshow(image)
  axes[0].axis(False)
  axes[0].set_title('Sentinel-2\n (Input)')

  for k in range(1,10):
    axes[k].imshow(features[:,:,k])
    axes[k].axis(False)
    axes[k].set_title('Sentinel-2\n (Encoder F-{})'.format(k))

  fig.tight_layout()
  plt.show()
  # Release the figure so one open figure per sample does not pile up.
  plt.close()

## Clustering

In [None]:
# One batch of 27000 images, the dataset size reported by the counting cell,
# so a single draw from the iterator returns everything at once.
# NOTE: this rebinds batch_size, train_datagen and train_generator, which the
# training cells above also use.
batch_size = 27000

# No validation split this time: every image is served by one iterator.
train_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    directory='/content/EuroSAT',
    class_mode='input',
    batch_size=batch_size,
    target_size=(64, 64),
)

In [None]:
# Load the single large batch (its targets are discarded) and encode all of
# its images.
full_batch = next(iter(train_generator))
x, _ = full_batch
y_encoder = encoder.predict(x)

K-means clustering

In [None]:
from sklearn.cluster import KMeans

# Flatten each sample's feature maps into one vector. Inferring the sizes
# from the array (instead of hard-coding 27000 and 8*8*64) keeps this working
# when the number of loaded images differs, and makes the cell safe to re-run
# on an already flattened array.
y_encoder = y_encoder.reshape((len(y_encoder), -1))
# Fit 10 clusters; the fixed random_state makes the initialisation repeatable.
kmeans = KMeans(n_clusters=10, random_state=0).fit(y_encoder)

In [None]:
# Assign every encoded image to its nearest cluster centre (one label per row
# of y_encoder).
clusters = kmeans.predict(y_encoder)

Evaluate results

In [None]:
# Cluster whose members are displayed.
cluster = 3
# Number of example images to show.
n_examples = 8

fig, axes = plt.subplots(nrows = 1, ncols = n_examples, figsize=(24,3))

# Indices of the first images assigned to the chosen cluster. Selecting them
# up front avoids indexing past the end of `clusters` when the cluster has
# fewer than n_examples members.
members = np.flatnonzero(clusters == cluster)[:n_examples]

# Hide the axis frame on every panel, including any left empty.
for ax in axes:
  ax.axis(False)

for ax, j in zip(axes, members):
  ax.imshow(x[j])

fig.tight_layout()
plt.show()
    