In [7]:
# Cell for libraries installation on Google Colab
# NOTE: Write the libraries installation lines under this comment. Comment if needed.
!pip install rasterio



In [8]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import rasterio
from rasterio.plot import show
import tensorflow as tf
import numpy as np

In [9]:
# Cell for computation on Google Colab
# NOTE: Change the path to the project at your convenience.
# Mounts Google Drive and makes the project folder the working directory, so the
# relative paths used by the following cells resolve inside the project.
from google.colab import drive
drive.mount('/content/gdrive/')
path_to_project = "/content/gdrive/MyDrive/Colab_Notebooks/Ma511-ImageProcessing/project"
os.chdir(path_to_project)

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [10]:
# Random seed: seeds Python's `random`, NumPy and TensorFlow in a single call
# (tf.random.set_seed alone leaves the first two unseeded).
tf.keras.utils.set_random_seed(42)

# Load the data

In [11]:
# Names of the Sentinel-1 and Sentinel-2 image folders. These are relative paths
# (not absolute ones): they are resolved against the current working directory.
sentinel1_folder = 's1_60m'
sentinel2_folder = 's2_60m'

In [12]:
# Preview band 1 of the first image (in sorted filename order) of each folder.
if (os.path.exists(sentinel1_folder)) and (os.path.exists(sentinel2_folder)):
  # os.listdir() returns entries in arbitrary order; sorting makes the preview
  # deterministic from one run to the next.
  s1_path = os.path.join(sentinel1_folder, sorted(os.listdir(sentinel1_folder))[0])
  s2_path = os.path.join(sentinel2_folder, sorted(os.listdir(sentinel2_folder))[0])

  # The context managers close both raster files as soon as the bands are read.
  with rasterio.open(s1_path) as s1_image, rasterio.open(s2_path) as s2_image:
    s1_band = s1_image.read(1)
    s2_band = s2_image.read(1)

  fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
  ax1.imshow(s1_band)
  ax1.set_title('s1_image')

  ax2.imshow(s2_band)
  ax2.set_title('s2_image')

  plt.show()

else:
  print("sentinel1_folder and/or sentinel2_folder doesn't/don't exist(s).")

sentinel1_folder and/or sentinel2_folder doesn't/don't exist(s).


# Preprocess the data

In [14]:
# Read the label file: one entry per image, each entry being a list whose items
# start with the species name.
with open('labels/TreeSatBA_v9_60m_multi_labels.json', 'r') as f:
    labels = pd.read_json(f, typ='series')

# Keep each species name once, in order of first appearance
# (dict keys are unique and preserve insertion order).
unique_species = list(dict.fromkeys(
    entry[0]
    for annotations in labels.values
    for entry in annotations
))

print(unique_species)
print(f"Length of the list: {len(unique_species)}")

['Pseudotsuga', 'Abies', 'Larix', 'Acer', 'Picea', 'Betula', 'Cleared', 'Fagus', 'Quercus', 'Fraxinus', 'Pinus', 'Alnus', 'Populus', 'Prunus', 'Tilia']
Length of the list: 15


In [15]:
def convert_to_dataframe(folder, filenames_file,
                         labels_file='labels/TreeSatBA_v9_60m_multi_labels.json'):
    """Build a dataframe of flattened image pixels plus one column per species.

    Every file named in ``filenames_file`` is opened from ``folder`` with
    rasterio, its axes are reordered with ``transpose(1, 2, 0)`` (band axis
    moved last), and the result is flattened into one vector and divided by
    255. One column per entry of the module-level ``unique_species`` list is
    then added, holding the value found for that species in the labels file,
    or 0.0 when the species (or the whole file) has no entry.

    Args:
        folder: Directory containing the image files.
        filenames_file: Text file with one image filename per line. Blank
            lines are ignored.
        labels_file: JSON file mapping each filename to a list of
            ``[species, value]`` entries.

    Returns:
        pandas.DataFrame with one row per listed file: the flattened pixel
        values first, followed by the species columns.
    """
    with open(filenames_file, 'r') as f:
        # Skip blank lines: an empty name would make os.path.join() return the
        # folder itself, which rasterio cannot open as an image.
        filenames = [name for name in (line.strip() for line in f) if name]

    labels = pd.read_json(labels_file, typ='series')

    image_vectors = []
    label_values = []
    for filename in filenames:
        image_path = os.path.join(folder, filename)
        with rasterio.open(image_path) as image:
            # Reorder the axes so that the band axis comes last
            image_matrix = image.read().transpose(1, 2, 0)
        # Flatten the matrix to a one dimensional vector
        # NOTE(review): dividing by 255 assumes 8-bit-like pixel values - confirm
        # that this scaling is the intended one for these rasters.
        image_vectors.append(image_matrix.reshape(-1) / 255.0)
        # Files missing from the labels get an empty list (all species stay at 0)
        label_values.append(labels[filename] if filename in labels.index else [])

    # One row per image, one column per flattened pixel value
    df = pd.DataFrame(np.array(image_vectors))

    # Start every species column at 0.0 (scalar broadcast instead of a row-wise apply)
    for species in unique_species:
        df[species] = 0.0
    # Then fill in the values listed for each image
    for i, species_probs in enumerate(label_values):
        for species_prob in species_probs:
            df.at[i, species_prob[0]] = species_prob[1]
    return df

def save_dataframe(df, filename):
    """Write ``df`` to ``filename`` as a CSV file without the index column.

    The parent directory is created when it does not exist yet, so a long
    conversion is not lost because the output folder is missing.

    Args:
        df: pandas.DataFrame to save.
        filename: Destination path of the CSV file.
    """
    parent_dir = os.path.dirname(filename)
    if parent_dir:  # empty when `filename` has no directory component
        os.makedirs(parent_dir, exist_ok=True)
    df.to_csv(filename, index=False)

In [17]:
# Reuse the cached CSV files when they exist; otherwise build the dataframes
# from the Sentinel-1 images and cache them for the next run.
train_csv = 'dataframes/s1_train_df.csv'
test_csv = 'dataframes/s1_test_df.csv'

if os.path.exists(train_csv):
    s1_train_df = pd.read_csv(train_csv)
    print("Loaded 's1_train_df.csv'")
else:
    s1_train_df = convert_to_dataframe(sentinel1_folder, 'train_filenames.lst')
    save_dataframe(s1_train_df, train_csv)

if os.path.exists(test_csv):
    s1_test_df = pd.read_csv(test_csv)
    print("Loaded 's1_test_df.csv'")
else:
    s1_test_df = convert_to_dataframe(sentinel1_folder, 'test_filenames.lst')
    save_dataframe(s1_test_df, test_csv)

Loaded 's1_train_df.csv'
Loaded 's1_test_df.csv'


In [18]:
# Preview the first rows of the training dataframe (pixel columns, then species columns)
s1_train_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,Betula,Cleared,Fagus,Quercus,Fraxinus,Pinus,Alnus,Populus,Prunus,Tilia
0,-0.020922,-0.024705,0.003321,-0.015662,-0.022292,0.002755,-0.015425,-0.029851,0.002026,-0.014232,...,0.0,0.0,0.0,0.18421,0.0,0.0,0.0,0.0,0.0,0.0
1,-0.013941,-0.05302,0.001031,-0.014645,-0.049283,0.001165,-0.022493,-0.052342,0.001685,-0.02841,...,0.0,0.0,0.28104,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-0.028627,-0.054563,0.002057,-0.023881,-0.050735,0.001846,-0.024847,-0.038681,0.002519,-0.024817,...,0.0,0.0,0.97498,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,-0.021111,-0.046596,0.001777,-0.022445,-0.046774,0.001882,-0.03076,-0.051142,0.002359,-0.034735,...,0.0,0.0,0.02866,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-0.00062,-0.051102,4.8e-05,0.004573,-0.049329,-0.000364,-0.004266,-0.032738,0.000511,-0.008283,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [19]:
# Features: every column that is not a species column.
# Targets: the species columns only.
X_train, y_train = s1_train_df.drop(columns=unique_species), s1_train_df[unique_species]
X_test, y_test = s1_test_df.drop(columns=unique_species), s1_test_df[unique_species]

# Print the shape of each split, in the order X_train, y_train, X_test, y_test
for split in (X_train, y_train, X_test, y_test):
    print(split.shape)

(45337, 108)
(45337, 15)
(5044, 108)
(5044, 15)


In [20]:
# Preview the first rows of the training features
X_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,98,99,100,101,102,103,104,105,106,107
0,-0.020922,-0.024705,0.003321,-0.015662,-0.022292,0.002755,-0.015425,-0.029851,0.002026,-0.014232,...,0.000918,-0.005557,-0.034613,0.00063,-0.004718,-0.046172,0.000401,-0.015813,-0.045675,0.001358
1,-0.013941,-0.05302,0.001031,-0.014645,-0.049283,0.001165,-0.022493,-0.052342,0.001685,-0.02841,...,0.002249,-0.029854,-0.057899,0.002022,-0.026033,-0.057073,0.001789,-0.024913,-0.058316,0.001675
2,-0.028627,-0.054563,0.002057,-0.023881,-0.050735,0.001846,-0.024847,-0.038681,0.002519,-0.024817,...,0.003,-0.035016,-0.039731,0.003456,-0.038505,-0.053554,0.00282,-0.043319,-0.054788,0.003101
3,-0.021111,-0.046596,0.001777,-0.022445,-0.046774,0.001882,-0.03076,-0.051142,0.002359,-0.034735,...,0.001103,-0.025559,-0.05457,0.001837,-0.03453,-0.04944,0.002739,-0.033463,-0.058783,0.002232
4,-0.00062,-0.051102,4.8e-05,0.004573,-0.049329,-0.000364,-0.004266,-0.032738,0.000511,-0.008283,...,0.001826,-0.012654,-0.03776,0.001314,-0.014692,-0.041945,0.001374,-0.011614,-0.039516,0.001153


In [21]:
# Preview the first rows of the training targets (one column per species)
y_train.head()

Unnamed: 0,Pseudotsuga,Abies,Larix,Acer,Picea,Betula,Cleared,Fagus,Quercus,Fraxinus,Pinus,Alnus,Populus,Prunus,Tilia
0,0.0,0.81464,0.0,0.0,0.00115,0.0,0.0,0.0,0.18421,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.71896,0.0,0.0,0.0,0.0,0.0,0.28104,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.00556,0.0,0.01946,0.0,0.0,0.97498,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.97134,0.0,0.0,0.0,0.0,0.0,0.02866,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Train the model

In [36]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, BatchNormalization, Flatten, Dense, Reshape

In [40]:
def create_cnn_model(input_shape=(108,), num_classes=None):
    """Build and compile a 1D convolutional network over flattened image vectors.

    The flat input vector is reshaped to ``(length, 1)`` so that ``Conv1D`` can
    slide along it. Three convolution blocks (1024 filters, kernel size 3, batch
    normalisation, max-pooling after the first two) are followed by two dense
    layers and a softmax output. The model is compiled with Adam
    (learning rate 0.001), categorical cross-entropy and the accuracy metric.

    Args:
        input_shape: Shape of one input sample, as a 1-tuple ``(length,)``.
        num_classes: Number of output units. Defaults to the length of the
            module-level ``unique_species`` list.

    Returns:
        The compiled ``keras`` Sequential model.
    """
    if num_classes is None:
        num_classes = len(unique_species)

    model = Sequential([
        # Explicit input declaration (passing `input_shape=` to a layer is deprecated)
        keras.Input(shape=input_shape),
        # Add a trailing channel axis: (length,) -> (length, 1)
        Reshape((input_shape[0], 1)),

        Conv1D(1024, 3, activation='relu'),
        BatchNormalization(),
        MaxPooling1D(2),

        Conv1D(1024, 3, activation='relu'),
        BatchNormalization(),
        MaxPooling1D(2),

        Conv1D(1024, 3, activation='relu'),
        BatchNormalization(),

        Flatten(),

        Dense(512, activation='relu'),
        BatchNormalization(),

        Dense(256, activation='relu'),
        BatchNormalization(),

        Dense(num_classes, activation='softmax')
    ])

    opt = keras.optimizers.Adam(learning_rate=0.001)
    loss_fn = keras.losses.CategoricalCrossentropy()
    model.compile(optimizer=opt, loss=loss_fn, metrics=['accuracy'])

    return model


In [41]:
# build and compile the model (training happens in a later cell)
model = create_cnn_model()

# model summary
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape_4 (Reshape)         (None, 108, 1)            0         
                                                                 
 conv1d_12 (Conv1D)          (None, 106, 1024)         4096      
                                                                 
 batch_normalization_20 (Ba  (None, 106, 1024)         4096      
 tchNormalization)                                               
                                                                 
 max_pooling1d_8 (MaxPoolin  (None, 53, 1024)          0         
 g1D)                                                            
                                                                 
 conv1d_13 (Conv1D)          (None, 51, 1024)          3146752   
                                                                 
 batch_normalization_21 (Ba  (None, 51, 1024)         

In [44]:
# Fit the model
# Early stopping ends the run once the validation loss has not improved for
# `patience` epochs and restores the best weights, so training finishes on its
# own instead of being interrupted by hand (which leaves `history` undefined
# for the plotting cell).
# NOTE(review): the test split is used as validation data here, so it also
# drives the stopping decision - consider holding out a separate validation set.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=20, restore_best_weights=True
)
history = model.fit(
    X_train, y_train,
    epochs=500,
    batch_size=128,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping],
)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500

KeyboardInterrupt: 

In [None]:
# Plot the loss and accuracy curves (training vs. validation) per epoch
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

ax1.plot(history.history['loss'], label='loss')
ax1.plot(history.history['val_loss'], label='val_loss')
ax1.set_title('Loss')
ax1.set_xlabel('epoch')
ax1.set_ylabel('loss')
ax1.legend()

ax2.plot(history.history['accuracy'], label='accuracy')
ax2.plot(history.history['val_accuracy'], label='val_accuracy')
ax2.set_title('Accuracy')
ax2.set_xlabel('epoch')
ax2.set_ylabel('accuracy')
ax2.legend()

plt.show()

In [None]:
# Save the model to 'CADET_CHAOUKI_KANTANE.h5' (relative path, so the file is
# written to the current working directory)
model.save('CADET_CHAOUKI_KANTANE.h5')