In [None]:
# Mount Google Drive at /content/drive (Google Colab only). Later cells read
# the dataset from, and write models/outputs to, paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Install the third-party packages used by this notebook into the Colab runtime.
# NOTE(review): no versions are pinned, so a fresh runtime may resolve different
# releases than the ones this notebook was developed against.
! pip install geopandas
! pip install rasterio
! pip install rasterstats
! pip install --quiet contextily matplotlib
! pip install torch 
! pip install earthpy
! pip install gdal 
! pip install cupy-cuda111     #>=7.7.0,<8.0.0'
! pip install torchfcn
! pip install pyyaml h5py 
! pip install breizhcrops

In [None]:
import geopandas as gpd
import pandas as pd

import rasterio
from rasterio.plot import show

from matplotlib import pyplot
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
import torchvision

import tensorflow as tf
from tensorflow import keras

import breizhcrops

import numpy as np
import os
import glob
import math
from osgeo import gdal
from datetime import datetime
import PIL
import sys
import time
import seaborn as sn
from sklearn.metrics import f1_score , accuracy_score , precision_score, recall_score,label_ranking_average_precision_score,balanced_accuracy_score,average_precision_score,multilabel_confusion_matrix


# Load Dataset

In [None]:
class CropSegmentationDataset(torch.utils.data.Dataset):
    """Crop time-series samples read from a CSV file, split into train/test.

    The CSV must contain a ``Classcode`` column and ``n_timesteps * n_bands``
    feature columns named ``"0"``, ``"1"``, ... Each row's features are
    reshaped (row-major) into an ``(n_timesteps, n_bands)`` matrix, i.e.
    column ``t * n_bands + b`` holds band ``b`` at time step ``t``.

    Args:
        path: Path of the CSV file to load.
        is_train: If True expose the training part of the split, otherwise
            the held-out test part.
        test_size: Fraction of samples assigned to the test part.
        n_classes: Only rows whose ``Classcode`` is in ``range(n_classes)``
            are kept.
        transform: Optional callable applied to the feature tensor.
        target_transform: Optional callable applied to the label tensor.
        n_timesteps: Number of time steps per sample (default 7).
        n_bands: Number of bands per time step (default 4).
    """

    def __init__(self, path, is_train=True, test_size=0.2, n_classes=9,
                 transform=None, target_transform=None,
                 n_timesteps=7, n_bands=4):
        self.dataset_df = pd.read_csv(path)
        # Filter on the constructor argument (the original code read a global
        # named ``n_class`` here, which fails or silently uses another value).
        wanted = self.dataset_df['Classcode'].isin(range(n_classes))
        self.dataset_df = self.dataset_df.loc[wanted]

        bands = [str(i) for i in range(n_timesteps * n_bands)]
        self.values = self.dataset_df[bands].astype(float)
        self.labels = self.dataset_df["Classcode"].astype(float)
        self.test_size = test_size
        self.transform = transform
        self.target_transform = target_transform

        # Turn the flat feature columns of every row into one
        # (n_timesteps, n_bands) matrix. A row-major reshape is equivalent to
        # filling matrix[j, k] from column j * n_bands + k, without the
        # per-cell DataFrame lookups.
        self.valuelist = self.values.to_numpy().reshape(
            len(self.values), n_timesteps, n_bands)

        # The fixed random_state makes the train and test instances built from
        # the same file see the same, complementary split.
        x_train, x_test, y_train, y_test = train_test_split(
            self.valuelist, self.labels, test_size=test_size, random_state=42)

        if is_train:
            self.valuelist = x_train
            self.labels = y_train
        else:
            self.valuelist = x_test
            self.labels = y_test

    def __len__(self):
        """Return the number of samples in the selected split."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(values, label)`` for position ``idx`` as float32 tensors.

        The label is returned as float32 (as before); consumers that need
        class indices cast it to long themselves.
        """
        values = torch.tensor(self.valuelist[idx], dtype=torch.float32)
        label = torch.tensor(self.labels.iloc[idx], dtype=torch.float32)
        if self.transform:
            values = self.transform(values)
        if self.target_transform:
            label = self.target_transform(label)
        return values, label

In [None]:
# Dataset location and split/label configuration.
load_path = "/content/drive/My Drive/INF-Msc_Thesis/Msc-Thesis-Crop-Type-Mapping/Data/complete_df.csv"
test_size = 0.2   # fraction of the samples held out for validation
n_class = 9       # rows with Classcode in range(n_class) are kept
n_bands = 4
batch_size = 16

dataset_train = CropSegmentationDataset(load_path, True, test_size=test_size, n_classes=n_class)
dataset_test = CropSegmentationDataset(load_path, False, test_size=test_size, n_classes=n_class)

# Training batches are reshuffled every epoch.
dataloader_training = torch.utils.data.DataLoader(dataset=dataset_train,
                                                  batch_size=batch_size,
                                                  shuffle=True)
# Validation does not benefit from shuffling; a fixed order keeps evaluation
# runs directly comparable.
dataloader_validation = torch.utils.data.DataLoader(dataset=dataset_test,
                                                    batch_size=batch_size,
                                                    shuffle=False)

# Training

In [None]:
# train


In [None]:
class Trainer:
    """Minimal training loop: per-epoch training, optional validation and
    optional learning-rate scheduling.

    Args:
        model: Network to optimise. It is not moved to ``device`` here; the
            caller is expected to have placed it there already.
        device: Device the batches are sent to (string or ``torch.device``).
        criterion: Loss called as ``criterion(model(x), y)`` with ``y`` cast
            to long.
        optimizer: Optimizer updating ``model``'s parameters.
        training_DataLoader: Iterable of ``(x, y)`` batches supporting ``len``.
        validation_DataLoader: Optional iterable of ``(x, y)`` batches.
        lr_scheduler: Optional scheduler, stepped once per epoch.
        epochs: Number of epochs executed by ``run_trainer``.
        epoch: Initial value of the epoch counter.
        notebook: If True use the notebook flavour of the tqdm progress bars.
    """

    def __init__(self,
                 model: torch.nn.Module,
                 device: torch.device,
                 criterion: torch.nn.Module,
                 optimizer: torch.optim.Optimizer,
                 training_DataLoader: torch.utils.data.Dataset,
                 validation_DataLoader: torch.utils.data.Dataset = None,
                 lr_scheduler: torch.optim.lr_scheduler = None,
                 epochs: int = 100,
                 epoch: int = 0,
                 notebook: bool = False
                 ):

        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.lr_scheduler = lr_scheduler
        self.training_DataLoader = training_DataLoader
        self.validation_DataLoader = validation_DataLoader
        self.device = torch.device(device)
        self.epochs = epochs
        self.epoch = epoch
        self.notebook = notebook

        # Per-epoch history, appended to by _train / _validate.
        self.training_loss = []
        self.validation_loss = []
        self.learning_rate = []

    def _progress_bars(self):
        """Return the ``(tqdm, trange)`` pair matching the ``notebook`` flag."""
        if self.notebook:
            from tqdm.notebook import tqdm, trange
        else:
            from tqdm import tqdm, trange
        return tqdm, trange

    def _prepare_batch(self, x, y):
        """Replace NaNs, cast labels to long and move the batch to the device.

        NaNs are replaced *before* the integer cast; afterwards the labels are
        integers and ``nan_to_num`` could no longer have any effect.
        """
        x = torch.nan_to_num(x)
        y = torch.nan_to_num(y).type(torch.LongTensor)
        return x.to(self.device), y.to(self.device)

    def _step_scheduler(self):
        """Advance the learning-rate scheduler by one epoch, if there is one."""
        if self.lr_scheduler is None:
            return
        if self.lr_scheduler.__class__.__name__ == 'ReduceLROnPlateau':
            # ReduceLROnPlateau needs a metric: use the latest validation loss
            # when validation runs, otherwise the latest training loss.
            if self.validation_DataLoader is not None:
                history = self.validation_loss
            else:
                history = self.training_loss
            self.lr_scheduler.step(history[-1])
        else:
            self.lr_scheduler.step()

    def run_trainer(self):
        """Run ``self.epochs`` epochs.

        Returns:
            Tuple ``(training_loss, validation_loss, learning_rate)`` holding
            the per-epoch history lists.
        """
        _, trange = self._progress_bars()

        progressbar = trange(self.epochs, desc='Progress')
        for _ in progressbar:
            self.epoch += 1  # epoch counter

            self._train()

            if self.validation_DataLoader is not None:
                self._validate()

            self._step_scheduler()
        return self.training_loss, self.validation_loss, self.learning_rate

    def _train(self):
        """Run one training epoch and record its mean loss and learning rate."""
        tqdm, _ = self._progress_bars()

        self.model.train()  # train mode
        train_losses = []  # per-batch losses of this epoch
        batch_iter = tqdm(enumerate(self.training_DataLoader), 'Training',
                          total=len(self.training_DataLoader), leave=False)

        for _, (x, y) in batch_iter:
            inputs, target = self._prepare_batch(x, y)
            self.optimizer.zero_grad()  # reset accumulated gradients

            out = self.model(inputs)  # one forward pass

            loss = self.criterion(out, target)
            # A non-finite loss is replaced by a finite number so a single bad
            # batch does not abort the run (kept from the original behaviour).
            loss = torch.nan_to_num(loss)
            loss_value = loss.item()
            train_losses.append(loss_value)
            loss.backward()  # one backward pass
            self.optimizer.step()  # update the parameters

            batch_iter.set_description(f'Training: (loss {loss_value:.4f})')  # update progressbar

        self.training_loss.append(np.mean(train_losses))
        self.learning_rate.append(self.optimizer.param_groups[0]['lr'])

        batch_iter.close()

    def _validate(self):
        """Run one validation pass (no gradients) and record its mean loss."""
        tqdm, _ = self._progress_bars()

        self.model.eval()  # evaluation mode
        valid_losses = []  # per-batch losses of this pass
        batch_iter = tqdm(enumerate(self.validation_DataLoader), 'Validation',
                          total=len(self.validation_DataLoader), leave=False)

        for _, (x, y) in batch_iter:
            inputs, target = self._prepare_batch(x, y)

            with torch.no_grad():
                out = self.model(inputs)
                loss = self.criterion(out, target)
                loss_value = loss.item()
                valid_losses.append(loss_value)

                batch_iter.set_description(f'Validation: (loss {loss_value:.4f})')

        self.validation_loss.append(np.mean(valid_losses))

        batch_iter.close()


In [None]:
# model
n_class = 9  # number of classes, including one extra class for pixels that are not labelled
n_channel = 4
dropout = 0

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# model and the trainer always agree on where tensors live.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = breizhcrops.models.LSTM(input_dim=n_channel, num_classes=n_class, hidden_dims=128, num_layers=4, dropout=dropout, bidirectional=True, use_layernorm=True)
# Alternative architectures (uncomment exactly one to swap the model):
#model = breizhcrops.models.MSResNet(input_dim=n_channel, layers=[1, 1, 1, 1], num_classes=n_class, hidden_dims=32)
#model = breizhcrops.models.OmniScaleCNN(input_dim= n_channel, num_classes=n_class,sequencelength=45 ,paramenter_number_of_layer_list=[8 * 128, 5 * 128 * 256 + 2 * 256 * 128], few_shot=False)
#model = breizhcrops.models.PETransformerModel(input_dim=n_channel, num_classes=n_class, d_model=64, n_head=2, n_layers=5,d_inner=128, activation="relu", dropout=dropout, max_len=200)
#model = breizhcrops.models.StarRNN( input_dim= n_channel, num_classes= n_class, hidden_dims=128, num_layers=3, dropout= dropout, bidirectional=False,use_batchnorm=False, use_layernorm=True, device=torch.device("cuda"))

# Architectures that were found not to work in this setup:
#model = breizhcrops.models.InceptionTime(num_classes=n_class, input_dim=n_channel ,num_layers=6, hidden_dims=128,use_bias=False, use_residual= True, device=torch.device("cuda")) #NOTWORKING
#model = breizhcrops.models.TempCNN( input_dim= n_channel, num_classes= n_class, sequencelength=45, kernel_size=7, hidden_dims=128, dropout=dropout) #NOTWORKING
#model = breizhcrops.models.TransformerModel( input_dim=n_channel, num_classes=n_channel, d_model=64, n_head=2, n_layers=5,d_inner=128, activation="relu", dropout=dropout) #not working #CUDA ISSUE

model = model.to(device)

# criterion
# NEEDS TO BE CHANGED FOR THE ONES USED IN THE PAPER
criterion = torch.nn.CrossEntropyLoss()

# optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# trainer
trainer = Trainer(model=model,
                  device=device,
                  criterion=criterion,
                  optimizer=optimizer,
                  training_DataLoader=dataloader_training,
                  validation_DataLoader=dataloader_validation,
                  lr_scheduler=None,
                  epochs=10,
                  epoch=0,
                  notebook=True)

# start training
training_losses, validation_losses, lr_rates = trainer.run_trainer()

 #change the name depending on the model used

Progress:   0%|          | 0/10 [00:00<?, ?it/s]

Training:   0%|          | 0/1155 [00:00<?, ?it/s]

Validation:   0%|          | 0/289 [00:00<?, ?it/s]

Training:   0%|          | 0/1155 [00:00<?, ?it/s]

Validation:   0%|          | 0/289 [00:00<?, ?it/s]

Training:   0%|          | 0/1155 [00:00<?, ?it/s]

Validation:   0%|          | 0/289 [00:00<?, ?it/s]

Training:   0%|          | 0/1155 [00:00<?, ?it/s]

Validation:   0%|          | 0/289 [00:00<?, ?it/s]

Training:   0%|          | 0/1155 [00:00<?, ?it/s]

Validation:   0%|          | 0/289 [00:00<?, ?it/s]

Training:   0%|          | 0/1155 [00:00<?, ?it/s]

Validation:   0%|          | 0/289 [00:00<?, ?it/s]

Training:   0%|          | 0/1155 [00:00<?, ?it/s]

Validation:   0%|          | 0/289 [00:00<?, ?it/s]

Training:   0%|          | 0/1155 [00:00<?, ?it/s]

Validation:   0%|          | 0/289 [00:00<?, ?it/s]

Training:   0%|          | 0/1155 [00:00<?, ?it/s]

Validation:   0%|          | 0/289 [00:00<?, ?it/s]

Training:   0%|          | 0/1155 [00:00<?, ?it/s]

Validation:   0%|          | 0/289 [00:00<?, ?it/s]

# Save Model

In [None]:
# Timestamp suffix in the form _DDMMHHMM (day, month, hour, minute; no year).
today = datetime.now()
d1 = today.strftime("_%d%m%H%M")
# Save the entire model object (not only its state_dict) to Drive, named after
# the model class plus the timestamp.
model_name = model.__class__.__name__ + d1
path = "/content/drive/My Drive/INF-Msc_Thesis/Msc-Thesis-Crop-Type-Mapping/models/" + model_name + ".pt"
torch.save(model, path)

In [None]:
# Show how many entries the models directory on Drive contains, then display
# their names. os.listdir returns the entries in arbitrary order.
model_list = os.listdir('/content/drive/My Drive/INF-Msc_Thesis/Msc-Thesis-Crop-Type-Mapping/models')
print(len(model_list))
model_list


20


['unet3d.pt',
 'unet3d_9class.pt',
 'unet3d_9class_trainedto9classesinsteadof10.pt',
 'LSTM',
 'LSTM.pt',
 'MSRESNET.pt',
 'MSResNet.pt',
 'MSResNet24_04_2023_09_07.pt',
 'LSTM24_04_2023_09_34.pt',
 'LSTM_24040935.pt',
 'LSTM_24040944.pt',
 'LSTM_24040957.pt',
 'LSTM_24041002.pt',
 'OmniScaleCNN_24041012.pt',
 'PETransformerModel_24041020.pt',
 'StarRNN_24041031.pt',
 'MSResNet_25041143.pt',
 'MSResNet_25041238.pt',
 'MSResNet_25041240.pt',
 'LSTM_25041242.pt']

In [None]:
# Load the most recently saved model.
models_dir = '/content/drive/My Drive/INF-Msc_Thesis/Msc-Thesis-Crop-Type-Mapping/models'

# os.listdir returns entries in arbitrary order and may include entries that
# are not saved models, so keep only '.pt' files and order them by
# modification time (oldest first).
model_list = sorted(
    (name for name in os.listdir(models_dir) if name.endswith('.pt')),
    key=lambda name: os.path.getmtime(os.path.join(models_dir, name)),
)
if not model_list:
    raise FileNotFoundError(f"No saved '.pt' models found in {models_dir}")

# Index of the newest file. The previous code used len(model_list), which is
# one past the last valid index and always raised IndexError.
latest = len(model_list) - 1
model_name = model_list[latest]

model_path = os.path.join(models_dir, model_name)

# NOTE(review): torch.load unpickles the file, so only load models you saved
# yourself. Recent PyTorch releases may require weights_only=False to load a
# fully pickled model like this one - confirm against the installed version.
model = torch.load(model_path)
model.eval()

IndexError: ignored

# Model Evaluation

In [None]:
class Evaluate:
    """Run a trained classifier over a dataloader and summarise its quality.

    Args:
        model: Trained network; switched to eval mode on construction. It is
            not moved to ``device`` here, the caller must have placed it there.
        device: Device the input batches are sent to (string or
            ``torch.device``).
        validation_dataloader: Iterable of ``(inputs, labels)`` batches.
        savepath: Prefix prepended to the generated file name in
            ``save_output`` (a directory path should end with a separator).
        n_classes: Number of classes; labels ``0..n_classes-1`` define the
            rows/columns of the confusion matrix and the per-class metrics.
    """

    def __init__(self,
                 model: torch.nn.Module,
                 device: torch.device,
                 validation_dataloader: torch.utils.data.Dataset = None,
                 savepath = "",
                 n_classes: int = 9
                 ):
        self.n_classes = n_classes
        self.savepath = savepath
        self.model = model
        self.device = torch.device(device)
        self.dataloader_validation = validation_dataloader
        self.model.eval()
        self.y_pred = []
        self.y_true = []
        self.df_out = pd.DataFrame()
        self.metric_list = []
        self.recall_list = []
        self.precision_list = []
        self.f1_list = []
        self.label_list = list(range(self.n_classes))

    def predict_output(self):
        """Predict a class for every validation sample.

        Fills ``self.y_pred`` (arg-max class per sample) and ``self.y_true``
        (labels as delivered by the dataloader).
        """
        pred = []
        true = []
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            for inputs, labels in self.dataloader_validation:
                # Replace NaNs the same way the training loop does, then send
                # the batch to the configured device.
                inputs = torch.nan_to_num(inputs).to(self.device)
                output = self.model(inputs)

                # exp() is monotonic, so the arg-max of the raw scores is the
                # same class as the arg-max of their exponentials.
                pred.extend(torch.argmax(output, dim=1).cpu().numpy())
                true.extend(labels.cpu().numpy())
        self.y_pred = pred
        self.y_true = true

    def _label_arrays(self):
        """Return ``(true, pred)`` as flat integer numpy arrays."""
        true = np.asarray(self.y_true).astype(int).reshape(-1)
        pred = np.asarray(self.y_pred).astype(int).reshape(-1)
        return true, pred

    def create_conf_matrix(self):
        """Return the ``n_classes x n_classes`` confusion matrix as a DataFrame.

        ``labels`` is passed explicitly so the matrix keeps its full shape even
        when a class occurs in neither the labels nor the predictions.
        """
        true, pred = self._label_arrays()
        cf_matrix = confusion_matrix(true, pred, labels=self.label_list)
        return pd.DataFrame(cf_matrix)

    def show_confusion_matrix(self):
        """Plot the confusion matrix as a heatmap and save it to 'output.png'."""
        plt.figure(figsize = (12,7))
        sn.heatmap(self.create_conf_matrix(), annot=True)
        plt.savefig('output.png')

    def show_output(self):
        """Print the confusion matrix and the first ``n_classes`` output rows."""
        print("confusion matrix")
        print(self.create_conf_matrix())
        print("output matrix")
        print(self.df_out.head(self.n_classes))

    def calculate_metrics(self):
        """Compute the aggregate and per-class metrics from the predictions.

        ``self.metric_list`` is a flat list alternating metric name and value,
        padded with ``" "`` up to ``n_classes`` entries so it can be stored as
        a column next to the per-class rows.
        """
        true, pred = self._label_arrays()
        labels = self.label_list

        metrics = [
            'F1 score weighted:', f1_score(true, pred, labels=labels, average='weighted', zero_division=0),
            'F1 score micro:', f1_score(true, pred, labels=labels, average='micro', zero_division=0),
            'F1 score macro:', f1_score(true, pred, labels=labels, average='macro', zero_division=0),
            'balanced accuracy score:', balanced_accuracy_score(true, pred),
        ]
        metrics.extend([" "] * max(0, self.n_classes - len(metrics)))
        self.metric_list = metrics

        # One value per class in label order; explicit labels keep the length
        # at n_classes even when a class is missing from the data.
        self.recall_list = recall_score(true, pred, labels=labels, average=None, zero_division=0)
        self.precision_list = precision_score(true, pred, labels=labels, average=None, zero_division=0)
        self.f1_list = f1_score(true, pred, labels=labels, average=None, zero_division=0)

    def evaluate(self):
        """Predict, compute all metrics and assemble the output table.

        Returns:
            Tuple ``(df_out, confusion_matrix)``. ``df_out`` is the confusion
            matrix extended by ``recall``, ``precision``, ``f1_score`` and
            ``metrics`` columns.
        """
        self.predict_output()
        confusion = self.create_conf_matrix()
        self.calculate_metrics()

        n = self.n_classes
        # The metrics column must have exactly one entry per row. If there are
        # fewer rows than metric entries the column is truncated; the full
        # list stays available in self.metric_list.
        metrics_column = (list(self.metric_list) + [" "] * n)[:n]

        df_out = confusion.copy()
        df_out.insert(n, "recall", self.recall_list, False)
        df_out.insert(n + 1, "precision", self.precision_list, False)
        df_out.insert(n + 2, "f1_score", self.f1_list, False)
        df_out.insert(n + 3, "metrics", metrics_column, False)
        self.df_out = df_out
        return self.df_out, confusion

    def save_output(self):
        """Write ``self.df_out`` as CSV to ``savepath + <ModelClass>_DDMMHHMM``."""
        today = datetime.now()
        # Name the file after the evaluated model (self.model), not after
        # whatever a global called ``model`` currently refers to.
        model_name = self.model.__class__.__name__ + today.strftime("_%d%m%H%M")
        path = self.savepath + model_name
        self.df_out.to_csv(path)


The next cell evaluates the model and saves the output to the specified path.

In [None]:
# Prefix for the saved evaluation table; Evaluate.save_output appends the file
# name directly to it, so it must end with a path separator.
path = '/content/drive/My Drive/INF-Msc_Thesis/Msc-Thesis-Crop-Type-Mapping/model_output/' 

# Initialise the evaluation for the current `model` on the validation dataloader.
evaluate = Evaluate(
    model = model,
    device = 'cuda',
    validation_dataloader = dataloader_validation,
    savepath = path 
)

# Get the output table (confusion matrix plus metric columns) and the plain
# confusion matrix from the evaluation.
df_out , df_cm = evaluate.evaluate()

# Save the output table as CSV for later use.
evaluate.save_output()

In [None]:
# Print the confusion matrix and the output table produced by evaluate.evaluate().
evaluate.show_output()

confusion matrix
      0   1   2    3    4    5    6    7    8
0  2390   6   0    1   15    6    1    1    0
1   426  20   1   10    8   10    3    1    4
2     8   1  10    0    2   10    2    0   18
3     3   0   0  154    2    8   17    1    0
4    39   7   0   12  438    3   23    2    0
5     0   2   0   15    4  100    9    1    4
6     7   1   0    6   55   13  374    7    4
7     1   0   0    0    0   22   39  136    0
8    10   2   2    1    5   11    5    2  118
output matrix
      0   1   2    3    4    5    6    7    8    recall  precision  f1_score  \
0  2390   6   0    1   15    6    1    1    0  0.987603   0.828710  0.901207   
1   426  20   1   10    8   10    3    1    4  0.041408   0.512821  0.076628   
2     8   1  10    0    2   10    2    0   18  0.196078   0.769231  0.312500   
3     3   0   0  154    2    8   17    1    0  0.832432   0.773869  0.802083   
4    39   7   0   12  438    3   23    2    0  0.835878   0.827977  0.831909   
5     0   2   0   15    4  10