# Welcome to this tutorial ! 
            
### There are 3 different steps :   
>- 1) Install and import libraries, create folders and define parameters.  
>    *Variables followed by **#@param** are parameters: you can change their values.*
>- 2) Load and process your dataset.
>- 3) Train and evaluate your model.    

# 1) Libraries
### First, create a new **virtual environment**, then install all the requirements by running the following:

In [1]:
!pip install -r requirements.txt



### Create all folders you will need

In [1]:
# Create the folders used by the rest of this tutorial (they are listed below).
from utils import create_folders
create_folders()

### Your directory should look as follows:
Check that the folders (the ones **in bold**) are in your directory.
- **Main folder**
    >- **models**
    >    >* .joblib files (sklearn models)
    >    >* .sav files (mappers such as pca and umap)
    >    >* folders (tensorflow models)
    >- **results**
    >    >* .png images (confusion matrices)
    >    >* .log files (tensorflow training curves)
    >- **data**
    >    >- **train**
    >    >    * train*.tfrecord.gz files (training dataset)
    >    >- **eval**
    >    >    * traineval*.tfrecord.gz files (evaluation dataset)
    >    >- **inference**
    >    >   * .tfrecord.gz files (inference dataset)
    >    >   * *-mixer.json files (needed for georeferencing, if you want to add the prediction to Earth Engine Editor)
    >    >- **predictions**
    >    >    - **colored_pipes**
    >    >        * .kml files (colored-pipe nets corresponding to labels)
    >    >    - **kml**
    >    >        * .kml files and corresponding .png images (mask-prediction images)
    >    >    - **tfrecords**
    >    >        * .TFRecord files (needed if you want to add the prediction to Earth Engine Editor)
    >    >    * .csv files

### Import, authenticate and initialize the Earth Engine library.  
If you have a Gmail account, use your own; if not, you can use this one:  
Gmail adress : [mounierseb93@gmail.com]    
Code : [mounse$15]

In [None]:
import ee
# Authenticate with Earth Engine first, then initialize the library.
ee.Authenticate()
ee.Initialize()

In [None]:
import tensorflow as tf
from dataset_loader import TFDatasetProcessing, NPDatasetProcessing, undersample
from models import ModelTrainingAndEvaluation

In [None]:
# Model inputs: Landsat and Sentinel band names, plus the response (label) band.
LANDSAT  = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B10', 'B11']
SENTINEL = ['VV', 'VH', 'VV_1', 'VH_1']
RESPONSE = 'landcover'
BANDS    = LANDSAT + SENTINEL
FEATURES = BANDS + [RESPONSE]

# Number of input bands and number of classes.
NUM_FEATURES = len(BANDS)
NUM_CLASSES  = 4

# Side length of the square patches expected by the model.

KERNEL_SIZE = 128 #@param {type:"integer"}

KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]

# One fixed-length float32 parsing spec per feature (response band included),
# keyed by feature name in FEATURES_DICT.
COLUMNS = [
    tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32)
    for _ in FEATURES
]
FEATURES_DICT = dict(zip(FEATURES, COLUMNS))

# 2) Dataset Loading and Processing
If you don't have access to the training dataset, download it from the Google Drive of the account below, using this address and password (and make sure to put it in the right folder, with the same name as in the Drive):    
Gmail adress : [mounierseb93@gmail.com]  
Code : [mounse$15]  
If it's not there for some reason, or if you want to construct your own dataset, go to tuto_dataset_construction.ipynb and run it.

In [None]:
# Specify training parameters
TRAIN_SIZE = 5000 #@param {type:"integer"}
EVAL_SIZE  = 3000 #@param {type:"integer"}

# Batch size handed to TFDatasetProcessing in the next cell. It was referenced
# there but never defined, which raised a NameError.
# NOTE(review): the original notebook gives no value; 16 is only a starting
# point - lower it if you run out of memory, and confirm against the loader.
BATCH_SIZE = 16 #@param {type:"integer"}

In [None]:
# Build the loader, then fetch the training and evaluation tf.Datasets from it
tfdataloader = TFDatasetProcessing(
    FEATURES_DICT,
    FEATURES,
    BANDS,
    NUM_FEATURES,
    batch_size=BATCH_SIZE,
)
training   = tfdataloader.get_training_dataset()
evaluation = tfdataloader.get_eval_dataset()

# The loader exposes its own feature count: keep NUM_FEATURES in sync with it
NUM_FEATURES = tfdataloader.num_features

In [None]:
# Sanity check: print the first element of the evaluation dataset.
# The builtin next() is used instead of the Python 2 style `.next()` method,
# which Python 3 iterators are not required to provide.
print(next(iter(evaluation.take(1))))

In [None]:
# Specify preprocessing parameters

# Whether to undersample (i.e. take the same number of pixels from each class)
UNDERSAMPLING = True #@param {type:'boolean'}

# Number of pixels taken from each class when UNDERSAMPLING is True;
# if None, it is set to the pixel count of the rarest class
SAMPLES_PER_CLASS = None #@param (None or integer)

# Whether to add misclassified pixels from a previous model.
# Do not use this unless you have learnt how to create them (see the tutorial "eeTutoriel.ipynb")
MISCLASSIFIED_PIXELS = False #@param {type:'boolean'}

# Name of the asset holding the misclassified pixels; only used when MISCLASSIFIED_PIXELS is True
ASSETID = "users/leakm/misclassified_pixels"

In [None]:
# Convert both tf.Datasets to numpy data (TRAIN_SIZE / EVAL_SIZE are passed as the second argument)
npdataloader = NPDatasetProcessing(NUM_FEATURES, NUM_CLASSES)
train = npdataloader.tf_to_numpy(training, TRAIN_SIZE)
# NOTE: `eval` shadows the Python builtin; the name is kept because later cells use it
eval = npdataloader.tf_to_numpy(evaluation, EVAL_SIZE)

# The tf.Datasets are not used again in this notebook once converted
del training, evaluation

if UNDERSAMPLING:
    # Same treatment for both splits, training first
    for split in (train, eval):
        split['features'], split['labels'] = undersample(
            split['features'], split['labels'], SAMPLES_PER_CLASS
        )

if MISCLASSIFIED_PIXELS:
    # NOTE(review): `tfdataconstructor` is not defined in the cells shown here;
    # it must exist before enabling MISCLASSIFIED_PIXELS - confirm where it comes from
    train = npdataloader.adding_more_pixels(train, ASSETID, tfdataconstructor, tfdataloader)

# 3) Training
You can :  
1- train your model and evaluate it, or   
2- load a saved model and evaluate it

In [None]:
# Specify model parameters

# Careful: the name of your model must contain the model type, e.g. knn_something-something or something-rf_something
MODEL_NAME   = 'rf' #@param ["knn", "svm", "rf","pca_rf","umap_rf"] 
# Class ids and class display names, both passed to model.eval_model
# (presumably index-aligned - confirm against models.py)
LABEL_NAMES  = [0,1,2,3]
TARGET_NAMES = ['field','forest','urbain','water']

In [None]:
# 1) Model fitting, if you want to (re)train your model
FINETUNE     = False #@param {type: 'boolean'}

model = ModelTrainingAndEvaluation(MODEL_NAME,train,eval,FINETUNE)
if 'pca' in MODEL_NAME :
    model.pca(NUM_CLASSES)
elif 'umap' in MODEL_NAME :
    model.umap(NUM_CLASSES)
elif 'knn' in MODEL_NAME :
    model.knn() #check the parameters you can pass as arguments
elif 'svm' in MODEL_NAME :
    model.svm() #check the parameters you can pass as arguments
elif 'rf' in MODEL_NAME :
    model.rf() #check the parameters you can pass as arguments

# Model evaluation
%matplotlib inline
model.eval_model(LABEL_NAMES,TARGET_NAMES)

In [None]:
# 2) If you already trained and saved your model
from joblib import load

# Name of the saved model, without the extension; it must match the file in your "models" folder
MODEL_NAME = 'rf'

# Build the wrapper without fine-tuning, then plug the saved estimator into it
model = ModelTrainingAndEvaluation(MODEL_NAME, train, eval, False)
# joblib.load relies on pickle: only load model files that you trust
saved_model_path = f'models/{MODEL_NAME}.joblib'
model.model = load(saved_model_path)

model.eval_model(LABEL_NAMES, TARGET_NAMES)