# Land Cover Validation with LUCAS dataset

This is an example of validating a land cover product against LUCAS points. The workflow uses the `Validator` class to perform the main validation steps.

In [None]:
# Install the third-party packages used by this notebook into the runtime.
# PyYAML is pinned to 6.0; the other packages are installed unpinned.
# NOTE(review): `osgeo` (GDAL) and `httpimport` are imported in the next code
# cell but are not installed here - confirm they are already available in the
# runtime.
!pip3 install geopandas pyyaml==6.0 ipyleaflet 
print("INSTALLATION COMPLETED")

Now **we have to restart the runtime**: `Runtime -> Restart runtime`

In [None]:
import os
import sys
import yaml 

from osgeo import gdal
from osgeo import gdalconst
import geopandas as gpd
import numpy 
import urllib
import httpimport

import matplotlib.pyplot as plt 
%matplotlib inline

# Fetch the workshop repository; the follow-up `git pull` brings an already
# existing checkout (e.g. from an earlier run of this cell) up to date.
!git clone https://gitlab.com/geoharmonizer_inea/odse-workshop-2022.git
!(cd odse-workshop-2022/; git pull)
# Put the st_lucas directory first on the import path so that the
# `validator` module is imported from the cloned repository.
sys.path.insert(0, './odse-workshop-2022/st_lucas')
from validator import Validator

### Configure validation 

Check contents of the `config.yaml` file. 

In [None]:
# Path to the configuration file shipped with the sample data
config_file = "odse-workshop-2022/st_lucas/sample_land_cover/config.yaml"

# Read the file as plain text (no YAML parsing yet), just to display it
with open(config_file, 'r') as file: 
    file_contents = file.read()
    
print(file_contents)

### Initialize the validator 

Initialize the validator by passing the path to the config file or a Python dictionary with the same structure.

In [None]:
# Create the validator from the path of the YAML configuration file
validation = Validator(config_file)

### Check validity of the inputs

In [None]:
# Check whether the input geodata can be read

# The result of check_inputs() is kept and echoed in the message below
inputs_valid = validation.check_inputs()
print('Validation data ready: {}'.format(inputs_valid))

In [None]:
# Check the contents of the input geodata (only the vector layer is inspected here)

# Parse the configuration into `cfg` so its entries can be reused in later cells.
# NOTE(review): the file comes from the cloned repository and is treated as
# trusted; for a plain-data config `yaml.safe_load` would be the more
# defensive choice - confirm before switching.
with open(config_file) as file:
    cfg = yaml.load(file, Loader=yaml.FullLoader)
    
# Vector data: the file name from the config is joined to the input directory
vector_fn = os.path.join(cfg['input']['path'], cfg['input']['in_vec'])
gdf = gpd.read_file(vector_fn)

# Preview the first rows of the vector layer
gdf.head()

In [None]:
# Check the legend

legend_file = 'odse-workshop-2022/st_lucas/sample_land_cover/legend.yaml'

# Keep the raw text in `legend`; it is printed again in a later cell
with open(legend_file, "r") as file: 
    legend = file.read()
print(legend)

In [None]:
# View the distribution of the classes

# `ref_att` from the config is used as the column name in `gdf`
# (presumably the column with the reference class labels - TODO confirm)
attribute = cfg['input']['ref_att']
# Pie chart of the row counts per distinct value, labelled with percentages
gdf[attribute].value_counts().plot.pie(figsize=(7, 7), autopct='%1.1f%%')

In [None]:
# Show a map of the data

from ipyleaflet import Map, GeoData, basemaps, LayersControl

# The map is centred with latitude/longitude values, so reproject the
# points to EPSG:4326 first.
gdf_4326 = gdf.to_crs("EPSG:4326")

# dissolve() merges all features into a single row, so `.centroid` is a
# one-element GeoSeries. Take the Point itself with `.iloc[0]` instead of
# calling float() on one-element Series (deprecated in recent pandas).
center = gdf_4326.dissolve().centroid.iloc[0]

# ipyleaflet expects the centre as (latitude, longitude), i.e. (y, x)
m = Map(center=(center.y, center.x), zoom=7, basemap=basemaps.OpenStreetMap.Mapnik)

lucas_gd = GeoData(geo_dataframe = gdf_4326,
                   style={'color': 'black', 'fillColor': '#3366cc', 'opacity':0.05, 'weight':1.9, 'dashArray':'2', 'fillOpacity':0.6},
                   point_style={'radius': 2, 'color': 'red', 'fillOpacity': 0.8, 'fillColor': 'blue', 'weight': 3},
                   name='LUCAS points')

m.add_layer(lucas_gd)
m.add_control(LayersControl())

# Last expression of the cell: renders the interactive map widget
m

### Run land cover map and reference overlay

In [None]:
# Run the overlay step of the validation; no class aggregation is passed here
validation.overlay()

### Report the validation results 

In [None]:
# Short version of the validation report
validation.short_report()

In [None]:
# Full validation report
validation.report()

In [None]:
# Save the validation report to a text file for future use
validation.save_report()

### Plot confusion matrix

In [None]:
# Display the confusion matrix of the validation run above
validation.show_confusion_matrix()

In [None]:
# Check the legend again (`legend` holds the text read from legend.yaml
# in an earlier cell)

print(legend)

In [None]:
# Save the normalized confusion matrix

validation.save_normalized_confusion_matrix()

In [None]:
# You can also save both plots (the confusion matrix and its normalized
# variant) into the validation directory for later use
validation.save_confusion_matrix()
validation.save_normalized_confusion_matrix()

### Save the validation overlay geodata to a vector

You can save the validation overlay geodata to a vector file.

In [None]:
# Write the validation overlay geodata to a vector file
validation.save_vec()

### Classes aggregation 

In [None]:
# Validation configuration given directly as a Python dictionary
# (one section per top-level key) instead of a YAML file.
config_aggregation = {
    # Project identification
    'project': {
        'name': 'Geoharmonizer Land Cover validation',
        'abbrev': 'cz_lc_18',
        'run_id': '20210907',
    },
    # Input directory with the raster, its legend and the vector layer;
    # `ref_att` is the attribute name used for the reference data
    'input': {
        'path': './odse-workshop-2022/st_lucas/sample_land_cover',
        'in_ras': 'cz_land_cover_osm_2018.tif',
        'ndv': 0,
        'legend': 'legend.yaml',
        'in_vec': 'cz_lucas_points_l1_2018.shp',
        'ref_att': 'label_l1',
    },
    # Report location; a separate directory name is used for this run
    'report': {
        'path': './odse-workshop-2022/st_lucas/sample_land_cover',
        'dir_name': 'lc_2018_validation_aggregation',
    },
    # Output settings for the validation points layer
    'validation_points': {
        'file_name': 'validation_points',
        'ogr_format': 'ESRI Shapefile',
        'epsg': 3035,
    },
}

In [None]:
# Create a second validator, this time from the configuration dictionary
# rather than from a config file path
validation_lc_aggregated = Validator(config_aggregation)

In [None]:
# 2: agriculture (arable land & grassland)
aggregartion = {
    2: [2, 6]
}

In [None]:
validation_lc_aggregated.overlay(aggregartion)

In [None]:
# Full validation report for the run with aggregated classes
validation_lc_aggregated.report()

In [None]:
# Display the confusion matrix for the run with aggregated classes
validation_lc_aggregated.show_confusion_matrix()