# COMPUTATION OF MSE BETWEEN ATLAS AND GEOCLIMATE

<div class="alert alert-block alert-warning">
    Purpose: Compute the Mean Squared Error (MSE) between the heights in the Halle Urban Atlas 2012 data layer and the heights estimated by a random forest model using GeoClimate.
</div>

In [None]:
import enum
import numpy as np
from osgeo import gdal
from osgeo.gdalconst import GA_ReadOnly
import rasterio

In [None]:
def openRaster(fn, access=0):
    """Open the raster file *fn* with GDAL and return the dataset.

    Args:
        fn: Path of the raster file.
        access: Access mode forwarded to ``gdal.Open`` (default ``0``).

    Returns:
        Whatever ``gdal.Open`` returned. When that is ``None`` an error
        message is printed and ``None`` is handed back to the caller.
    """
    dataset = gdal.Open(fn, access)
    if dataset is not None:
        return dataset
    # Opening failed: report it, but leave the decision to the caller.
    print("Error opening raster dataset")
    return dataset
    
def getRasterBand(fn, band=1):
    """Read one band of the raster file *fn* into an array.

    Args:
        fn: Path of the raster file, opened through ``openRaster``.
        band: Index of the band to read, as understood by
            ``GetRasterBand`` (default ``1``).

    Returns:
        The array produced by ``ReadAsArray`` for the requested band.

    Raises:
        RuntimeError: If the dataset could not be opened.
    """
    ds = openRaster(fn)
    if ds is None:
        # openRaster only prints on failure; stop here with a clear error
        # instead of failing on a None attribute access.
        raise RuntimeError(f"Cannot read band {band}: failed to open {fn!r}")
    # Use the requested band index rather than a fixed band, and keep the
    # result in its own name so the parameter is not overwritten.
    data = ds.GetRasterBand(band).ReadAsArray()
    return data
    
def createRasterFromCopy(fn, ds, data, driverFmt="GTiff"):
    """Create raster *fn* as a copy of *ds* and write *data* into band 1.

    Args:
        fn: Path of the raster file to create.
        ds: Source dataset handed to ``CreateCopy``.
        data: Array written into band 1 of the new raster.
        driverFmt: Short name of the GDAL driver used for the output.

    Raises:
        RuntimeError: If the driver is not available or the copy could not
            be created.
    """
    driver = gdal.GetDriverByName(driverFmt)
    if driver is None:
        raise RuntimeError(f"Unknown GDAL driver: {driverFmt!r}")
    outds = driver.CreateCopy(fn, ds, strict=0)
    if outds is None:
        raise RuntimeError(f"Could not create raster copy: {fn!r}")
    outds.GetRasterBand(1).WriteArray(data)
    # Drop the only reference to the output dataset so GDAL can close it and
    # write it to disk. The caller's ``ds`` is left untouched: rebinding the
    # local name here would not affect the caller's reference anyway.
    outds = None

def plot(ds, title):
    """Draw *ds* on a new 12x10 figure with a title and lon/lat axis labels.

    Args:
        ds: Object exposing a ``plot(ax=...)`` method.
        title: Text used as the axes title.
    """
    # NOTE(review): ``plt`` is not imported in the cells shown in this
    # notebook — confirm that matplotlib.pyplot is imported elsewhere.
    figure, axes = plt.subplots(figsize=(12, 10))
    ds.plot(ax=axes)
    # Set the labels after drawing so they override anything ds.plot applied.
    axes.set_title(title)
    axes.set_xlabel('Longitude [deg]')
    axes.set_ylabel('Latitude [deg]')
    
@enum.unique
class GdalDTypes(enum.Enum):
    """Map GDAL data type codes to the name of the matching numpy dtype.

    Each member is named after a numpy dtype and its value is the integer
    code of the corresponding GDAL ``GDALDataType``. Look a code up with
    ``GdalDTypes(code).name``.

    GDAL's complex-integer types (codes 8 and 9) have no numpy counterpart
    and are intentionally absent, so looking them up raises ``ValueError``.
    ``enum.unique`` guards against two names sharing a code, which would
    silently turn the second name into an alias of the first.
    """
    uint8 = 1        # GDT_Byte
    uint16 = 2       # GDT_UInt16
    int16 = 3        # GDT_Int16
    uint32 = 4       # GDT_UInt32
    int32 = 5        # GDT_Int32
    float32 = 6      # GDT_Float32
    float64 = 7      # GDT_Float64
    complex64 = 10   # GDT_CFloat32
    complex128 = 11  # GDT_CFloat64
    uint64 = 12      # GDT_UInt64
    int64 = 13       # GDT_Int64
    int8 = 14        # GDT_Int8 (signed byte has its own code, not 1)
    
# TODO: Make a dataclass    
def print_raster_info(ds, name=None):
    """Print the shape and dtype of *ds*, preceded by *name* when given.

    Args:
        ds: Dataset passed on to ``raster_shape`` and ``raster_dtype``.
        name: Optional heading printed first; skipped when falsy.
    """
    if name:
        print(name)
    shape_line = f'shape: {raster_shape(ds)}'
    print(shape_line)
    # The trailing newline leaves a blank line after each report.
    dtype_line = f'dtype: {raster_dtype(ds)}\n'
    print(dtype_line)
    
def raster_shape(ds):
    """Return ``(band count, x size, y size)`` of the dataset *ds*.

    The x size comes before the y size, so the order is
    ``(RasterCount, RasterXSize, RasterYSize)``.
    """
    band_count = ds.RasterCount
    x_size = ds.RasterXSize
    y_size = ds.RasterYSize
    return band_count, x_size, y_size

def raster_dtype(ds):
    """Return the ``GdalDTypes`` member name for the first band of *ds*.

    Only band 1 is inspected. A data type code that ``GdalDTypes`` does not
    define raises ``ValueError`` from the enum lookup.
    """
    type_code = ds.GetRasterBand(1).DataType
    return GdalDTypes(type_code).name

In [None]:
atlasimgpath = 'clipped_halle_urban_atlas_2012.tiff'
geoimgpath = 'geoclimate_heights.tiff'

In [None]:
# Open both rasters. gdal.Open can hand back None instead of raising, so stop
# here with the offending path rather than with an AttributeError in a later
# cell.
atlasds = gdal.Open(atlasimgpath)
if atlasds is None:
    raise RuntimeError(f"Could not open raster dataset: {atlasimgpath!r}")
geoclimateds = gdal.Open(geoimgpath)
if geoclimateds is None:
    raise RuntimeError(f"Could not open raster dataset: {geoimgpath!r}")

## Basic layer info

In [None]:
print_raster_info(atlasds, 'Atlas')

In [None]:
print_raster_info(geoclimateds, 'Geoclimate')

<font color=red> **REMARK:** The two raster layers differ in both dtype and shape.</font>

## Data type conversion

In [None]:
dstfile = "uint16_geoclimate_heights.tiff"

In [None]:
type(geoclimateds)

In [None]:
srcdata = geoclimateds.ReadAsArray()

In [None]:
srcdata.dtype

In [None]:
# Convert the geoclimate array to unsigned 16-bit integers.
# NOTE(review): if srcdata is floating point, astype drops the fractional part
# rather than rounding, and values outside the uint16 range are not clamped —
# confirm the value range of the source data.
dstdata = srcdata.astype('uint16')

In [None]:
dstdata

In [None]:
dstdata.min(), dstdata.max()

In [None]:
# Get geoclimate parameters to be transferred to the new data type
geotransform = geoclimateds.GetGeoTransform()
spatialreference = geoclimateds.GetProjection()
# raster_shape returns (band count, x size, y size), so columns come before rows.
nband, ncol, nrow = raster_shape(geoclimateds)

In [None]:
nband, ncol, nrow

In [None]:
# Parameters for output dataset
driver_name = 'GTiff'
driver = gdal.GetDriverByName(driver_name)
# Create a new UInt16 raster with the x size, y size and band count read from
# the geoclimate dataset.
dstds = driver.Create(dstfile, ncol, nrow, nband, gdal.GDT_UInt16)
# Carry over the georeferencing of the geoclimate dataset.
dstds.SetGeoTransform(geotransform)
dstds.SetProjection(spatialreference)
# Only band 1 is written.
# NOTE(review): if nband were greater than 1 the other bands would stay
# unwritten — confirm the source is single-band.
dstds.GetRasterBand(1).WriteArray(dstdata)
# Release the dataset reference; presumably this is what lets GDAL close the
# file and flush it to disk before it is reopened below.
dstds = None

In [None]:
uint16geods = gdal.Open(dstfile)

In [None]:
print_raster_info(uint16geods)

## Clipping of geoclimate tiff

In [None]:
# Use the smaller image as reference, i.e., atlas
# Derive the atlas bounding box from its geotransform: elements 0 and 3 are
# used as the x and y of the upper-left corner, elements 1 and 5 as the pixel
# width and height.
# NOTE(review): this ignores elements 2 and 4 (rotation terms) and expects
# element 5 to be negative, i.e. a north-up raster — confirm for this dataset.
# Note that this cell defines `geoTransform`, a different name from the
# `geotransform` read from the geoclimate dataset earlier.
geoTransform = atlasds.GetGeoTransform()
minx = geoTransform[0]
maxy = geoTransform[3]
maxx = minx + geoTransform[1] * atlasds.RasterXSize
miny = maxy + geoTransform[5] * atlasds.RasterYSize
print([minx, miny, maxx, maxy])
# atlasds = None

In [None]:
[minx, miny, maxx, maxy]

In [None]:
# Horizontal and vertical resolutions
# Taken from the atlas geotransform; the sign of element 5 is flipped so that
# yRes is positive when that element is negative.
# NOTE(review): in the cells shown here these two values are only displayed,
# not passed to gdal.Translate — confirm whether that is intended.
xRes = geoTransform[1]
yRes = -geoTransform[5]

In [None]:
xRes, yRes

In [None]:
gdal.Translate?

In [None]:
gdal.TranslateOptions?

In [None]:
print_raster_info(atlasds, 'Atlas')

In [None]:
print_raster_info(geoclimateds, 'Geoclimate')

**<font color=red>NOTE: </font>** <font color=red>The coordinates of the clipping window must be passed in the order **ulx uly lrx lry**, i.e. upper-left x and y followed by lower-right x and y, as stated in the gdal_translate documentation.</font>

In [None]:
# Clip the geoclimate raster to the atlas bounding box and write it as a
# UInt16 GeoTIFF with 65535 as the no-data value. projWin is given as
# [ulx, uly, lrx, lry], hence [minx, maxy, maxx, miny].
# NOTE(review): no output resolution or size is passed, so the clipped raster
# presumably keeps the geoclimate pixel size; its shape matches the atlas only
# if both layers share the same grid — confirm before comparing cell by cell.
clipped_halle_geoclimate = gdal.Translate(srcDS=geoclimateds, destName="clipped_geoclimate.tiff", projWin=[minx, maxy, maxx, miny], outputType=gdal.GDT_UInt16, format="GTiff", noData=65535)

In [None]:
print_raster_info(clipped_halle_geoclimate, 'Clipped geoclimate')

## Creation of binary mask

In [None]:
# Get the array data of each dataset
# The atlas is read from the dataset opened at the start of the notebook, the
# geoclimate heights from the clipped raster produced by gdal.Translate.
atlasdata = atlasds.ReadAsArray()
geoclimatedata = clipped_halle_geoclimate.ReadAsArray()

In [None]:
atlasdata

In [None]:
atlasdata.shape

In [None]:
geoclimatedata

In [None]:
geoclimatedata.shape

In [None]:
# Create masks for valid values
atlasmasked = np.ma.masked_where(65535, atlasdata)
geomasked = np.ma.masked_where(65535, geoclimatedata)

In [None]:
atlasmasked

In [None]:
geomasked

In [None]:
# Find the overlap of the two masked arrays
overlapmask = np.bitwise_and(atlasmasked.mask, geomasked.mask)

In [None]:
overlapmask

## Compute MSE

In [None]:
# Use the overlap to get data from each data layer
geooverdata = atlasmasked.data[overlapmask].reshape(overlapmask.shape)
atlasoverdata = geomasked.data[overlapmask].reshape(overlapmask.shape)

In [None]:
# Cast to float64 before subtracting: on unsigned 16-bit arrays a negative
# difference wraps around (e.g. 3 - 5 gives 65534) and the square overflows,
# which would corrupt the MSE.
delta = geooverdata.astype(np.float64) - atlasoverdata.astype(np.float64)
delta2 = delta * delta

In [None]:
mse = delta2.mean()

In [None]:
mse