# COMPUTATION OF MSE

<div class="alert alert-block alert-warning">
    Purpose: Compute the Mean Squared Error (MSE) between the true heights and heights estimated by the different methods.
</div>

In [1]:
import enum
import numpy as np
import os
from osgeo import (gdal, gdal_array)
from osgeo.gdalconst import GA_ReadOnly
import rasterio

In [2]:
def openRaster(fn, access=0):
    """Open the raster file ``fn`` with GDAL.

    Parameters
    ----------
    fn : path of the raster file, passed straight to ``gdal.Open``.
    access : access flag passed straight to ``gdal.Open`` (default 0).

    Returns
    -------
    Whatever ``gdal.Open`` returns. When that is ``None`` an error message is
    printed and ``None`` is returned; no exception is raised here.
    """
    dataset = gdal.Open(fn, access)
    if dataset is not None:
        return dataset
    # Failure is only reported; callers receive None and must cope with it.
    print("Error opening raster dataset")
    return None
    
def getRasterBand(fn, band=1):
    """Read one band of the raster file ``fn`` into an array.

    Parameters
    ----------
    fn : path of the raster file, opened through ``openRaster``.
    band : index of the band to read, forwarded to ``GetRasterBand``
        (default 1). Previously this argument was ignored and band 1 was
        always read.

    Returns
    -------
    The result of ``ReadAsArray()`` on the requested band.

    Notes
    -----
    ``openRaster`` returns ``None`` when the file cannot be opened, in which
    case the attribute access below raises ``AttributeError``.
    """
    ds = openRaster(fn)
    # Keep the band index and the pixel data in separate names.
    return ds.GetRasterBand(band).ReadAsArray()
    
def createRasterFromCopy(fn, ds, data, driverFmt="GTiff"):
    """Create raster ``fn`` as a copy of ``ds`` and overwrite its band 1 with ``data``.

    Parameters
    ----------
    fn : path of the raster to create.
    ds : dataset handed to the driver's ``CreateCopy`` as the source.
    data : array written into band 1 of the new raster.
    driverFmt : GDAL driver name used to create the copy (default "GTiff").

    Returns
    -------
    None.
    """
    copied = gdal.GetDriverByName(driverFmt).CreateCopy(fn, ds, strict=0)
    copied.GetRasterBand(1).WriteArray(data)
    # Drop the local references. This only unbinds the local names: the
    # caller's own reference to ``ds`` is unaffected.
    # NOTE(review): presumably releasing ``copied`` is what makes GDAL flush
    # the new file to disk - confirm.
    del ds
    del copied

def plot(ds, title):
    """Draw ``ds`` on a new 12x10 figure with longitude/latitude axis labels.

    Parameters
    ----------
    ds : object exposing a ``plot(ax=...)`` method.
    title : text used as the axes title.

    NOTE(review): ``plt`` is not imported by the import cell of this notebook,
    so calling this function on a fresh kernel presumably raises ``NameError``
    unless ``matplotlib.pyplot`` is imported as ``plt`` elsewhere - confirm.
    """
    figure, axes = plt.subplots(figsize=(12, 10))
    ds.plot(ax=axes)
    axes.set_title(title)
    axes.set_xlabel('Longitude [deg]')
    axes.set_ylabel('Latitude [deg]')
    
class GdalDTypes(enum.Enum):
    """Map GDAL data type codes to numpy dtype names.

    Each member name is a numpy dtype name and each member value is the
    integer code GDAL reports through ``band.DataType``, so
    ``GdalDTypes(code).name`` yields the numpy dtype name.

    Codes 8 and 9 (GDAL's complex integer types) are deliberately absent
    because numpy has no matching dtype; looking them up raises ``ValueError``.
    """
    uint8 = 1        # GDT_Byte
    uint16 = 2       # GDT_UInt16
    int16 = 3        # GDT_Int16
    uint32 = 4       # GDT_UInt32
    int32 = 5        # GDT_Int32
    float32 = 6      # GDT_Float32
    float64 = 7      # GDT_Float64
    complex64 = 10   # GDT_CFloat32
    complex128 = 11  # GDT_CFloat64
    uint64 = 12      # GDT_UInt64
    int64 = 13       # GDT_Int64
    # Signed bytes have their own code. Reusing 1 turned ``int8`` into an
    # Enum alias of ``uint8`` instead of a distinct member.
    int8 = 14        # GDT_Int8
    
# TODO: Make a dataclass    
def print_raster_info(ds, name=None):
    """Print the shape and dtype of ``ds``, preceded by ``name`` when given.

    Parameters
    ----------
    ds : dataset passed to ``raster_shape`` and ``raster_dtype``.
    name : optional heading printed first; skipped when falsy.
    """
    if name:
        print(name)
    shape = raster_shape(ds)
    print(f'shape: {shape}')
    dtype = raster_dtype(ds)
    # The trailing newline leaves a blank line between consecutive reports.
    print(f'dtype: {dtype}\n')
    
def raster_shape(ds):
    """Return ``(RasterCount, RasterXSize, RasterYSize)`` of ``ds``.

    The X size comes before the Y size, which is the reverse of the
    ``(rows, columns)`` order of an array's ``shape``.
    """
    band_count = ds.RasterCount
    x_size, y_size = ds.RasterXSize, ds.RasterYSize
    return band_count, x_size, y_size

def raster_dtype(ds):
    """Return the ``GdalDTypes`` member name for the data type of band 1 of ``ds``.

    The lookup is by value, so a ``DataType`` code with no ``GdalDTypes``
    member raises ``ValueError``.
    """
    type_code = ds.GetRasterBand(1).DataType
    return GdalDTypes(type_code).name

## Paths

In [3]:
# Directory holding the input GeoTIFF layers, and the path of each layer.
srcdir = 'GeoTiff Heights'
truth, demdtm, geoclm, meth1, ua12 = (
    os.path.join(srcdir, filename)
    for filename in (
        'Hall_Truth.tiff',
        'Halle_dem_dtm_heights.tiff',
        'Halle_Geoclimate.tiff',
        'halle_method1_raster_Final_Test.tiff',
        'Halle_UA12.tif',
    )
)

In [4]:
# NOTE(review): neither path is referenced by the cells shown below - confirm
# whether they are still needed.
atlasimgpath, geoimgpath = (
    'clipped_halle_urban_atlas_2012.tiff',
    'geoclimate_heights.tiff',
)

## Datasets

In [5]:
# Open every layer through the notebook's own helper so that a file that
# cannot be opened is reported ("Error opening raster dataset") instead of
# silently yielding None.
truthds = openRaster(truth)
demdtmds = openRaster(demdtm)
geoclmds = openRaster(geoclm)
meth1ds = openRaster(meth1)
ua12ds = openRaster(ua12)

## Basic layer info

In [6]:
# Report the shape and dtype of every source layer.
for label, dataset in (
    ('Truth', truthds),
    ('Dem - Dtm', demdtmds),
    ('Geoclimate', geoclmds),
    ('Method1', meth1ds),
    ('UA12', ua12ds),
):
    print_raster_info(dataset, label)

Truth
shape: (1, 1625, 1601)
dtype: float64

Dem - Dtm
shape: (1, 1202, 1011)
dtype: float64

Geoclimate
shape: (1, 1526, 1367)
dtype: float64

Method1
shape: (1, 1218, 1126)
dtype: float64

UA12
shape: (1, 1575, 1466)
dtype: uint16



<font color=red> **REMARK:** The raster layers have different shapes, and their dtypes differ as well (UA12 is uint16, all the others are float64).</font>

## Data type conversion

In [7]:
def to_uint16_dataset(srcds):
    """Return an array-backed dataset holding ``srcds`` cast to uint16.

    Parameters
    ----------
    srcds : dataset whose pixels, geotransform and projection are copied.

    Returns
    -------
    A dataset created with ``gdal_array.OpenArray`` from the converted array,
    carrying the geotransform and projection of ``srcds``.

    Notes
    -----
    ``astype('uint16')`` is a lossy cast: fractional parts are dropped, and
    values that uint16 cannot represent (negative, above 65535, NaN) do not
    survive it.
    NOTE(review): confirm the float64 sources contain no such values, or
    handle their no-data marker before converting.
    """
    uint16array = srcds.ReadAsArray().astype('uint16')
    # The dataset is built from the array itself, so the pixel values are
    # already in place and need no further write into the band.
    ds = gdal_array.OpenArray(uint16array)
    ds.SetGeoTransform(srcds.GetGeoTransform())
    ds.SetProjection(srcds.GetProjection())
    return ds

In [8]:
# Convert every float64 layer to uint16 (UA12 is already uint16).
u16truthds, u16demdtmds, u16geoclmds, u16meth1ds = (
    to_uint16_dataset(source)
    for source in (truthds, demdtmds, geoclmds, meth1ds)
)

In [9]:
# Report the converted layers; UA12 was uint16 from the start, so the original
# dataset is reported for it.
for label, dataset in (
    ('uint16 Truth', u16truthds),
    ('uint16 Dem - Dtm', u16demdtmds),
    ('uint16 Geoclimate', u16geoclmds),
    ('uint16 Method 1', u16meth1ds),
    ('uint16 UA12', ua12ds),
):
    print_raster_info(dataset, label)

uint16 Truth
shape: (1, 1625, 1601)
dtype: uint16

uint16 Dem - Dtm
shape: (1, 1202, 1011)
dtype: uint16

uint16 Geoclimate
shape: (1, 1526, 1367)
dtype: uint16

uint16 Method 1
shape: (1, 1218, 1126)
dtype: uint16

uint16 UA12
shape: (1, 1575, 1466)
dtype: uint16



## Clipping to a common size

In [10]:
# Use the smallest raster (dem-dtm) as the reference for the clipping window.
gt = u16demdtmds.GetGeoTransform()
# gt[0], gt[3] are taken as the upper-left corner; gt[1], gt[5] as the steps
# per pixel along x and y.
minx, maxy = gt[0], gt[3]
maxx = minx + gt[1] * u16demdtmds.RasterXSize
miny = maxy + gt[5] * u16demdtmds.RasterYSize
print([minx, miny, maxx, maxy])

[4451610.0, 3148570.0, 4463630.0, 3158680.0]


In [11]:
# Show the clipping window as [minx, miny, maxx, maxy].
[minx, miny, maxx, maxy]

[4451610.0, 3148570.0, 4463630.0, 3158680.0]

In [12]:
# Horizontal and vertical resolutions, read from the reference geotransform.
# The sign of gt[5] is flipped for the vertical one.
xRes, yRes = gt[1], -gt[5]

In [13]:
# Show the horizontal and vertical resolutions.
xRes, yRes

(10.0, 10.0)

**<font color=red>NOTE: </font>** <font color=red>The correct order of the coordinates passed as the clipping window is **ulx uly lrx lry**, i.e. upper-left x and y followed by lower-right x and y, as described in the gdal_translate documentation.</font>

In [14]:
# Clip every layer to the window of the reference raster. The window is given
# as [ulx, uly, lrx, lry]. All layers share the same output options.
clip_options = dict(
    projWin=[minx, maxy, maxx, miny],
    outputType=gdal.GDT_UInt16,
    format="GTiff",
    noData=65535,
)
cu16truthds = gdal.Translate(srcDS=u16truthds, destName="cu16truthds.tiff", **clip_options)
# The clipped UA12 layer used to be written to "cu16demdtmds.tiff", a file
# name that belongs to a different layer; it now gets a name of its own.
cua12ds = gdal.Translate(srcDS=ua12ds, destName="cua12ds.tiff", **clip_options)
cu16geoclmds = gdal.Translate(srcDS=u16geoclmds, destName="cu16geoclmds.tiff", **clip_options)
cu16meth1ds = gdal.Translate(srcDS=u16meth1ds, destName="cu16meth1ds.tiff", **clip_options)

In [15]:
# The dem-dtm layer provided the clipping window, so it was not clipped and
# its uint16 dataset is reported as is.
for label, dataset in (
    ('uint16 Truth', cu16truthds),
    ('uint16 Dem - Dtm', u16demdtmds),
    ('uint16 Geoclimate', cu16geoclmds),
    ('uint16 Method 1', cu16meth1ds),
    ('uint16 UA12', cua12ds),
):
    print_raster_info(dataset, label)

uint16 Truth
shape: (1, 1202, 1011)
dtype: uint16

uint16 Dem - Dtm
shape: (1, 1202, 1011)
dtype: uint16

uint16 Geoclimate
shape: (1, 1202, 1011)
dtype: uint16

uint16 Method 1
shape: (1, 1202, 1011)
dtype: uint16

uint16 UA12
shape: (1, 1202, 1011)
dtype: uint16



<font color=blue> Now all the layers have the same data type and shape. Moreover, all the layers have the same extent. </font>

## Creation of masked arrays

In [16]:
def get_masked_array(ds, nan=65535):
    """Read ``ds`` into a numpy masked array.

    Parameters
    ----------
    ds : object exposing ``ReadAsArray()``.
    nan : value handed to ``np.ma.masked_where`` as its first argument
        (default 65535).

    Returns
    -------
    The result of ``np.ma.masked_where(nan, array)``.
    """
    dsarray = ds.ReadAsArray()
    # NOTE(review): the first argument of ``masked_where`` is the *condition*.
    # Passing the scalar ``nan`` itself (non-zero, hence truthy) appears to
    # mask every element rather than only the pixels equal to ``nan``;
    # ``dsarray == nan`` looks like the intended condition. The later cells
    # index with ``.mask`` and reshape the selection back to the full shape,
    # which only works while the mask is entirely True, so they would have to
    # change together with this line - confirm before fixing.
    return np.ma.masked_where(nan, dsarray)

In [17]:
# Build a masked array per layer. The dem-dtm layer provided the clipping
# window, so its unclipped uint16 dataset is used.
maskedtruth, maskeddemdtm, maskedgeoclm, maskedmeth1, maskedua12 = (
    get_masked_array(dataset)
    for dataset in (cu16truthds, u16demdtmds, cu16geoclmds, cu16meth1ds, cua12ds)
)

## Getting the overlaps of the datasets with the ground truth

In [18]:
# Element-wise AND of the truth mask with the mask of each estimated layer.
# NOTE(review): in a numpy masked array ``.mask`` is True where a value is
# masked out, while ``get_error`` uses these results to pick the pixels to
# compare - confirm which convention is intended.
truthdemdtmoverlapmask = maskedtruth.mask & maskeddemdtm.mask
truthgeoclmoverlapmask = maskedtruth.mask & maskedgeoclm.mask
truthmeth1overlapmask = maskedtruth.mask & maskedmeth1.mask
truthua12overlapmask = maskedtruth.mask & maskedua12.mask

## Compute MSE

In [19]:
def get_error(ma1, ma2, commonmask):
    """Return ``ma1 - ma2`` restricted to the pixels selected by ``commonmask``.

    Parameters
    ----------
    ma1, ma2 : masked arrays of identical shape; only their ``.data`` is read.
    commonmask : boolean array (or scalar) that is True for every pixel to
        compare.

    Returns
    -------
    A float64 masked array with the shape of the inputs, holding the signed
    differences. Pixels not selected by ``commonmask`` are masked, so
    reductions such as ``.mean()`` skip them.

    Notes
    -----
    Two problems of the previous implementation are addressed:

    * the boolean selection was reshaped back to the full shape, which fails
      as soon as a single pixel is excluded;
    * the subtraction ran in the unsigned dtype of the inputs, so a negative
      difference wrapped around (``1 - 3`` became ``65534`` for uint16).
    """
    # Subtract in float64 so negative differences keep their sign.
    values1 = np.asarray(ma1.data, dtype=np.float64)
    values2 = np.asarray(ma2.data, dtype=np.float64)
    selected = np.broadcast_to(np.asarray(commonmask, dtype=bool), values1.shape)
    # A masked-array mask is True where a value must be ignored.
    return np.ma.masked_array(values1 - values2, mask=~selected)

In [20]:
# Difference between the truth and each estimated layer on their overlap.
truthdemdtmdelta, truthgeoclmdelta, truthmeth1delta, truthua12delta = (
    get_error(maskedtruth, estimate, overlapmask)
    for estimate, overlapmask in (
        (maskeddemdtm, truthdemdtmoverlapmask),
        (maskedgeoclm, truthgeoclmoverlapmask),
        (maskedmeth1, truthmeth1overlapmask),
        (maskedua12, truthua12overlapmask),
    )
)

In [21]:
# Square the differences element by element.
truthdemdtmdelta2 = np.square(truthdemdtmdelta)
truthgeoclmdelta2 = np.square(truthgeoclmdelta)
truthmeth1delta2 = np.square(truthmeth1delta)
truthua12delta2 = np.square(truthua12delta)

In [22]:
# The MSE of each method is the mean of its squared differences.
msetruthdemdtm, msetruthgeoclm, msetruthmeth1, msetruthua12 = (
    squared.mean()
    for squared in (
        truthdemdtmdelta2,
        truthgeoclmdelta2,
        truthmeth1delta2,
        truthua12delta2,
    )
)

In [23]:
# Report the MSE of every method against the ground truth, one per line.
print(
    f'MSE truth-demdtm:     {msetruthdemdtm}',
    f'MSE truth-geoclimate: {msetruthgeoclm}',
    f'MSE truth-method1:    {msetruthmeth1}',
    f'MSE truth-UA12:         {msetruthua12}',
    sep='\n',
)

MSE truth-demdtm:     6.79817021087505
MSE truth-geoclimate: 10.142367402828455
MSE truth-method1:    5.785530545036216
MSE truth-UA12:         11.411197295638162
