# Compare Raster Files (numerically and visually) to reveal their differences

In [None]:
import os, sys

import gdal
from gdalconst import *
import numpy as np

def read_raster_bands(geofile):
    """Read every band of a raster file into numpy arrays.

    Replaces the earlier ``readtiff(geofile)`` helper.

    :param geofile: path of a raster file that GDAL can open
    :return: a list with one numpy array per band, in band order (band 1
        first); each array is read over the full cols x rows window
    :raises IOError: if ``gdal.Open`` returns None for ``geofile``
    """
    # Register drivers
    gdal.AllRegister()

    # Open image
    ds = gdal.Open(geofile, GA_ReadOnly)
    if ds is None:
        # IOError is a subclass of Exception, so callers that catch the
        # generic Exception keep working.
        raise IOError('Could not open image file %s' % (geofile))

    # get image size
    rows = ds.RasterYSize
    cols = ds.RasterXSize
    numbands = ds.RasterCount

    # Read the data into numpy arrays; GDAL band numbers start at 1, not 0
    numarray = [ds.GetRasterBand(i).ReadAsArray(0, 0, cols, rows)
                for i in range(1, numbands + 1)]

    # Once we're done, drop the reference to close the dataset properly
    ds = None

    return numarray

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import scipy.stats as spys

def showimages(rastfile_a,rastfile_b=None):
    """Display the first band of one raster, or of two rasters side by side.

    :param rastfile_a: path of the first raster file
    :param rastfile_b: optional path of a second raster file; when given,
        its first band is drawn in a panel next to that of ``rastfile_a``
    :return: None; the image(s) are drawn on the current matplotlib figure
    """
    band0a = read_raster_bands(rastfile_a)[0]

    if rastfile_b is None:
        # Single raster: draw it on the current axes.
        plt.imshow(band0a)
        return

    band0b = read_raster_bands(rastfile_b)[0]

    # One panel per raster so the two can be compared visually.
    _, axes = plt.subplots(1, 2, figsize=(20, 10))
    axes[0].imshow(band0a)
    axes[1].imshow(band0b)

#if __name__ == "__main__":
#    infile = sys.argv[1]
#    nparlist = readtiff(infile)



In [None]:
# Show the first band of a single clear-observations pyramid tile (level 7)

PyramidClearWaterObsTif = (
    '/g/data/u46/wofs/water_FZ20160203/pyramids/ClearObservations/7/clear_obs_143_-033_1_1.tif'
)

showimages(rastfile_a=PyramidClearWaterObsTif)

In [None]:
# Complete WOfS rerun: clear-observation pyramids 6 and 7, side by side

PyramidClearWaterObs6Tif = '/g/data/u46/wofs/water_FZ20160203/pyramids/ClearObservations/6/clear_obs_143_-033_1_1.tif'
PyramidClearWaterObs7Tif = '/g/data/u46/wofs/water_FZ20160203/pyramids/ClearObservations/7/clear_obs_143_-033_1_1.tif'

# Each call returns one numpy array per band
imga = read_raster_bands(PyramidClearWaterObs6Tif)
imgb = read_raster_bands(PyramidClearWaterObs7Tif)

# One row, two columns: pyramid 6 on the left, pyramid 7 on the right
f, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))

# Only the first band of each file is drawn
ax[0].imshow(imga[0], cmap='Greys')
ax[1].imshow(imgb[0], cmap='Greys')

ax[0].set_title('%s\n' % ('Clear Observations Pyramid 6'))
ax[1].set_title('%s\n' % ('Clear Observations  Pyramid 7'))

plt.show()

In [None]:
# Double-counting problem in the clear observations of an old WOfS run:
# pyramids 6 and 7 of the same tile, side by side

PyramidClearWaterObs6Tif='/g/data/u46/wofs/WOFS_OLD_RUNS/water_zf2/pyramids/ClearObservations/6/clear_obs_143_-033_1_1.tif'
PyramidClearWaterObs7Tif='/g/data/u46/wofs/WOFS_OLD_RUNS/water_zf2/pyramids/ClearObservations/7/clear_obs_143_-033_1_1.tif'


# Each call returns one numpy array per band
imga=read_raster_bands(PyramidClearWaterObs6Tif)
imgb=read_raster_bands(PyramidClearWaterObs7Tif)

# One row, two columns; only the first band of each file is drawn
f, ax = plt.subplots(1,2,figsize=(20,10))
ax[0].imshow(imga[0], cmap='Greys')
ax[0].set_title('%s\n'%('Double Counting Problem Clear Observations Pyramid 6'))

ax[1].imshow(imgb[0], cmap='Greys')
ax[1].set_title('%s\n'%('Clear Observations  Pyramid 7'))

plt.show()

In [None]:

# Previously published product (left) against the rerun (right), pyramid 6.
# NOTE(review): the two paths name different tiles (111_-028 vs 143_-033) --
# confirm that comparing these two files is intended.

# Published
tifa = '/g/data/fk4/wofs/water_f7q/pyramids/ClearObservations/6/obscount_111_-028_1_1.tif'
# Rerun
tifb = '/g/data/u46/wofs/water_FZ20160203/pyramids/ClearObservations/6/clear_obs_143_-033_1_1.tif'

# Each call returns one numpy array per band
imga = read_raster_bands(tifa)
imgb = read_raster_bands(tifb)

f, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))

# First band of each file, drawn with matplotlib's default colormap
ax[0].imshow(imga[0])
ax[1].imshow(imgb[0])

ax[0].set_title('%s\n' % ('Previous Published good Clear Observations Pyramid 6'))
ax[1].set_title('%s\n' % ('Rerun Clear Observations  Pyramid 6'))

plt.show()

In [None]:
def compare_two_files_pixels(rastfile_a,rastfile_b):
    """Compare the pixel values of two geo-raster files, band by band.

    Only pixel data is compared, so two files can be reported as identical
    here while their metadata still differ.

    :param rastfile_a: path of the first raster file
    :param rastfile_b: path of the second raster file
    :return: True if both files have the same number of bands and every pair
        of bands has the same shape and the same values; False otherwise
    """
    numalist_a = read_raster_bands(rastfile_a)
    numalist_b = read_raster_bands(rastfile_b)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("The number of bands in each raster files:")
    print("%s %s" % (len(numalist_a), len(numalist_b)))

    if len(numalist_a) != len(numalist_b):
        return False

    for n, (band_a, band_b) in enumerate(zip(numalist_a, numalist_b)):
        print("The numerical sums of the band  %s in each raster files: " % n)
        print("%s %s" % (np.sum(band_a), np.sum(band_b)))

        # (a == b).all() yields a numpy bool, which is never the ``False``
        # singleton, so an ``is False`` identity test can never fire.
        # np.array_equal returns a plain truth value and is also False when
        # the two bands differ in shape.
        if not np.array_equal(band_a, band_b):
            print("Not all are equal in the band  %s" % n)
            return False

    print("Comparison of the two files pixel values result in:")

    return True

In [None]:
# Compare one water-extent file in the extents directory with its backup copy
WATER_EXTENTS_DIR = '/g/data/u46/wofs/extents'
WATER_EXTENTS_DIR_OLD = '/g/data/u46/wofs/extents_OLD_540GB_02Feb2016'  # Backup
cell_index = '144_-026'
a_water_extent_file = 'LS8_OLI_TIRS_WATER_144_-026_2015-06-15T00-17-29.tif'

# Both copies live under <extents dir>/<cell index>/<file name>
rastfile_a, rastfile_b = (
    os.path.join(extents_root, cell_index, a_water_extent_file)
    for extents_root in (WATER_EXTENTS_DIR, WATER_EXTENTS_DIR_OLD)
)

# Band arrays of both files (a first, then b)
numalist_a, numalist_b = map(read_raster_bands, (rastfile_a, rastfile_b))

bo = compare_two_files_pixels(rastfile_a, rastfile_b)

print(bo)

In [None]:
def compare_two_dirs_rasterfiles(dir_a, dir_b):
    """Compare the ``*.tif`` files of two directories pairwise by file name.

    Every ``*.tif`` file found in ``dir_a`` is compared, pixel by pixel,
    with the file of the same name in ``dir_b`` and the outcome is printed.
    A file that exists in only one of the two directories is reported
    instead of being compared.

    :param dir_a: first directory
    :param dir_b: second directory
    :return: None; all results are printed
    """
    import glob

    def tif_paths_by_name(directory):
        # Map bare file name -> path as returned by glob
        return dict((os.path.basename(path), path)
                    for path in glob.glob(directory + "/*.tif"))

    paths_a = tif_paths_by_name(dir_a)
    paths_b = tif_paths_by_name(dir_b)

    # sorted() so the report order does not depend on the listing order
    for fname in sorted(paths_a):
        afile = paths_a[fname]
        bfile = os.path.join(dir_b, fname)

        if fname not in paths_b:
            # Report the gap rather than failing when trying to open bfile
            print("file %s has no counterpart %s" % (afile, bfile))
            continue

        print("comparing file %s and %s" % (afile, bfile))

        res = compare_two_files_pixels(afile, bfile)
        print(res)

    # Files that exist only in dir_b would otherwise go unnoticed
    for fname in sorted(set(paths_b) - set(paths_a)):
        print("file %s has no counterpart %s"
              % (paths_b[fname], os.path.join(dir_a, fname)))
        


In [None]:
# Compare every tif of the chosen cell between the extents directory and its backup
compare_two_dirs_rasterfiles(
    '/'.join((WATER_EXTENTS_DIR, cell_index)),
    '/'.join((WATER_EXTENTS_DIR_OLD, cell_index)),
)


In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import scipy.stats as spys
