This script evaluates the physical model by comparing SMR model predictions against actual in-ground soil moisture sensor readings.

In [None]:
# import packages
import gdal
import os
import pandas as pd
from datetime import datetime

In [None]:
# load the coordinate table for the field sites, then take each field's
# sensors by row position
coords = pd.read_csv(
    'D:/Dane/UW MSDS/2018-1-Winter Quarter/DATA 591 Data Science Capstone II/HydroSatML/data/sensor_coords/SCF_TierII_site_coords.csv'
)
coords_AES = coords.iloc[:12]     # rows 0-11
coords_OD = coords.iloc[24:36]    # rows 24-35

In [None]:
# read in sensor data: identifying columns plus the five depth readings
depth_columns = ['depth_1', 'depth_2', 'depth_3', 'depth_4', 'depth_5']
sensors = pd.read_csv('D:/Dane/UW MSDS/2018-1-Winter Quarter/DATA 591 Data Science Capstone II/HydroSatML/data/data_for_models/final_join.csv',
                      usecols=['field', 'sensor', 'date'] + depth_columns)

# remove duplicate rows
sensors.drop_duplicates(inplace=True)

# parse each 'YYYY-MM-DD' date string once; both date parts come from it
parsed_dates = sensors['date'].map(lambda text: datetime.strptime(text, '%Y-%m-%d'))

# zero-padded day of year ('001'-'366') and four-digit year, both as strings
sensors['DOY'] = parsed_dates.map(lambda stamp: stamp.strftime('%j'))
sensors['year'] = parsed_dates.map(lambda stamp: stamp.strftime('%Y'))

# row-wise mean of the five depth readings
sensors['avg'] = sensors[depth_columns].mean(axis=1, numeric_only=True)

# SMR filename to look up for each row: 'mc_' + day of year + year + '.asc'
sensors['filename'] = 'mc_' + sensors['DOY'] + sensors['year'] + '.asc'

# keep only rows that have an average and belong to field 'AES' or 'OD'
has_average = sensors['avg'].notna()
in_target_field = sensors['field'].isin(['AES', 'OD'])
sensors = sensors[has_average & in_target_field]

In [None]:
# create list to loop through when opening raster files: one row per distinct
# (field, filename) pair present in the sensor data
raster_files = sensors[['field', 'filename']].drop_duplicates()

# sensor coordinates to sample for each field
coords_by_field = {'AES': coords_AES, 'OD': coords_OD}

# loop through raster files, opening and obtaining soil moisture values from SMR
SMR_predictions = []   # rows of [field, sensor number, filename, SMR value]
missing_files = []     # filenames whose raster was not found on disk

for field, filename in raster_files.itertuples(index=False, name=None):

    # full path of the SMR raster for this field and day
    directory = 'D:/Dane/UW MSDS/2018-1-Winter Quarter/DATA 591 Data Science Capstone II/Local/' + field + '/rz.' + filename

    # check to see if the corresponding SMR file exists
    if not os.path.isfile(directory):
        missing_files.append(filename)
        continue

    # use appropriate field coordinates; an unexpected field raises KeyError
    # here instead of silently reusing the previous field's coordinates
    points = coords_by_field[field]

    # open raster file; gdal.Open returns None (rather than raising) when
    # GDAL exceptions are not enabled, so fail with a readable message
    raster = gdal.Open(directory)
    if raster is None:
        raise RuntimeError('GDAL could not open raster file: ' + directory)

    # georeference info: origin and pixel size along each axis
    transform = raster.GetGeoTransform()
    xOrigin = transform[0]
    yOrigin = transform[3]
    pixelWidth = transform[1]
    pixelHeight = transform[5]

    # read band 1 into a 2-D array indexed as data[row][column]
    data = raster.GetRasterBand(1).ReadAsArray()
    n_rows, n_cols = data.shape

    # loop through coordinates; sensors are numbered 1..N in table order
    for sensor_num, (x, y) in enumerate(points[['east', 'north']].values.tolist(), start=1):

        # fractional pixel position of the sensor inside the raster
        xOffset = (x - xOrigin) / pixelWidth
        yOffset = (y - yOrigin) / pixelHeight

        # only index the array when the point really lies inside it: a
        # negative offset would otherwise wrap around (numpy negative
        # indexing) or be truncated to pixel 0, returning a wrong cell
        if 0 <= xOffset < n_cols and 0 <= yOffset < n_rows:
            value = data[int(yOffset)][int(xOffset)]
        else:
            # outside the raster (or NaN coordinate): no SMR value here
            value = float('nan')

        SMR_predictions.append([field, sensor_num, filename, value])

In [None]:
# display the SMR raster filenames that were not found on disk
missing_files

In [None]:
# tabulate the SMR values sampled for each field / sensor / raster file
prediction_columns = ['field', 'sensor', 'filename', 'SMR_prediction']
predictions = pd.DataFrame(SMR_predictions, columns=prediction_columns)

# pair every sensor reading with its SMR value; the default inner join keeps
# only rows that match on all three keys
result = sensors.merge(predictions, on=['field', 'sensor', 'filename'])

# signed error: sensor depth-average minus SMR prediction
result['error'] = result['avg'].sub(result['SMR_prediction'])

In [None]:
# mean absolute error between sensor averages and SMR predictions
result['error'].abs().mean()

In [None]:
# root mean squared error between sensor averages and SMR predictions
result['error'].pow(2).mean() ** 0.5