# Purple Air Temperature Interpolation

In [7]:
#join the realtime csv to the station XY layer
arcpy.management.AddJoin(
    in_layer_or_view="purpleair_stations_XYTableToPoint",
    in_field="sensor_index",
    join_table="purpleair_stations_realtime.csv",
    join_field="sensor_index",
    join_type="KEEP_ALL",
    index_join_fields="NO_INDEX_JOIN_FIELDS"
)

# Empirical Bayesian Kriging Interpolation

In [12]:
#empirical bayesian kriging for temperature
arcpy.ga.EmpiricalBayesianKriging(
    in_features="purpleair_stations_XYTableToPoint",
    z_field="purpleair_stations_XYTableToPoint.sensor_index",
    out_ga_layer="ppair_kriging_temp2_stats",
    out_raster=r"C:\Users\laure\OneDrive\Documents\ArcGIS\Projects\PurpleAirStations\PurpleAirStations.gdb\ppair_kriging_temp2",
    cell_size=0.000932919999999967,
    transformation_type="NONE",
    max_local_points=100,
    overlap_factor=1,
    number_semivariograms=100,
    search_neighborhood="NBRTYPE=StandardCircular RADIUS=9.87111293434023E-02 ANGLE=0 NBR_MAX=15 NBR_MIN=10 SECTOR_TYPE=ONE_SECTOR",
    output_type="PREDICTION",
    quantile_value=0.5,
    threshold_type="EXCEED",
    probability_threshold=None,
    semivariogram_model_type="POWER"
)

In [13]:
#cross validation for kriging
arcpy.ga.CrossValidation(
    in_geostat_layer="stats",
    out_point_feature_class=r"C:\Users\laure\OneDrive\Documents\ArcGIS\Projects\PurpleAirStations\PurpleAirStations.gdb\stats_cv"
)



In [20]:
import numpy

input = r"C:\Users\laure\OneDrive\Documents\ArcGIS\Projects\PurpleAirStations\PurpleAirStations.gdb\stats_cv"
arr_temp_kriging = arcpy.da.TableToNumPyArray(input, ("OBJECTID", "Error"))

# Sum the errors
print(arr_temp_kriging["Error"].sum())


31.26167166568093


In [21]:
arr_temp_kriging

array([( 1,  17.76831173), ( 2,  -9.7625885 ), ( 3,   5.58366406),
       ( 4,   0.91476484), ( 5, -21.20712534), ( 6,   2.83349385),
       ( 7,   2.56459677), ( 8, -16.92907115), ( 9,  27.40173776),
       (10,  16.81454868), (11,  18.97329965), (12,  11.87557692),
       (13,  -5.91631626), (14, -24.86061317), (15, -22.38034983),
       (16,  -3.2378818 ), (17,  22.20025622), (18, -20.73954015),
       (19, -27.36411485), (20,  11.97296434), (21,  -5.1468516 ),
       (22,   2.47871312), (23,  19.46895688), (24, -37.37942263),
       (25,  14.49654463), (26,  18.25727395), (27,  24.48706625),
       (28,   4.85721135), (29,  26.19095687), (30,  16.80226106),
       (31, -22.3291103 ), (32,   1.52441265), (33,  -5.45785313),
       (34,  -3.88731428), (35, -40.85118349), (36,  -0.76982917),
       (37,  22.33292331), (38,  21.27082331), (39,   8.811376  ),
       (40,  -1.86633883), (41, -32.71984254), (42,  -3.33695902),
       (43,  10.35088427), (44,  -5.8160569 ), (45, -24.053175

In [22]:
kriging_temp_arr = pd.DataFrame(arr_temp_kriging, columns=["OBJECTID", "Error"])
kriging_temp_arr

Unnamed: 0,OBJECTID,Error
0,1,17.768312
1,2,-9.762589
2,3,5.583664
3,4,0.914765
4,5,-21.207125
...,...,...
70,71,-3.075076
71,72,-24.002059
72,73,20.254745
73,74,3.393705


In [26]:
# Calculate squared error
kriging_temp_arr['squared_error'] = kriging_temp_arr['Error'] ** 2

# Calculate RMSE
krig_rmse = numpy.sqrt(kriging_temp_arr['squared_error'].mean())

# Display RMSE
print("RMSE: ", krig_rmse)

RMSE:  16.623762729612874


In [11]:
#raster to point for the kriging 
arcpy.conversion.RasterToPoint(
    in_raster="kriging_temp",
    out_point_features=r"C:\Users\laure\OneDrive\Documents\ArcGIS\Projects\PurpleAirStations\PurpleAirStations.gdb\points_temp_kriging1",
    raster_field="Value"
)





# IDW Interpolation

In [4]:
#interpolation using IDW for temperature
arcpy.ga.IDW(
    in_features="purpleair_stations_XYTableToPoint",
    z_field="purpleair_stations_XYTableToPoint.temperature",
    out_ga_layer="idw_temp_stats",
    out_raster=r"C:\Users\laure\OneDrive\Documents\ArcGIS\Projects\PurpleAirStations\PurpleAirStations.gdb\idw_temp",
    cell_size=0.000932919999999967,
    power=2,
    search_neighborhood="NBRTYPE=Standard S_MAJOR=9.87111293434023E-02 S_MINOR=9.87111293434023E-02 ANGLE=0 NBR_MAX=15 NBR_MIN=10 SECTOR_TYPE=ONE_SECTOR",
    weight_field=None
)

#cross validation of IDW interpolation of temperature
arcpy.ga.CrossValidation(
    in_geostat_layer="idw_temp_stats",
    out_point_feature_class=r"C:\Users\laure\OneDrive\Documents\ArcGIS\Projects\PurpleAirStations\PurpleAirStations.gdb\idw_cv"
)

In [29]:
#dumping it into a numpy array for calculating rmse
input = r"C:\Users\laure\OneDrive\Documents\ArcGIS\Projects\PurpleAirStations\PurpleAirStations.gdb\idw_cv"
arr_temp_idw = arcpy.da.TableToNumPyArray(input, ("OBJECTID", "Error"))

#putting the numpy array into a pd df
idw_temp_arr = pd.DataFrame(arr_temp_idw, columns=["OBJECTID", "Error"])

# Calculate squared error
idw_temp_arr['squared_error'] = idw_temp_arr['Error'] ** 2

# Calculate RMSE
idw_rmse = numpy.sqrt(idw_temp_arr['squared_error'].mean())

# Display RMSE
print("RMSE: ", idw_rmse)

RMSE:  19.199670582282227


In [None]:
#raster to point for idw
arcpy.conversion.RasterToPoint(
    in_raster="idw_temp",
    out_point_features=r"C:\Users\laure\OneDrive\Documents\ArcGIS\Projects\PurpleAirStations\PurpleAirStations.gdb\RasterT_idw_tem1",
    raster_field="Value"
)

# Local Polynomial Interpolation

In [None]:
#local polynomial interpolation
arcpy.ga.LocalPolynomialInterpolation(
    in_features="purpleair_stations_XYTableToPoint",
    z_field="purpleair_stations_XYTableToPoint.temperature",
    out_ga_layer="lp_temp_geo",
    out_raster=r"C:\Users\laure\OneDrive\Documents\ArcGIS\Projects\PurpleAirStations\PurpleAirStations.gdb\lp_temp",
    cell_size=0.000932919999999967,
    power=1,
    search_neighborhood="NBRTYPE=Standard S_MAJOR=9.87111293434023E-02 S_MINOR=9.87111293434023E-02 ANGLE=0 NBR_MAX=15 NBR_MIN=10 SECTOR_TYPE=ONE_SECTOR",
    kernel_function="EXPONENTIAL",
    bandwidth=None,
    use_condition_number="NO_USE_CONDITION_NUMBER",
    condition_number=None,
    weight_field=None,
    output_type="PREDICTION"
)

In [None]:
#cross validation for lp interpolation
arcpy.ga.CrossValidation(
    in_geostat_layer="lp_temp_geo",
    out_point_feature_class=r"C:\Users\laure\OneDrive\Documents\ArcGIS\Projects\PurpleAirStations\PurpleAirStations.gdb\lp_cv"
)


In [27]:
#dumping it into a numpy array for calculating rmse
input = r"C:\Users\laure\OneDrive\Documents\ArcGIS\Projects\PurpleAirStations\PurpleAirStations.gdb\lp_cv"
arr_temp_lp = arcpy.da.TableToNumPyArray(input, ("OBJECTID", "Error"))

#putting the numpy array into a pd df
lp_temp_arr = pd.DataFrame(arr_temp_lp, columns=["OBJECTID", "Error"])

# Calculate squared error
lp_temp_arr['squared_error'] = lp_temp_arr['Error'] ** 2

# Calculate RMSE
lp_rmse = numpy.sqrt(lp_temp_arr['squared_error'].mean())

# Display RMSE
print("RMSE: ", lp_rmse)

RMSE:  18.282743526079205


In [None]:
#raster to point for lp interpolation
arcpy.conversion.RasterToPoint(
    in_raster="lp_temp",
    out_point_features=r"C:\Users\laure\OneDrive\Documents\ArcGIS\Projects\PurpleAirStations\PurpleAirStations.gdb\RasterT_lp_temp1",
    raster_field="Value"
)

# RMSE Comparison

In [31]:
#bringing the RMSE values together in a pd df to compare

temp_compare = pd.DataFrame({'method':['kriging', 'idw', 'local polynomial'],
                            'RMSE':[krig_rmse, idw_rmse, lp_rmse]})
temp_compare



Unnamed: 0,method,RMSE
0,kriging,16.623763
1,idw,19.199671
2,local polynomial,18.282744


In [32]:
#conclusion
# Get index of row with lowest value of rmse
min_index = temp_compare['RMSE'].idxmin()

# Get method with lowest rmse
min_id = temp_compare.loc[min_index, 'method']

print("Method with the lowest RMSE value is", min_id)

Method with the lowest RMSE value is kriging
