In [1]:
import pandas as pd
from osgeo import gdal
import netCDF4
import numpy as np
import os
# import geowombat as gw
# from geowombat.data import rgbn

# load raster information

## Test case for one file

In [4]:
# Open a single PEST-CHEMGRIDS NetCDF file to inspect its structure.
filepath = "../data/NC/PEST-CHEMGRIDSv2_Alfalfa_2,4-D.nc"
ds = netCDF4.Dataset(filepath)
# Bare expression: the dataset summary becomes the cell output.
ds

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4_CLASSIC data model, file format HDF5):
    Creation date: 15-Dec-2022 14:44:04
    Authors: F. Maggi, F.H.M. Tang, T.H. Nguyen
    Version: v2
    dimensions(sizes): lon(7200), lat(3600), year(2)
    variables(dimensions): float32 lon(lon), float32 lat(lat), float32 year(year), float32 apr_H(year, lat, lon), float32 apr_L(year, lat, lon)
    groups: 

In [5]:
# Names of the variables stored in the opened file.
list(ds.variables)

['lon', 'lat', 'year', 'apr_H', 'apr_L']

In [7]:
# Read and display the full longitude coordinate axis.
ds.variables['lon'][:]

masked_array(data=[-179.975, -179.925, -179.875, ...,  179.875,  179.925,
                    179.975],
             mask=False,
       fill_value=1e+20,
            dtype=float32)

In [7]:
# Read and display the values along the year axis.
ds.variables['year'][:]

masked_array(data=[2015., 2018.],
             mask=False,
       fill_value=1e+20,
            dtype=float32)

In [8]:
# Target point to look up in the grid (latitude, longitude).
# NOTE(review): presumably decimal degrees for Quebec City, going by the name of
# the CSV written at the end of the notebook -- confirm.
input_lat = 46.81228 
input_lon = -71.21454

In [35]:
# Read the whole 'apr_H' variable into memory; the dataset summary lists its
# dimensions as (year, lat, lon).
raster_data = ds.variables['apr_H'][:]

In [12]:
# Coordinate axes of the grid, used to locate the cell nearest to the target point.
lat_var = ds.variables['lat'][:]
lon_var = ds.variables['lon'][:]

In [16]:
# Nearest-neighbour lookup: index of the axis entry with the smallest absolute
# difference to the target coordinate.
lat_idx = np.argmin(np.abs(lat_var - input_lat))
lon_idx = np.argmin(np.abs(lon_var - input_lon))

In [39]:
# Take the first slice along the leading (year) axis from the underlying array
# (`.data` bypasses the mask). The year axis displayed above is [2015, 2018], so
# index 0 presumably corresponds to 2015.
# NOTE(review): the batch loop later in this notebook samples index 1 instead.
layer0_data = raster_data.data[0]

In [40]:
# Value of the selected layer at the grid cell nearest to the target point;
# the bare name on the last line displays it as the cell output.
local_value = layer0_data[lat_idx, lon_idx]
local_value

0.003327197

## Scale up to all PEST-CHEMGRIDS files

In [19]:
# Accumulator for the per-file results that are later combined into `df`.
df_list = []

In [20]:
# Target point for the batch extraction (same values as in the single-file test
# case earlier in this notebook).
input_lat = 46.81228 
input_lon = -71.21454

In [21]:
# Sample every NetCDF file in `path_to_folder` at the grid cell nearest to
# (input_lat, input_lon) and build `df` with one row per file holding the
# high ('apr_H') and low ('apr_L') estimates.
path_to_folder = '../data/NC/'

# Position along the leading `year` axis to sample. The file inspected earlier in
# this notebook lists years [2015, 2018], so 1 presumably selects 2018.
# NOTE(review): confirm that every file shares that year layout.
year_idx = 1


def _read_cell(dataset, var_name, year_idx, lat_idx, lon_idx):
    """Return one grid-cell value of `var_name` as a float.

    Parameters
    ----------
    dataset : open netCDF4.Dataset
    var_name : name of a variable indexed as (year, lat, lon)
    year_idx, lat_idx, lon_idx : integer positions along each axis

    Returns
    -------
    float
        The cell value, or NaN when the variable does not exist in the file
        or the cell is masked (missing) in the file.
    """
    try:
        variable = dataset.variables[var_name]
    except KeyError:
        return np.nan
    # Index the variable directly so only a single value is read, instead of
    # loading the full (year, lat, lon) cube for every file.
    cell = variable[year_idx, lat_idx, lon_idx]
    # A masked cell becomes NaN rather than leaking the raw fill value.
    return float(np.ma.filled(cell, np.nan))


# Reset the accumulator here so re-running this cell does not append duplicate rows.
df_list = []

# Sorted so the row order does not depend on the filesystem's listing order.
for filename in sorted(os.listdir(path_to_folder)):
    # Only NetCDF files are of interest.
    if not filename.endswith('.nc'):
        continue

    # The context manager closes the file even if reading it raises.
    with netCDF4.Dataset(os.path.join(path_to_folder, filename), 'r') as ds:
        # Identify the grid cell nearest to the target point (recomputed per
        # file in case the grids differ).
        lat_var = ds.variables['lat'][:]
        lon_var = ds.variables['lon'][:]
        lat_idx = np.argmin(np.abs(lat_var - input_lat))
        lon_idx = np.argmin(np.abs(lon_var - input_lon))

        local_high_value = _read_cell(ds, 'apr_H', year_idx, lat_idx, lon_idx)
        local_low_value = _read_cell(ds, 'apr_L', year_idx, lat_idx, lon_idx)

    df_list.append({
        'filename': filename,
        'local_high_value': local_high_value,
        'local_low_value': local_low_value,
    })

# Build the frame once from the collected rows; explicit columns keep the schema
# stable even when the folder contains no .nc files.
df = pd.DataFrame(df_list, columns=['filename', 'local_high_value', 'local_low_value'])

  df = pd.concat(df_list, ignore_index=True)


In [36]:
# Display the collected table.
# NOTE(review): the saved output for this cell shows 'crop_name' and
# 'pesticide_name' columns that are only created in a later cell, so it looks
# stale (cells run out of order) -- re-run the notebook top to bottom.
df

Unnamed: 0,filename,local_high_value,local_low_value,crop_name,pesticide_name
0,"PEST-CHEMGRIDSv2_Alfalfa_2,4-D.nc",0.042004,0.013462,,"Alfalfa_2,4-D.nc"
1,"PEST-CHEMGRIDSv2_Alfalfa_2,4-DB.nc",0.021011,0.020942,,"Alfalfa_2,4-DB.nc"
2,PEST-CHEMGRIDSv2_Alfalfa_CARBARYL.nc,0.005422,0.002165,,Alfalfa_CARBARYL.nc
3,PEST-CHEMGRIDSv2_Alfalfa_CHLORPYRIFOS.nc,0.190494,0.186592,,Alfalfa_CHLORPYRIFOS.nc
4,PEST-CHEMGRIDSv2_Alfalfa_CLETHODIM.nc,0.006767,0.001713,,Alfalfa_CLETHODIM.nc
...,...,...,...,...,...
265,PEST-CHEMGRIDSv2_Wheat_PYROXASULFONE.nc,0.066561,0.027406,,Wheat_PYROXASULFONE.nc
266,PEST-CHEMGRIDSv2_Wheat_SULFUR.nc,0.006522,0.006300,,Wheat_SULFUR.nc
267,PEST-CHEMGRIDSv2_Wheat_TEBUCONAZOLE.nc,0.078309,0.078033,,Wheat_TEBUCONAZOLE.nc
268,PEST-CHEMGRIDSv2_Wheat_THIFENSULFURON.nc,0.017334,0.016608,,Wheat_THIFENSULFURON.nc


In [39]:
# Split each filename on '_' into at most three parts (n=2 allows two splits):
# source prefix, crop name, and the remainder (pesticide name plus extension).
# Any further underscores stay inside the third part.
df[['source', 'crop_name', 'pesticide_name']] = df['filename'].str.split('_', n=2, expand=True)
# Unused alternative for removing the extension, kept for reference:
# df['pesticide_name'] = df['pesticide_name'].str.split('.nc', n=1, expand=True)[0]

In [41]:
# Remove only a literal trailing ".nc" extension. `str.rstrip('.nc')` is not a
# suffix removal: it strips any trailing run of the characters '.', 'n' and 'c',
# so a name ending in one of those characters would be silently truncated.
df['pesticide_name'] = df['pesticide_name'].str.replace(r'\.nc$', '', regex=True)

In [43]:
# The filename and its source prefix are no longer needed once the crop and
# pesticide names have been extracted.
df = df.drop(columns=['filename', 'source'])

In [45]:
# Keep the identifying columns first, followed by the low and high estimates,
# then display the result.
df = df[['crop_name', 'pesticide_name', 'local_low_value', 'local_high_value']]
df

Unnamed: 0,crop_name,pesticide_name,local_low_value,local_high_value
0,Alfalfa,"2,4-D",0.013462,0.042004
1,Alfalfa,"2,4-DB",0.020942,0.021011
2,Alfalfa,CARBARYL,0.002165,0.005422
3,Alfalfa,CHLORPYRIFOS,0.186592,0.190494
4,Alfalfa,CLETHODIM,0.001713,0.006767
...,...,...,...,...
265,Wheat,PYROXASULFONE,0.027406,0.066561
266,Wheat,SULFUR,0.006300,0.006522
267,Wheat,TEBUCONAZOLE,0.078033,0.078309
268,Wheat,THIFENSULFURON,0.016608,0.017334


In [47]:
# Write the final table to CSV.
# NOTE(review): no `index` argument is passed, so pandas' default applies and the
# row index is presumably written as an extra unnamed first column -- confirm
# whether downstream readers expect it.
df.to_csv("../data/pesticide_application_quebec_city.csv")