In [2]:
import pandas as pd
from osgeo import gdal
import netCDF4
import numpy as np
import os
# import geowombat as gw
# from geowombat.data import rgbn

In [38]:
filepath = "../data/unit_wf_selected_crops_average_2010_2019/wf_unit_potatoes_average_2010_2019.nc"
ds = netCDF4.Dataset(filepath)
ds

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    title: Unit water footprints of potatoes (FAO code: 116) simulated with crop model ACEA
    institution: University of Twente, Netherlands
    contact: Mialyk Oleksandr (o.mialyk@utwente.nl)
    dimensions(sizes): lat(2160), lon(4320)
    variables(dimensions): float64 lon(lon), float64 lat(lat), float32 wf_unit_rainfed_green(lat, lon), float32 wf_unit_rainfed_blue(lat, lon), float32 wf_unit_irrigated_green(lat, lon), float32 wf_unit_irrigated_blue(lat, lon), float32 wf_unit_rainfed(lat, lon), float32 wf_unit_irrigated(lat, lon), float32 wf_unit_total(lat, lon)
    groups: 

In [39]:
list(ds.variables)

['lon',
 'lat',
 'wf_unit_rainfed_green',
 'wf_unit_rainfed_blue',
 'wf_unit_irrigated_green',
 'wf_unit_irrigated_blue',
 'wf_unit_rainfed',
 'wf_unit_irrigated',
 'wf_unit_total']

In [40]:
ds.variables['wf_unit_irrigated_blue'][:].data[0]

array([1.e+20, 1.e+20, 1.e+20, ..., 1.e+20, 1.e+20, 1.e+20], dtype=float32)

In [53]:
input_lat = 32.123 
input_lon = 61.783

In [54]:
lat_var = ds.variables['lat'][:]
lon_var = ds.variables['lon'][:]

In [55]:
lat_idx = np.argmin(np.abs(lat_var - input_lat))
lon_idx = np.argmin(np.abs(lon_var - input_lon))

In [56]:
raster_data = ds.variables['wf_unit_irrigated_blue'][:].data
local_value = raster_data[lat_idx, lon_idx]
local_value

623.37805

In [58]:
ds.variables['wf_unit_irrigated_blue'][:].data[lat_idx, lon_idx]

623.37805

In [59]:
indicator_list = ['wf_unit_rainfed_green',
                     'wf_unit_rainfed_blue',
                     'wf_unit_irrigated_green',
                     'wf_unit_irrigated_blue',
                     'wf_unit_rainfed',
                     'wf_unit_irrigated',
                     'wf_unit_total']

In [99]:
dff_list = []
df_list = []

path_to_folder = '../data/unit_wf_selected_crops_average_2010_2019/'
for filename in os.listdir(path_to_folder):
    # Check if the file is a .nc file
    if filename.endswith('.nc'):
        # Open the file
        ds = netCDF4.Dataset(path_to_folder + filename, 'r')

        # identify the correct grid element
        lat_var = ds.variables['lat'][:]
        lon_var = ds.variables['lon'][:]
        lat_idx = np.argmin(np.abs(lat_var - input_lat))
        lon_idx = np.argmin(np.abs(lon_var - input_lon))

        for ind in indicator_list:
            try:
                 # Get the local value (using the same lat and lon coordinates)
                value = ds.variables[ind][:].data[lat_idx, lon_idx]
            except KeyError:
                value = np.NaN

            # Create a new DataFrame for each iteration
            new_dff = pd.DataFrame({'indicator': [ind], 'value': [value]})

            dff_list.append(new_dff)

        # Concatenate the DataFrames in the list
        dff = pd.concat(dff_list, ignore_index=True)
        
        # Close the file
        ds.close()
       
        # Create a new DataFrame for each iteration
        dff['filename'] = filename    

        # Append the new DataFrame to the list
        df_list.append(dff)
        

# Concatenate the DataFrames in the list
df = pd.concat(df_list, ignore_index=True)

In [100]:
df[['crop_name1', 'crop_name2']] = df['filename'].str.split('unit_', n=1, expand=True)
df[['crop_name', 'crop_name1']] = df['crop_name2'].str.split('_average', n=1, expand=True)
df = df.drop(['crop_name1', 'crop_name2'], axis=1)
df

Unnamed: 0,indicator,value,filename,crop_name
0,wf_unit_rainfed_green,1.000000e+20,wf_unit_almonds_average_2010_2019.nc,almonds
1,wf_unit_rainfed_blue,1.000000e+20,wf_unit_almonds_average_2010_2019.nc,almonds
2,wf_unit_irrigated_green,1.000000e+20,wf_unit_almonds_average_2010_2019.nc,almonds
3,wf_unit_irrigated_blue,1.000000e+20,wf_unit_almonds_average_2010_2019.nc,almonds
4,wf_unit_rainfed,1.000000e+20,wf_unit_almonds_average_2010_2019.nc,almonds
...,...,...,...,...
6617,wf_unit_irrigated_green,1.000000e+20,wf_unit_yams_average_2010_2019.nc,yams
6618,wf_unit_irrigated_blue,1.000000e+20,wf_unit_yams_average_2010_2019.nc,yams
6619,wf_unit_rainfed,1.000000e+20,wf_unit_yams_average_2010_2019.nc,yams
6620,wf_unit_irrigated,1.000000e+20,wf_unit_yams_average_2010_2019.nc,yams


In [101]:
df = df.drop(['filename'], axis=1)

In [102]:
df = df.loc[:, ['crop_name', 'indicator', 'value']]
df

Unnamed: 0,crop_name,indicator,value
0,almonds,wf_unit_rainfed_green,1.000000e+20
1,almonds,wf_unit_rainfed_blue,1.000000e+20
2,almonds,wf_unit_irrigated_green,1.000000e+20
3,almonds,wf_unit_irrigated_blue,1.000000e+20
4,almonds,wf_unit_rainfed,1.000000e+20
...,...,...,...
6617,yams,wf_unit_irrigated_green,1.000000e+20
6618,yams,wf_unit_irrigated_blue,1.000000e+20
6619,yams,wf_unit_rainfed,1.000000e+20
6620,yams,wf_unit_irrigated,1.000000e+20


In [104]:
df.to_csv("../data/waterfootprit_example.csv")