## **Open Rituals**

Import the required packages and define the paths and useful functions.

In [1]:
#%% open rituals ##############################################################
import os 
import tqdm
import numpy as np
import pandas as pd
import geopandas as gpd

## define paths ###############################################################
### Project root folder -- replace this main directory with your own
Path_Main = r'C:\Users\lli55\Desktop\Lingbo Li PhD\DOC project\Model_with_SoilGrid'

### Working sub-folders, each one located directly under Path_Main
Path_Input, Path_Output, Path_Plot, Path_Shape = (
    os.path.join(Path_Main, folder)
    for folder in ('input', 'output', 'plot', 'shape')
)

## **Back-Calculation of DOC and $P_r$ at Evaluation Catchments**
- a) For each NHDPlus local catchment, the long-term average DOC concentration can be calculated using the $P_r$ predicted by the SOC time model (DOC = SOC × $P_r$).
- b) Each training or evaluation catchment is composed of several NHDPlus local catchments. Ideally, the area-weighted average $P_r$ and DOC should be close to the real measurements.

### **Step 1: Calculate the derived DOC at each NHDPlus local catchment**

In [None]:
# Load the local-catchment layer (geometry + SOC columns) and the predicted Pr
# tables. Each Pr table is keyed by COMID and carries a 'pr' column, which is
# renamed so the two soil data sources can live side by side.
soc_gpkg = os.path.join(Path_Shape, 'SOC.gpkg')
data = gpd.read_file(soc_gpkg, driver='GPKG')
pr_soilgrid = pd.read_csv(os.path.join(Path_Output, 'pr_at_2_6m.txt'), sep='\t').rename(columns={'pr': 'pr_soilgrid'})
Path_Output_old = r'C:\Users\lli55\Desktop\Lingbo Li PhD\DOC project\Data'
pr_hwsd = pd.read_csv(os.path.join(Path_Output_old, 'pr_at_2_6m_mase.txt'), sep='\t').rename(columns={'pr': 'pr_hwsd'})

# This cell overwrites SOC.gpkg with its own output. When it is run again the
# file already contains the derived columns, and merging on top of them would
# produce 'pr_*_x' / 'pr_*_y' suffixes and a KeyError in the DOC computation
# below. Dropping stale copies first makes the cell safe to re-run.
derived_columns = ['pr_hwsd', 'pr_soilgrid', 'doc_hwsd', 'doc_soilgrid']
data = data.drop(columns=derived_columns, errors='ignore')

# Attach the predicted Pr values to the local catchments; a left join keeps
# every catchment even when no prediction exists for it (Pr becomes NaN).
data = data.merge(pr_hwsd, on='COMID', how='left')
data = data.merge(pr_soilgrid, on='COMID', how='left')

# Derived DOC = SOC * Pr, computed separately for each soil data source
data['doc_hwsd'] = data['soc_hwsd'] * data['pr_hwsd']
data['doc_soilgrid'] = data['soc_soilgrid'] * data['pr_soilgrid']

# Update the SOC.gpkg file in place (driver stated explicitly to mirror the read)
data.to_file(soc_gpkg, driver='GPKG')

### **Step 2: Calculate the area-weighted average $P_r$ and DOC**

In [None]:
def _area_weighted_mean(raster, catchment, value_col):
    """Return the area-weighted mean of ``value_col`` for every catchment row.

    Parameters
    ----------
    raster : GeoDataFrame
        Local-catchment polygons holding ``value_col`` and a ``'geometry'``
        column. (Despite the name this is a vector layer.)
    catchment : GeoDataFrame
        Target polygons with a ``'geometry'`` column.
    value_col : str
        Name of the column in ``raster`` to average.

    Returns
    -------
    list of float
        One value per row of ``catchment``, in row order. ``nan`` when no
        cell with a non-missing value overlaps the catchment with positive
        area.
    """
    # Cells without a value can never contribute to the average, so discard
    # them before the spatial join instead of intersecting them first.
    cells = raster.loc[raster[value_col].notna(), [value_col, 'geometry']]

    # Identify each target by its row position. This is independent of the
    # catchment index labels and of any duplicated identifier column, and
    # keeping only the geometry avoids column-name clashes in the join.
    targets = catchment[['geometry']].reset_index(drop=True)
    targets['_row'] = np.arange(len(targets))

    # Roughly find the local catchments touching each target; the exact
    # overlap is measured below through the intersection area.
    joined = cells.sjoin(targets, how='inner', predicate='intersects')
    cell_values = joined[value_col].to_numpy(dtype=float)
    cell_geoms = list(joined['geometry'])
    members = joined.groupby('_row').indices  # row position -> joined positions

    means = []
    for row, geometry in enumerate(tqdm.tqdm(list(catchment['geometry']))):
        idx = members.get(row)
        if idx is None:
            means.append(np.nan)
            continue
        areas = np.array([geometry.intersection(cell_geoms[k]).area for k in idx])
        total_area = areas.sum()
        if total_area > 0:
            means.append(float(np.dot(areas, cell_values[idx]) / total_area))
        else:
            # Cells that only touch the boundary have zero overlap area
            means.append(np.nan)
    return means


def doc_reanalysis(raster, catchments):
    """Add the area-weighted mean of ``doc_soilgrid`` as ``derived_doc``.

    Parameters
    ----------
    raster : GeoDataFrame
        Local catchments with ``'doc_soilgrid'`` and ``'geometry'`` columns.
    catchments : GeoDataFrame
        Catchments to evaluate; not modified.

    Returns
    -------
    GeoDataFrame
        Copy of ``catchments`` with an extra ``'derived_doc'`` column.
    """
    catchment = catchments.copy()
    catchment['derived_doc'] = _area_weighted_mean(raster, catchment, 'doc_soilgrid')
    return catchment


def pr_reanalysis(raster, catchments):
    """Add the area-weighted mean of ``pr_soilgrid`` as ``derived_pr``.

    Parameters
    ----------
    raster : GeoDataFrame
        Local catchments with ``'pr_soilgrid'`` and ``'geometry'`` columns.
    catchments : GeoDataFrame
        Catchments to evaluate; not modified.

    Returns
    -------
    GeoDataFrame
        Copy of ``catchments`` with an extra ``'derived_pr'`` column.
    """
    catchment = catchments.copy()
    catchment['derived_pr'] = _area_weighted_mean(raster, catchment, 'pr_soilgrid')
    return catchment

# Evaluation catchments and the local-catchment layer; `raster` is a vector
# layer read with geopandas, despite its name.
selected = gpd.read_file(os.path.join(Path_Shape, 'SOC_compare_eval.gpkg'))
raster = gpd.read_file(os.path.join(Path_Shape, 'SOC.gpkg'))

# Area-weighted Pr and DOC for every evaluation catchment
data_pr = pr_reanalysis(raster, selected)
data_doc = doc_reanalysis(raster, selected)
data_doc['derived_pr'] = data_pr['derived_pr']

# Keep the measured and derived columns side by side and write them once
# as a tab-separated table.
data_doc_1 = data_doc[['comid', 'ave_doc', 'soc_soilgrid', 'derived_pr', 'derived_doc']]
data_doc_1.to_csv(os.path.join(Path_Output, 'eval_validation.txt'), sep='\t', index=False)