In [1]:
import pandas as pd
import numpy as np

# read in data
# `data_path` is the shared root directory: the raw CSV is read from under it
# here, and the harmonised CSV is written back under it at the end.
data_path = '/mnt/inca/ai4sh_data.harmo'
raw_csv = f'{data_path}/raw_data/Portugal/pt_infosolo.csv'
# low_memory=False makes pandas parse the file in one pass instead of in chunks.
df = pd.read_csv(raw_csv, low_memory=False)


In [2]:
# Output table; it starts empty and is filled column by column from `df`.
temp = pd.DataFrame()

# depth, coordinates, time, and site id
# Maps each output column name to the `df` column it is copied from.
# Dict order is the resulting column order.
meta_columns = {
    'lat': 'latitude',
    'lon': 'longitude',
    'time': 'year',
    'hzn_top': 'hor_top',
    'hzn_btm': 'hor_bot',
    'point_id': 'pt_infosolo_site_id',
}
for out_name, raw_name in meta_columns.items():
    temp[out_name] = df[raw_name]

# oc
# Organic carbon: scale the reported value, apply a multiplier that depends on
# the analytical method recorded in `oc_m`, then blank the rows whose method is
# in the excluded list.
oc_method = df['oc_m']
temp['oc'] = df['oc']*10 # % -> g/kg

# (methods, multiplier) pairs.
# NOTE(review): presumably method-recovery correction factors - confirm the source.
oc_factors = [
    (['Walkley-Black','Permanganate'], 1.3),
    (['Tinsley'], 1.05),
    (['Springer & Klee. Wet combustion'], 1.01),
    (['Colorimetric. Perkin-Elmer'], 1.04),
]
for methods, factor in oc_factors:
    rows = oc_method.isin(methods)
    temp.loc[rows,'oc'] = temp.loc[rows,'oc']*factor

# Rows measured with these methods are dropped (set to NaN).
# NOTE(review): 'Unkown' is kept verbatim - presumably it matches the spelling
# used in the raw data; verify before "fixing" it.
oc_excluded = oc_method.isin(['Not determined','Unkown','Titrimetric determination'])
temp.loc[oc_excluded,'oc'] = np.nan

In [3]:
# ph h2o, N, caco3
# Copy the three measurements first (same column order as before), then blank
# every row whose method column says the property was not determined.
temp['ph_h2o'] = df['ph']
temp['N'] = df['n']
temp['caco3'] = df['caco3']*10 # % -> g/kg

# (output column, method column in df)
not_determined_checks = (
    ('ph_h2o', 'ph_m'),
    ('N', 'n_m'),
    ('caco3', 'caco3_m'),
)
for out_col, method_col in not_determined_checks:
    temp.loc[df[method_col]=='Not determined', out_col] = np.nan

# texture
# `a` is the only texture method whose silt and sand values are kept. The label
# is compared verbatim against `texture_m`, so it must stay byte-identical.
# NOTE(review): the "mm" units inside the label look like a garbled micrometre
# sign, but the string has to match the raw data - do not edit without checking.
a = 'Pipette method for particles having diameters <50 mm (clay and silt fractions), and by sieving for particles between 50 and 2000 mm (sand). Texture limits converted to the Atterberg scale using smooth splines.'
other_texture_method = df['texture_m']!=a

temp['clay'] = df['c'] # <0.002mm
temp['silt'] = df['si']
temp['sand'] = df['fs'] + df['cs']

# Only silt and sand are blanked for other methods; clay is left as reported.
# NOTE(review): presumably because the clay limit is the same in every scale -
# confirm.
for fraction in ('silt', 'sand'):
    temp.loc[other_texture_method, fraction] = np.nan

# cec
# Copy the cation exchange capacity and blank the rows whose method column
# reports 'Not determined'.
temp['CEC'] = df['cec']
# The mask must target 'CEC' (upper case), the column created just above.
# Using the lower-case label 'cec' would leave 'CEC' unmasked and, through
# .loc enlargement, add a stray all-NaN 'cec' column to the output.
cec_not_determined = df['cec_m']=='Not determined'
temp.loc[cec_not_determined,'CEC'] = np.nan

# extractable K
# Start from the reported value, convert it with a method-specific formula
# selected by `k_m`, then blank the rows with an unusable method.
k_method = df['k_m']
temp['K'] = df['k']

k_conversions = {
    # egner-riehm, AL -> AAE
    'Egner-Riehm': lambda k: k*0.95,
    # morgan -> AAE, and kg/ha at 17cm depth -> mg/kg
    'Morgan': lambda k: (k*1.038+32.7)/1.7,
    # NOTE(review): no source given for the Bray I factor - confirm.
    'Bray I': lambda k: k*1.30,
}
for method, convert in k_conversions.items():
    rows = k_method==method
    temp.loc[rows,'K'] = convert(temp.loc[rows,'K'])

# NOTE(review): 'Unkown' is kept verbatim - presumably it matches the raw data.
temp.loc[k_method.isin(['Not determined','Unkown']),'K'] = np.nan

# extractable P
# Only rows measured with 'Bray I' get a value; every other row stays NaN.
bray_rows = df['p_m']=='Bray I'
temp['P'] = np.nan
# Linear transform of the Bray I value.
# NOTE(review): presumably the inverse of a regression onto a reference method -
# confirm the source of 1.689 and 0.514.
temp.loc[bray_rows,'P'] = (df.loc[bray_rows,'p']-1.689)/0.514

# bulk density
# Copy the reported value and blank rows whose method is 'Not determined'.
bd_not_determined = df['bd_m']=='Not determined'
temp['bulk_density'] = df['bd']
temp.loc[bd_not_determined,'bulk_density'] = np.nan


In [8]:
# Constant columns written to every row.
# NOTE(review): 'nuts0' is presumably the NUTS level-0 country code and 'ref'
# the dataset identifier - confirm.
constant_columns = {
    'nuts0': 'PT',
    'ref': 'portugal.infosolo',
}
for column, value in constant_columns.items():
    temp[column] = value

In [9]:
# Write the harmonised table under the shared data root, without the index.
output_csv = f'{data_path}/data/portugal_harmonized_v1.csv'
temp.to_csv(output_csv, index=False)