# Calculate contributing area, recharge temperature and heat flux of thermal springs

This notebook uses estimates of groundwater recharge and the recorded average discharge of springs to calculate the contributing area of each spring, i.e. the area that contributes groundwater recharge to each spring. It then draws a circle of the size of the contributing area around each spring and records the minimum and maximum surface elevation in this circle using digital elevation data. This can then be used to estimate the minimum and maximum infiltration temperature of the water that feeds the thermal springs. This is useful for calculating the heat flow of each spring.

**Input:** ``data/thermal_springs_alps_with_geospatial_data.csv`` -> spring dataset with the results of the geospatial data analysis obtained with the ``GIS_analysis_spring_data.ipynb`` notebook.

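**Output:** ``data/thermal_springs_alps_with_HF_estimates.csv`` (the input filename with its ``_with_geospatial_data.csv`` ending replaced by ``_with_HF_estimates.csv``) -> contains additional columns for the estimated recharge temperature, the contributing area and the spring heat flux. The notebook also writes a cleaned copy with units added to the column names (``..._with_HF_estimates_clean.csv``), the rejected non-thermal springs (``data/non_thermal_springs.csv``) and a summary table (``data/summary_thermal_springs_data.csv``).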

In [64]:
import math
import itertools
import string
import chardet

import numpy as np
import pandas as pd
import geopandas as gp
import scipy.stats

import shapely.geometry as gm
import matplotlib.pyplot as pl

#import lib.pyGISlib as pyGISlib

import lib.various_functions as vf

## Parameters

In [65]:
# define locations of spring and raster files. change this to the locations on your own machine or server:
spring_data_file = 'data/thermal_springs_alps_with_geospatial_data.csv'

# shapefile with outline of the Alps
fnb = 'GIS_data/outline_alps_final.shp'

# min. difference between recharge and spring temp to be considered a thermal spring
temp_cutoff_thermal_springs = 5.0

# gravitational acceleration (m s-2), used for the viscous dissipation term
g = 9.81

# specific heat capacity J kg-1 K-1
c = 4181.3

# fluid density (kg m-3)
rho = 1000.0

# the degree sign character (code point 176)
degree_symbol = chr(176)

# number of seconds in a year of 365.25 days; converts recharge per year to recharge per second
year = 365.25 * 24 * 3600.

# average geothermal gradient (temperature increase per m depth; circulation depths
# derived from it are reported in m)
avg_thermal_gradient = 26e-3

# background heat flow density (W m-2; spring heat flow in W divided by this value
# gives the thermal footprint in m2)
background_HF = 76e-3

In [66]:
# area of the alps outline shapefile used in GIS_analysis_spring_data.ipynb (m2)
#area_alps = 217759312854.528

# read shapefile with the boundary of the Alps:
dgb = gp.read_file(fnb)

# reproject to UTM zone 32N (EPSG:32632), a metric CRS, so that the polygon
# area comes out in m2. The EPSG code is passed directly; the older
# {'init': 'epsg:...'} dict relies on the deprecated PROJ "init" syntax.
dgbu = dgb.to_crs(epsg=32632)

# area (m2) of the first feature in the shapefile
area_alps = dgbu.geometry.area[0]

print('used area of alps of %0.2e km2' % (area_alps / 1e6))

used area of alps of 2.02e+05 km2


## Load the thermal springs csv file

In [67]:
def find_encoding(fname):
    """Guess the text encoding of a file.

    Parameters
    ----------
    fname : str
        Path of the file to inspect.

    Returns
    -------
    The value stored under the ``'encoding'`` key of the ``chardet.detect``
    result for the raw bytes of the file.
    """
    # read the raw bytes inside a context manager so the file handle is
    # always closed again, also when reading fails
    with open(fname, 'rb') as f:
        raw_bytes = f.read()

    return chardet.detect(raw_bytes)['encoding']


# detect the encoding of the spring csv file once; the same encoding is reused
# for reading the file and for writing the output csv files
encoding = find_encoding(spring_data_file)
print(encoding)

UTF-8-SIG


In [68]:
# read the spring table using the detected encoding
df = pd.read_csv(spring_data_file, encoding=encoding)
#print('columns in csv file: ', df.columns.tolist())

## Get rid of springs without coordinates and convert columns to numeric format:

In [69]:
# Coerce every column to a numeric dtype, except columns whose name contains
# 'spring' or 'type'; values that cannot be parsed become NaN.
# NOTE(review): text columns such as the reference / DOI columns do not match
# either marker, so they are presumably turned into NaN here as well - confirm
# that this is intended.
text_markers = ('spring', 'type')
numeric_cols = [name for name in df.columns
                if not any(marker in name for marker in text_markers)]

for name in numeric_cols:
    df[name] = pd.to_numeric(df[name], errors='coerce')

print(len(df))

394


## Convert to geopandas file:
(code found here https://gis.stackexchange.com/questions/114066/handling-kml-csv-with-geopandas-drivererror-unsupported-driver-ucsv)

In [70]:
# build one shapely Point per spring from the 'long' (x) and 'lat' (y) columns
# and wrap the table in a GeoDataFrame.
# NOTE(review): no CRS is assigned to the GeoDataFrame; the coordinates are
# presumably geographic lon/lat - confirm before any reprojection or distance calculation.
df['geometry'] = df.apply(lambda z: gm.Point(z['long'], z['lat']), axis=1)
dg = gp.GeoDataFrame(df)

In [71]:
## Some stats

In [72]:
# summary statistics of the spring temperature and discharge columns
print('temperature = ', df['temperature'].describe())
print('discharge = ', df['flow_rate'].describe())

# list the springs (if any) with a recorded discharge of exactly zero
ind = df['flow_rate'] == 0
print(df.loc[ind])

temperature =  count    364.000000
mean      22.286951
std       14.662780
min        3.400000
25%       10.450000
50%       18.050000
75%       31.500000
max       70.500000
Name: temperature, dtype: float64
discharge =  count    241.000000
mean       0.011079
std        0.033609
min        0.000001
25%        0.000400
50%        0.001700
75%        0.006450
max        0.305000
Name: flow_rate, dtype: float64
Empty DataFrame
Columns: [spring number, spring location, spring name, long, lat, type, well_depth, flow_rate_mean, flow_rate_min, flow_rate_max, temp_mean, temp_min, temp_max, reference, reference_DOI, reference_link, EC, pH, sample_temperature, TDS, TDS_min, TDS_max, Na, Ca, Mg, K, NH4, Cl, F, SO4, HCO3, CO3, NO3, Si, Li, SiO2, reference_hydrochemistry, doi_hydrochemistry, reference_link_hydrochemistry, delta_18O, delta_2H, 3H, delta_14C, delta_13C, 4He, 3He, reference_isotope_data, DOI_isotope_data, reference_link_isotope_data, temperature, flow_rate, log_flow_rate, log_temper

## Remove non-thermal springs

In [73]:
# A spring is treated as non-thermal when its temperature, lowered by the
# cutoff, is below the min_surface_temp_ws column ...
temp_minus_cutoff = dg['temperature'] - temp_cutoff_thermal_springs
non_thermal_ind1 = temp_minus_cutoff < dg['min_surface_temp_ws']
print('temp < infiltration temp', np.sum(non_thermal_ind1))

# ... or when it has no temperature value at all
non_thermal_ind2 = dg['temperature'].isnull()
print('nan temp values ', np.sum(non_thermal_ind2))

# combined mask of all springs that will be removed
non_thermal_ind = non_thermal_ind1 | non_thermal_ind2
print(np.sum(non_thermal_ind))
print(dg.loc[non_thermal_ind, ['temperature', 'min_surface_temp_ws']])

# total number of springs and number of springs that remain
n_remaining = len(dg.loc[~non_thermal_ind])
print(len(dg), n_remaining)

temp < infiltration temp 53
nan temp values  30
83
     temperature  min_surface_temp_ws
15           NaN            -5.620833
27          4.80             0.562500
29          3.40             1.387500
32          5.90             2.491667
57         14.60            12.133333
..           ...                  ...
371         6.85             3.970833
373         7.00             3.970833
385          NaN             1.416667
386         4.85             1.416667
387         5.70             1.416667

[83 rows x 2 columns]
394 311


In [74]:
# keep a record of the springs that are rejected as non-thermal
dg_nt = dg.loc[non_thermal_ind]
dg_nt.to_csv('data/non_thermal_springs.csv')

# Continue with the thermal springs only. The explicit .copy() makes the
# filtered table an independent object: the following cells add many columns
# to dg, and assigning to a filtered slice triggers pandas'
# SettingWithCopyWarning and can leave the assignment without effect.
dg = dg.loc[~non_thermal_ind].copy()
print('remaining springs = %i' % (len(dg)))

remaining springs = 311


### Create dataframe to store summary results

In [75]:
# Summary table with a single row ('Alps'). It starts without columns; the
# cells below add one column per summary statistic with dfr.loc[...] = value.
# Earlier, fixed column layout (kept for reference):
#cols = ['n_springs_total', 'n_springs_with_HF_data', 
#        'spring_density', 
#        'total_HF_min', 'total_HF_max', 
#        'total_HF_min_est', 'total_HF_max_est', 
#        'circulation_temp_min', 'circulation_temp_max',
#        'HF_up_min', 'HF_up_max', 'HF_down_min', 'HF_down_max']

cols = []

ind = ['Alps']

dfr = pd.DataFrame(index=ind, columns=cols)
#dfr.head()

## Calculate the contributing area for each spring 

by dividing discharge (m3/sec) by recharge (m/s):

In [76]:
# contributing area = discharge / recharge rate; dividing the recharge column
# by the number of seconds per year converts it to a rate per second first.
# NOTE(review): this uses the 'flow_rate_mean' column, whereas the heat flux
# calculations below use 'flow_rate' - confirm that the difference is intended.
dg['contributing_area'] = dg['flow_rate_mean'] / (dg['recharge'] /year)
print('statistics contributing area:\n', dg['contributing_area'].describe())

statistics contributing area:
 count    1.980000e+02
mean     5.955481e+05
std      1.873172e+06
min      1.468059e+02
25%      2.245289e+04
50%      8.472520e+04
75%      3.740563e+05
max      1.590481e+07
Name: contributing_area, dtype: float64


## Get the min., best and max  recharge temperature

In [77]:
# upper bound: surface temperature column; lower bound: minimum surface
# temperature column (min_surface_temp_ws); best estimate: mean of the two
surface_temp = dg['surface_temp']
min_surface_temp = dg['min_surface_temp_ws']

dg['recharge_temp_max'] = surface_temp

# recharge temperatures below freezing are raised to 0 degrees; the upper
# bound is left as it is
dg['recharge_temp_min'] = min_surface_temp.clip(lower=0.0)
dg['recharge_temp_best'] = ((surface_temp + min_surface_temp) / 2.0).clip(lower=0.0)

## Get the min., best and max recharge elevation

In [78]:
# recharge elevation: the highest value is taken from the max_elevation_ws
# column, the lowest is the elevation of the spring itself and the best
# estimate is the mean of the two
dg['rch_elev_max'] = dg['max_elevation_ws']
dg['rch_elev_min'] = dg['elevation']
dg['rch_elev_best'] = (dg['max_elevation_ws'] + dg['elevation']) / 2.0

## Calculate circulation temperature based on SiO2

In [79]:
# only springs with a SiO2 value above 5.0 get a circulation temperature;
# all other springs keep NaN in the three circulation_temp_* columns
ind_ok = dg['SiO2'] > 5.0

# NOTE(review): the result of vf.SI_geothermometers is indexed as
# [0] = best, [1] = min, [2] = max estimate - verify against lib/various_functions
circ_temps = vf.SI_geothermometers(dg.loc[ind_ok, 'SiO2'].values)
dg.loc[ind_ok, 'circulation_temp_best'] = circ_temps[0]
dg.loc[ind_ok, 'circulation_temp_min'] = circ_temps[1]
dg.loc[ind_ok, 'circulation_temp_max'] = circ_temps[2]

print('mean circulation temp = %0.2e (%0.2e - %0.2e) degr. C' 
      % (dg['circulation_temp_best'].mean(), 
         dg['circulation_temp_min'].mean(), 
         dg['circulation_temp_max'].mean()))

mean circulation temp = 6.80e+01 (5.60e+01 - 7.61e+01) degr. C


## Calculate circulation depth

In [80]:
# circulation depth = (circulation temperature - surface temperature) divided
# by the average geothermal gradient, for each of the three scenarios
scenarios = ('best', 'min', 'max')

for scenario in scenarios:
    temp_excess = dg['circulation_temp_%s' % scenario] - dg['surface_temp']
    dg['circ_depth_%s' % scenario] = temp_excess / avg_thermal_gradient

mean_depths = tuple(dg['circ_depth_%s' % scenario].mean() for scenario in scenarios)
print('mean circulation depth = %0.2e (%0.2e - %0.2e) m' % mean_depths)

mean circulation depth = 2.28e+03 (1.82e+03 - 2.59e+03) m


## Estimate h at max circulation depth

In [81]:
# estimate of the hydraulic head at circulation depth: the mean of the spring
# elevation and the max_gw_lvl_elev_ws column
dg['h_circ_depth'] = (dg['elevation'] + dg['max_gw_lvl_elev_ws']) / 2.0

## Calculate the heat flux of each spring. 

This is the product of the difference between recharge temperature and discharge temperature of the spring, the discharge of the spring and the heat capacity and density of the spring water.

In [82]:
def calc_net_heat_flow(Q, h1, h2, T1, T2, rho=1000.0, g=9.81, c=4181.3):
    """Heat gained by water flowing from point 1 to point 2, minus viscous dissipation.

    Works on scalars and, element-wise, on arrays / pandas Series.

    Parameters
    ----------
    Q : discharge
    h1, h2 : hydraulic head at the start and at the end of the flow path
    T1, T2 : temperature at the start and at the end of the flow path
    rho : fluid density
    g : gravitational acceleration
    c : specific heat capacity

    Returns
    -------
    H : ``H_init`` minus ``viscous_dissipation``
    H_init : ``rho * c * (T2 - T1) * Q``
    viscous_dissipation : ``rho * g * (h1 - h2) * Q``

    With SI units (Q in m3 s-1, heads in m, temperatures in K or degr. C) all
    three results are in W:
    kg m-3 * m s-2 * m * m3 s-1 = kg m2 s-3 = W  and
    kg m-3 * J kg-1 K-1 * K * m3 s-1 = J s-1 = W
    """
    head_drop = h1 - h2
    temp_change = T2 - T1

    # heat generated by the loss of hydraulic head along the flow path
    viscous_dissipation = rho * g * head_drop * Q

    # heat needed to change the temperature of the discharging water
    H_init = rho * c * temp_change * Q

    return H_init - viscous_dissipation, H_init, viscous_dissipation


# Heat flux scenarios. Each scenario `mm` is paired with the opposite
# recharge scenario `mmi` ('min' with 'max', 'best' with 'best', 'max' with
# 'min'): the recharge temperature and recharge elevation use `mmi`, the
# circulation temperature uses `mm`.
minmaxs = ['min', 'best', 'max']

for mm, mmi in zip(minmaxs, minmaxs[::-1]):

    # temperature and hydraulic head at the three points along the flow path:
    # recharge area -> circulation depth -> spring
    Ts = [dg['recharge_temp_%s' % mmi], dg['circulation_temp_%s' % mm], dg['temperature']]
    hs = [dg['rch_elev_%s' % mmi], dg['h_circ_depth'], dg['elevation']]

    H_labels = ['down', 'up']

    # consecutive pairs of points give the two legs of the flow path:
    # 'down' = recharge area -> circulation depth, 'up' = circulation depth -> spring
    for label, T1, T2, h1, h2 in zip(H_labels, Ts[:-1], Ts[1:], hs[:-1], hs[1:]):
        H, H_init, Hv = calc_net_heat_flow(dg['flow_rate'], h1, h2, T1, T2, rho=rho, g=g, c=c)

        dg['H_%s_%s' % (label, mm)] = H
        #dg['Hi_%s_%s' % (label, mm)] = H_init
        dg['Hv_%s_%s' % (label, mm)] = Hv
        #dg['H_div_Hv_%s_%s' % (label, mm)] = H / Hv

        # set heat flux estimates with the wrong sign to zero: negative values
        # for the 'down' leg, positive values for the 'up' leg. The viscous
        # dissipation columns (Hv_*) are left unchanged.
        if label == 'down':
            ind_lz = dg['H_%s_%s' % (label, mm)] < 0.0
            dg.loc[ind_lz, 'H_%s_%s' % (label, mm)] = 0.0
        elif label == 'up':
            ind_lz = dg['H_%s_%s' % (label, mm)] > 0.0
            dg.loc[ind_lz, 'H_%s_%s' % (label, mm)] = 0.0

    # net heat flux over the whole flow path: recharge area -> spring; this
    # does not use the circulation temperature
    H, Ht, Hv = calc_net_heat_flow(dg['flow_rate'], hs[0], hs[-1], Ts[0], Ts[-1], rho=rho, g=g, c=c)

    label = 'net'
    dg['H_%s_%s' % (label, mm)] = H
    #dg['Ht_%s_%s' % (label, mm)] = Ht
    dg['Hv_%s_%s' % (label, mm)] = Hv
    #dg['Hv_div_H_%s_%s' % (label, mm)] = Hv / H
    #dg['Hv_div_Ht_%s_%s' % (label, mm)] = Hv / Ht

    # remove <0 heat flux estimates
    ind_lz = dg['H_%s_%s' % (label, mm)] < 0.0
    dg.loc[ind_lz, 'H_%s_%s' % (label, mm)] = 0.0

## Summarize and store results

In [83]:
ix = 'Alps'
H_labels = ['net', 'down', 'up']

# total background heat flow over the area of the Alps (MW)
background_HF_MW = background_HF * area_alps / 1e6
dfr.loc[ix, 'total_background_HF_MW'] = background_HF_MW

# one set of summary columns for every scenario (min/best/max) and for every
# heat flux component (net/down/up)
for mm in minmaxs:
    for label in H_labels:
        heat_flux = dg['H_%s_%s' % (label, mm)]
        total_H_MW = heat_flux.sum() / 1e6
        n_with_data = len(heat_flux.dropna())

        # totals of the heat flux and of the viscous dissipation (W -> MW)
        dfr.loc[ix, 'total_H_%s_%s_MW' % (label, mm)] = total_H_MW
        dfr.loc[ix, 'total_Hv_%s_%s_MW' % (label, mm)] = dg['Hv_%s_%s' % (label, mm)].sum() / 1e6

        # total heat flux as a heat flow density over the area of the Alps
        dfr.loc[ix, 'total_H_%s_%s_mW_m-2' % (label, mm)] = total_H_MW * 1e6 / area_alps * 1000.0

        # ... and as a percentage of the background heat flow
        dfr.loc[ix, 'total_H_%s_%s_as_perc_of_background_HF' % (label, mm)] = \
            total_H_MW / background_HF_MW * 100.0

        # number of springs with a heat flux value and their density
        dfr.loc[ix, 'n_springs_with_H_%s_data' % label] = n_with_data
        dfr.loc[ix, 'spring_density_%s_per_km2' % label] = n_with_data / (area_alps / 1e6)

## Calculate net heat flux for springs with circulation depth data

In [84]:
# springs for which a downward heat flux could be calculated (H_down_best is not NaN)
indd = dg['H_down_best'].notnull()

# total net heat flux (MW) of this subset for the best, min and max scenario;
# only the min and max totals are printed below
H_net_springs_with_circ = dg.loc[indd, 'H_net_best'].sum() / 1e6
H_net_springs_with_circ_min = dg.loc[indd, 'H_net_min'].sum() / 1e6
H_net_springs_with_circ_max = dg.loc[indd, 'H_net_max'].sum() / 1e6
print('net heat flow for springs with downward heat flux / circulation temp estimate = %0.0f - %0.0f MW'
      % (H_net_springs_with_circ_min, H_net_springs_with_circ_max))

net heat flow for springs with downward heat flux / circulation temp estimate = 22 - 32 MW


## Report heat flow estimates

In [85]:
n_springs_total = len(dg)

# print the totals stored in the summary table for each heat flux component
for label in H_labels:

    n_with_data = dfr.loc[ix, 'n_springs_with_H_%s_data' % label]

    H_best = dfr.loc[ix, 'total_H_%s_best_MW' % label]
    H_min = dfr.loc[ix, 'total_H_%s_min_MW' % label]
    H_max = dfr.loc[ix, 'total_H_%s_max_MW' % label]

    print('\ntotal %s heat flux of %i of a total of %i thermal springs in the Alps = %0.0f (%0.0f - %0.0f) MW' 
          % (label, n_with_data, n_springs_total, H_best, H_min, H_max))

    # MW -> W, divided by the area (m2), W m-2 -> mW m-2
    print('\twhich equals a heat flow density of %0.3e - %0.3e mW m-2' 
          % (H_min * 1e6 / area_alps * 1e3,
             H_max * 1e6 / area_alps * 1e3))

    print('\twhich equals %0.3f - %0.3f perc of the background heat flux' 
          % (dfr.loc[ix, 'total_H_%s_min_as_perc_of_background_HF' % label],
             dfr.loc[ix, 'total_H_%s_max_as_perc_of_background_HF' % label]))

    Hv_best = dfr.loc[ix, 'total_Hv_%s_best_MW' % label]
    Hv_min = dfr.loc[ix, 'total_Hv_%s_min_MW' % label]
    Hv_max = dfr.loc[ix, 'total_Hv_%s_max_MW' % label]

    print('\ntotal %s viscous dissipation for %i of a total of %i thermal springs in the Alps = %0.0f (%0.0f - %0.0f) MW' 
          % (label, n_with_data, n_springs_total, Hv_best, Hv_min, Hv_max))




total net heat flux of 210 of a total of 311 thermal springs in the Alps = 111 (84 - 146) MW
	which equals a heat flow density of 4.175e-01 - 7.203e-01 mW m-2
	which equals 0.549 - 0.948 perc of the background heat flux

total net viscous dissipation for 210 of a total of 311 thermal springs in the Alps = 14 (29 - 0) MW

total down heat flux of 58 of a total of 311 thermal springs in the Alps = 71 (51 - 87) MW
	which equals a heat flow density of 2.517e-01 - 4.318e-01 mW m-2
	which equals 0.331 - 0.568 perc of the background heat flux

total down viscous dissipation for 58 of a total of 311 thermal springs in the Alps = 2 (17 - -12) MW

total up heat flux of 58 of a total of 311 thermal springs in the Alps = -45 (-30 - -56) MW
	which equals a heat flow density of -1.501e-01 - -2.753e-01 mW m-2
	which equals -0.197 - -0.362 perc of the background heat flux

total up viscous dissipation for 58 of a total of 311 thermal springs in the Alps = 12 (12 - 12) MW


## Report spring density

In [96]:
# average area per thermal spring: area of the Alps (m2) divided by the number
# of springs, converted to km2 (the label previously said "km")
area_per_spring = area_alps / len(dg)
print('spring density = 1 spring per x km2 = ', area_per_spring / 1e6)

# radius (km) of a circle with this area
print('= radius (km) of ', np.sqrt(area_per_spring / np.pi) / 1e3)

spring density = 1 spring per x km =  650.662033288625
=radius of  14.3913917937153


## Calculate thermal footprint

In [87]:
H_labels = ['net', 'down', 'up']

# thermal footprint: the area over which the background heat flow adds up to
# the (absolute) heat flux of a spring; thermal radius: the radius of a circle
# with that area
for label, mm in itertools.product(H_labels, minmaxs):
    footprint_col = 'thermal_footprint_%s_%s' % (label, mm)
    radius_col = 'thermal_radius_%s_%s' % (label, mm)

    dg[footprint_col] = np.abs(dg['H_%s_%s' % (label, mm)]) / background_HF
    dg[radius_col] = np.sqrt(np.abs(dg[footprint_col]) / np.pi)


## Report thermal footprint

In [88]:
#
# mean thermal footprint and thermal radius over all springs, reported as
# best (min - max) for each heat flux component
for label in H_labels:
    footprint_best = dg['thermal_footprint_%s_best' % label].mean()
    footprint_min = dg['thermal_footprint_%s_min' % label].mean()
    footprint_max = dg['thermal_footprint_%s_max' % label].mean()
    print('mean thermal footprint  %s = %0.2e (%0.2e - %0.2e) (m2)' 
          % (label, footprint_best, footprint_min, footprint_max))

    radius_best = dg['thermal_radius_%s_best' % label].mean()
    radius_min = dg['thermal_radius_%s_min' % label].mean()
    radius_max = dg['thermal_radius_%s_max' % label].mean()
    print('mean thermal radius %s = %0.2e (%0.2e - %0.2e) (m)' 
          % (label, radius_best, radius_min, radius_max))


mean thermal footprint  net = 6.98e+06 (5.29e+06 - 9.13e+06) (m2)
mean thermal radius net = 9.89e+02 (7.77e+02 - 1.17e+03) (m)
mean thermal footprint  down = 1.62e+07 (1.16e+07 - 1.98e+07) (m2)
mean thermal radius down = 1.65e+03 (1.33e+03 - 1.84e+03) (m)
mean thermal footprint  up = 1.02e+07 (6.89e+06 - 1.26e+07) (m2)
mean thermal radius up = 1.27e+03 (9.37e+02 - 1.45e+03) (m)


## Summarize contribution of viscous dissipation

In [97]:
# Ratio of the viscous dissipation to the sum of the net heat flux and the
# viscous dissipation, for each scenario. H_net_* has already been set to zero
# where it was negative, so the ratio is 1 for those springs.
for scenario in ('min', 'best', 'max'):
    dissipation = dg['Hv_net_%s' % scenario]
    dg['Hv_ratio_%s' % scenario] = dissipation / (dg['H_net_%s' % scenario] + dissipation)

dg['Hv_ratio_best'].describe(), dg['Hv_ratio_min'].describe(), dg['Hv_ratio_max'].describe()

(count    210.000000
 mean       0.193311
 std        0.202895
 min        0.002663
 25%        0.060377
 50%        0.121288
 75%        0.239062
 max        1.000000
 Name: Hv_ratio_best, dtype: float64,
 count    210.000000
 mean       0.459409
 std        0.364788
 min        0.005380
 25%        0.137102
 50%        0.326507
 75%        0.882419
 max        1.000000
 Name: Hv_ratio_min, dtype: float64,
 count    210.0
 mean       0.0
 std        0.0
 min        0.0
 25%        0.0
 50%        0.0
 75%        0.0
 max        0.0
 Name: Hv_ratio_max, dtype: float64)

## Compare thermal springs flux to total groundwater flux

In [90]:
# total recharge over the Alps: area (m2) times the mean of the recharge
# column of the thermal springs
total_rch = area_alps * dg['recharge'].mean()
print('total recharge = %0.2e km3 a-1' % (total_rch / 1e9))

# measured spring discharge; `year` (seconds per year) comes from the parameter cell
total_flow_rate = dg['flow_rate'].sum()
has_flow_rate = dg['flow_rate'].notnull()

print('contribution to groundwater budget of n=%i springs\n= %0.2e m3 s^-1 = %0.2e mm a^-1 = %0.2e percent of total recharge in Alps' 
      % (has_flow_rate.sum(), 
         total_flow_rate,
         total_flow_rate * year / area_alps * 1e3,
         total_flow_rate * year / total_rch * 100))

# gap-filled discharge: springs without a measured flow rate get the mean of
# the measured flow rates
dfr.loc[ix, 'mean_flow_rate'] = dg.loc[has_flow_rate, 'flow_rate'].mean()
dg['flow_rate_est'] = dg['flow_rate'].fillna(dfr.loc[ix, 'mean_flow_rate'])

total_flow_rate_est = dg['flow_rate_est'].sum()

# store the water budget numbers in the summary table
dfr.loc[ix, 'n_springs'] = dg['flow_rate_est'].notnull().sum()
dfr.loc[ix, 'area_km2'] = area_alps / 1e6
dfr.loc[ix, 'median_temperature'] = np.median(dg['temperature'].dropna())
dfr.loc[ix, 'total_spring_flux_km3_yr-1'] = (total_flow_rate * year / 1e9)
dfr.loc[ix, 'total_spring_flux_est_km3_yr-1'] = (total_flow_rate_est * year / 1e9)
dfr.loc[ix, 'total_recharge_km3_yr-1'] = total_rch / 1e9
dfr.loc[ix, 'percentage_of_total_meteoric_gw'] = total_flow_rate_est * year / total_rch * 100.0



total recharge = 1.05e+02 km3 a-1
contribution to groundwater budget of n=210 springs
= 1.88e+00 m3 s^-1 = 2.93e-01 mm a^-1 = 5.65e-02 percent of total recharge in Alps


## some more stats

In [91]:
print('mean discharge for %i springs = %0.2e m3/sec' % (dg['flow_rate'].notnull().sum(),
                                                        dg['flow_rate'].mean()))

print('mean net heat flux = %0.2e (%0.2e - %0.2e) W' % (dg['H_net_best'].mean(),
                                                       dg['H_net_min'].mean(),
                                                       dg['H_net_max'].mean()))



mean discharge for 210 springs = 8.93e-03 m3/sec
mean net heat flux = 5.30e+05 (4.02e+05 - 6.94e+05) W


## Save the modified csv file with the additional heat flux data

In [92]:
# Build the output name from the part of the input name before its LAST
# 'with' (..._with_geospatial_data.csv -> ..._with_HF_estimates.csv).
# Splitting at the last occurrence keeps the name correct when a directory in
# the path happens to contain 'with' as well.
spring_data_file_mod = spring_data_file.rsplit('with', 1)[0] + 'with_HF_estimates.csv'

print('saving modified csv file as ', spring_data_file_mod)
# written without the index and with the same encoding as the input file
dg.to_csv(spring_data_file_mod, index=False, index_label=False, encoding=encoding)

print('done')

saving modified csv file as  data/thermal_springs_alps_with_HF_estimates.csv
done


## Save clean version with the estimated values (``_est`` columns) and the helium isotope columns omitted

In [93]:
# Output name: the part of the input name before its last 'with', plus the
# new ending (rsplit keeps this correct if a directory name contains 'with').
spring_data_file_clean = spring_data_file.rsplit('with', 1)[0] + 'with_HF_estimates_clean.csv'

# Leave out the gap-filled '_est' columns and the helium isotope columns.
# Filtering (instead of list.remove) does not fail when a column is absent.
excluded_cols = ('3He', '4He')
cols_to_save = [col for col in dg.columns
                if '_est' not in col and col not in excluded_cols]

dc = dg[cols_to_save]

chem_cols = ['TDS', 'TDS_min', 'TDS_max', 'Na', 'Ca', 'Mg', 'K', 
             'NH4', 'Cl', 'F', 'SO4', 'HCO3', 'CO3', 'NO3', 
             'Si', 'Li', 'SiO2']


def _add_unit(col):
    """Return column name `col` with its unit appended.

    The rules are checked in a fixed order and the first match wins, so e.g.
    'recharge_temp_min' is labelled as a temperature and not as a recharge
    rate. 'long' / 'lat' are replaced by their full names; names that match no
    rule are returned unchanged.
    """
    if 'circ_depth' in col:
        return col + '_(m)'
    if col.startswith('Hv_ratio'):
        # ratio of two heat flows: dimensionless, so no unit is added
        return col
    if col[:2] == 'H_' or col[:3] == 'Hv_':
        return col + '_(W)'
    if 'depth' in col:
        return col + '_(m)'
    if 'temp' in col:
        return col + '_(degr_C)'
    if 'footprint' in col:
        return col + '_(m^2)'
    if 'radius' in col or 'elev' in col or 'relief' in col or 'gw_lvl' in col:
        return col + '_(m)'
    if 'area' in col:
        return col + '_(m^2)'
    if 'flow_rate' in col:
        return col + '_(m^3_s^-1)'
    if 'recharge' in col:
        return col + '_(m_a^-1)'
    if col in chem_cols:
        return col + '_(mg L^-1)'
    if col == '3H':
        return col + '_(TU)'
    if 'delta_14C' in col:
        return col + '_(pmc)'
    if 'delta' in col:
        return col + '_(permille)'
    if 'EC' in col:
        return col + '_(S_m^-1)'
    if 'long' in col:
        return 'longitude'
    if 'lat' in col:
        return 'latitude'
    return col


# rename columns to add units. The loop variable is deliberately not called
# `c`: that name holds the specific heat capacity defined in the parameter
# cell, and overwriting it breaks a re-run of the heat flux cell.
dc = dc.rename(columns={col: _add_unit(col) for col in cols_to_save})

# sort HF, thermal footprint and thermal radius cols: these groups are moved
# to the end of the table, ordered by group and then by best / min / max
renamed_cols = list(dc.columns)
reorg_cols = ['H_net', 'H_down', 'H_up', 'Hv_net', 'thermal_footprint', 'thermal_radius']
scenarios = ['best', 'min', 'max']

leading_cols = [col for col in renamed_cols
                if not any(key in col for key in reorg_cols)]

# Every matching column is kept. Previously only the first match per group and
# scenario was kept, which silently dropped the 'down' and 'up' thermal
# footprint / radius columns from the file.
trailing_cols = []
for key in reorg_cols:
    for scenario in scenarios:
        for col in renamed_cols:
            if key in col and scenario in col and col not in trailing_cols:
                trailing_cols.append(col)

dc = dc[leading_cols + trailing_cols]

# save csv file
#print('saving cleaned csv file with columns ', dc.columns.tolist())

#print('saving modified csv file as ', spring_data_file_clean)
dc.to_csv(spring_data_file_clean, index=False, index_label=False, encoding=encoding)



## Save summary csv file

In [94]:
# write the one-row summary table; floats are written with two decimals and
# the index column (the dataset name, 'Alps') gets the header 'dataset'
summary_file = 'data/summary_thermal_springs_data.csv'
print('saving summary data file as %s' % summary_file)
dfr.to_csv(summary_file, float_format='%.2f', index_label='dataset')

saving summary data file as data/summary_thermal_springs_data.csv
