# Set up file 

In [1]:
import numpy as np
import pandas as pd
from netCDF4 import Dataset
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable  
import matplotlib
import scipy
import scipy.interpolate
import sys
sys.path.insert(0, '/srv/ccrc/data06/z5145948/Python/python_from_R/Holocene/sampled_models/plotting_files/')
from plott import plott
import scipy.interpolate
#from mpl_toolkits.basemap import Basemap
matplotlib.use('agg')
from Cross_section import Cross_section
from Proxy_graph_masked import Proxy_graph
import Config
#from Map_plot import Map_plot
from collections import Counter
import scipy.stats as stats
from pylab import *
rcParams['legend.numpoints'] = 1
from matplotlib.ticker import MaxNLocator
import time
import os
import ast
from matplotlib import gridspec

matplotlib.rcParams['text.usetex'] = True
matplotlib.rcParams['text.latex.preamble'] = [
    r'\usepackage{wasysym}',
    r'\usepackage{textcomp}']

import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 1200

%matplotlib inline

In [2]:
# Import standardised latitudes and depths
lat_standardised = np.genfromtxt('/srv/ccrc/data06/z5145948/Moving_water_mass/Data/latitude_levels.csv', delimiter=',')
dep_standardised = np.genfromtxt('/srv/ccrc/data06/z5145948/Moving_water_mass/Data/depth_levels.csv', delimiter=',')

# File saving location
overleaf_loc = '/home/z5145948/Dropbox/Apps/Overleaf/Paper_2/Figures'

# Set universal dividing depth
dividing_depth = 2500

# Set colors
alpha = 0.5
point_color_deep = (0,0,1,alpha)#'blue'
point_color_shallow = (1,0,0,alpha)#'red'
line_color_deep = 'cyan'
line_color_shallow = 'magenta'

# Font size
fontsize = 20

font = {'family' : 'normal',
        'weight' : 'normal',
        'size'   : fontsize}
matplotlib.rc('font', **font)

# Separate into different areas
point_size_power = 1
points_size_multiplier = 10
south_lat_max = -15
north_lat_min = 15
lw = 4
edgewidth=0.5
size = 40
weighting = False

slice_width = 1.0



# Define Wrangling Functions

In [3]:
####3 reading data function

def read_data(folder):

    # Import simulation details (summary) as dataframe
    summary = pd.read_csv(folder + '_summary.txt', delimiter = ' ')

    # Import simulation outputs
    fh = Dataset(folder + 'output.nc')
    proxy_simulations = fh.variables['var1_1'][:] 

    # Import samples (proxy data)
    samples = pd.read_csv( folder + '_samples.txt', delimiter = ' ')

    # Drop all unnecessary rows in summary
    summary = summary.drop(['type', 'Row.names', 'reps', 'model', 'dataset', 'filename', 'success.rate'], axis = 1)

    # Convert run.no to netcdf file index
    summary['run.no'] = summary['run.no'] - 1
    samples['run.no'] = samples['run.no'] - 1
    
    return(proxy_simulations, samples, summary)

In [4]:
# Define a function for reading and including cores that are in the Oliver compilation but not in the Peterson data set

def Oliver_cores(minn, maxx):
    import pandas as pd
    import numpy as np
    folder_location = '/srv/ccrc/data06/z5145948/Moving_water_mass/Data/Core_files/'
    file_mat = ['GeoB4403_2.txt',
             'GeoB1028_5.txt',
             'GeoB2109_1.txt',
             'GeoB3801_6.txt',
             'V22_38.txt',
             'V28_56.txt',
             'V27_20.txt',
             'RC12_339.txt',
             'V32_128.txt',
             'GIK16772_1.txt',
             'MD96_2080.txt',
             'MD06_3018.txt',
             'NEAP18K.txt',
             'KNR140_37JPC.txt']


    location = ['Atlantic,','Atlantic,','Atlantic,','Atlantic,','Atlantic,','Atlantic,','Atlantic,','Indian,','Pacific,','Atlantic,','Atlantic,','Pacific,','Atlantic,','Atlantic,']

    oliver_data = []

    i = 0

    while i < len(file_mat):
        with open(folder_location + file_mat[i]) as f:
            for line in f:
                oliver_data.append(location[i] + line)
        i += 1

    df = pd.DataFrame([sub.split(",") for sub in oliver_data])

    df.columns = ['Location','Core','Lat','Lon','Dep','Core depth','age','Species','pl1','pl2','d18O benthic','d13C']

    df = df[['Core','Location','Lat','Lon','Dep','d13C','age']]

    df['d13C'] = [i.rstrip() for i in df['d13C']]

    df = df[df['age'].astype(float) > float(minn)]
    df = df[df['age'].astype(float) < float(maxx)]

    ################################# other data                                                                                                                                                                                                                                  

    file_mat = ['CH69_K09.txt',
    'MD03_2664.txt',
    'MD95_2042.txt',
    'U1308.txt',
    'ODP1063.txt']

    locations = ['CH69_K09\tAtlantic\t41.75\t-47.35\t4100\t',
              'MD03_2664\tAtlantic\t57.439000\t-48.605800\t3442.0\t',
              'MD95_2042\tAtlantic\t37.799833\t-10.166500\t3146.0\t',
              'U1308\tAtlantic\t49.877760\t-24.238110\t3871.0\t',
              'ODP1063\tAtlantic\t33.683333\t-57.616667\t4584\t']

    other_data = []
    i = 0

    while i < len(file_mat):
        with open(folder_location + file_mat[i]) as f:
            for line in f:
                other_data.append(locations[i]+line)
        i += 1

    df2 = pd.DataFrame([sub.split("\t") for sub in other_data])

    df2.columns = ['Core','Location','Lat','Lon','Dep','Core Depth','age','d13C']

    df2 = df2[['Core','Location','Lat','Lon','Dep','d13C','age']]

    df2['d13C'] = [i.rstrip() for i in df2['d13C']]

    df2 = df2[df2['age'].astype(float) > float(minn)]
    df2 = df2[df2['age'].astype(float) < float(maxx)]

    results = pd.concat([df, df2])

    results = results[results['d13C'] != '']

    results['d13C'] = results['d13C'].astype(float)
    results['Lat'] = results['Lat'].astype(float)
    results['Lon'] = results['Lon'].astype(float)
    results['Dep'] = results['Dep'].astype(float)

    return(results)    
    

In [5]:

def pl_cores(minn, maxx):

    names = ['Core', 'Location', 'Lat', 'Lon', 'Dep']

    # Read in the data
    indopac = pd.read_table("../Moving_water_mass/Data/Core_files/indopac_core_data_LS16.txt", delimiter = ',', names = names)
    atl = pd.read_table("../Moving_water_mass/Data/Core_files/atl_core_data_LS16.txt", delimiter = ',', names = names)
    add = pd.read_table("../Moving_water_mass/Data/Core_files/Additional_core_locations.txt", delimiter = ',', usecols = [0, 1, 2, 3, 4], names = names)

    # Join all into a single dataframe
    df = indopac.append(atl)
    df = df.append(add)
    df = df.reset_index(drop = True)

    df['d18O names'] = df['Core'] + '_ageLS16.txt'
    df['d13C names'] = df['Core'] + '_d13C.txt'

    # Loop over the dataset and interpolate each core
    i = 0
    results_dict = {}

    while i < df.count()[0]:

        try:
            df_d18O = pd.read_table('../Moving_water_mass/Data/Core_files/' + df.loc[i]['d18O names'], delim_whitespace = True, names = ['depth', 'age'], skip_blank_lines = True, na_values = 'NAN')
        except:
            i += 1
            continue
        try:
            df_d13C = pd.read_table('../Moving_water_mass/Data/Core_files/' + df.loc[i]['d13C names'], delim_whitespace = True, names = ['depth', 'd13C'], skip_blank_lines = True, na_values = 'NAN')
        except:
            i += 1
            continue

        df_d18O = df_d18O.dropna(subset = ['age']) 
        df_d13C = df_d13C.dropna(subset = ['d13C'])

        df_d18O = df_d18O.reset_index(drop = True)
        df_d13C = df_d13C.reset_index(drop = True)

        interp = scipy.interpolate.interp1d(df_d18O['depth'], df_d18O['age'], bounds_error = True)
        try:
            df_d13C['age'] = interp(df_d13C['depth'])
        except:
            try:
                interp2 = scipy.interpolate.interp1d(df_d18O['depth'], df_d18O['age'], bounds_error = False)
                df_d13C['age'] = interp2(df_d13C['depth'])
            except:
                i += 1
                continue

        df_d13C = df_d13C.dropna(subset = ['age'])
        df_d13C = df_d13C.reset_index(drop = True)

        df_d13C = df_d13C[df_d13C['age'] > minn]
        df_d13C = df_d13C[df_d13C['age'] < maxx]

        if len(df_d13C) > 0:
            df_results = df.drop(['d18O names', 'd13C names'], axis = 1)
            df_results = df_results.loc[df_results.index.repeat(len(df_d13C))].loc[[i]]

            df_d13C = df_d13C.drop(['depth'], axis = 1)

            df_results = df_results.reset_index(drop = True).join(df_d13C.reset_index(drop = True))
            results_dict.update({
                df_results.Core[0] : df_results.drop(['Core'], axis = 1)
            })

        i += 1
        
    df_results = pd.concat(results_dict).reset_index()
    df_results = df_results.rename(columns = {'level_0' : 'Core'})
    df_results = df_results.drop(['level_1'], axis = 1)

    return(df_results)
    

In [6]:

def slicing_data(time_min,time_max,location_filter):
    # ################## read in the oliver data using the predefined function

    # Age range to look over

    df_pl = pl_cores(time_min,time_max)

    df_oliver = Oliver_cores(time_min,time_max)
    
    ###################3 join the dataframes

    df_results = df_pl.append(df_oliver)

    #################### get only the atlantic cores

    df = df_results.reset_index(drop = True)
    df['Dep'] = abs(df['Dep'])

    df_atl = df[df['Location'] == location_filter]
    df_atl = df_atl.reset_index(drop = True)
    df_atl['age'] = df_atl.age.astype(float)

    # Slice the data
    lower = np.arange(time_min, time_max, slice_width)
    upper = np.arange(time_min+slice_width, time_max+slice_width, slice_width)

    proxy_compilation = {}

    for low, up in zip(lower, upper):
        df_slice = df_atl[(df_atl['age'] >= low) & (df_atl['age'] < up)]
        grouped_slice = df_slice.groupby(['Lat', 'Lon', 'Dep'])

        slice_averaged = {}

        for key, group in grouped_slice:
            group_averaged = group.mean()
            group_count = group.count()

            group_averaged['count'] = group_count['d13C']
            slice_averaged.update({
                group.Core.reset_index(drop=True)[0] : pd.DataFrame(group_averaged)
            })

        slice_averaged = pd.concat(slice_averaged, axis=1).T
        slice_averaged = slice_averaged.drop(['age'],axis=1)

        proxy_compilation.update({
            low : slice_averaged.T
        })

    proxy_compilation = pd.concat(proxy_compilation,axis=1).T.reset_index(drop=False)
    proxy_compilation = proxy_compilation.drop('level_2',axis=1)
    proxy_compilation = proxy_compilation.rename(columns={'level_0' : 'lower', 'level_1' : 'Core', 'Dep' : 'Ocean_depth'})
    
    return(proxy_compilation)

In [7]:
def slice_interpolation(proxy_compilation,dividing_depth):
    # interpolating across the entire dataset

    # Add time bounds to the samples table
    samples_with_time_period = proxy_compilation

    unique_cores = np.unique(samples_with_time_period['Core'])
    years_list = np.unique(samples_with_time_period.lower)

    samples_with_time_period = samples_with_time_period.reset_index(drop=True)

    interpolated_proxies = {}

    for unique_core in unique_cores:

        # get a single proxy
        df_temp = samples_with_time_period[samples_with_time_period['Core'] == unique_core]

        try:
            # interpolate the dataset
            interp = scipy.interpolate.interp1d(df_temp['lower'],
                                                df_temp['d13C'],
                                                bounds_error = False)
            single_proxy_interpolated = pd.DataFrame({'lower' : years_list, 'd13C' : interp(years_list)})        
            location_repeated = pd.concat([df_temp.reset_index(0).loc[0,['Lat', 'Lon', 'Ocean_depth','count']]] * len(single_proxy_interpolated), axis=1).T
            single_proxy_interpolated = pd.concat([location_repeated.reset_index(drop=True), single_proxy_interpolated.reset_index(drop=True)],axis=1)    

            interpolated_proxies.update({
                unique_core : single_proxy_interpolated
            })        

        except ValueError:
            interpolated_proxies.update({
                unique_core : df_temp.drop('Core',axis=1)
            })

    interpolated_samples = pd.concat(interpolated_proxies).reset_index(drop=False).rename(columns={'level_0':'Core'}).drop(['level_1'],axis=1)
    interpolated_samples_backup = interpolated_samples.copy()

    # Drop count because this count doesn't make sense
    interpolated_samples = interpolated_samples.drop('count',axis=1)

    # Merge the original dataframe to get counts back (nan for interpolated samples)
    interpolated_samples = pd.merge(interpolated_samples, proxy_compilation[['lower','Core','count']],
                                    how='outer', left_on=['lower','Core'], right_on=['lower','Core'])

    # Drop all nan d13C values (interpolation tried but out of range)
    interpolated_samples = interpolated_samples[np.isfinite(interpolated_samples['d13C'])]

    # Divide into a shallow and deep dataframe
    shallower_samples = interpolated_samples[interpolated_samples['Ocean_depth'] < dividing_depth]
    deeper_samples = interpolated_samples[interpolated_samples['Ocean_depth'] > dividing_depth]
    
    return(interpolated_samples, shallower_samples, deeper_samples)
    

In [8]:
def make_latex_table(proxy_compilation,latex_name):
    # Save list of cores to latex table to include in paper
    latex_table = proxy_compilation.drop(['d13C','count','lower'],axis=1)
    latex_table['Reference'] = '-'
    latex_table = latex_table.drop_duplicates()

    # Rename columns
    latex_table = latex_table.rename(columns={'Lat':'Latitude','Lon' : 'Longitude', 'Ocean_depth' : 'Depth (m)'})
    latex_table['Latitude'] = [str(round(x, 2)) for x in latex_table.Latitude]
    latex_table['Longitude'] = [str(round(x, 2)) for x in latex_table.Longitude]
    latex_table.sort_values(by='Core',inplace=True)

    # Convert to string of latex markdown
    latex_string = latex_table.to_latex(index=False,longtable=True)

    # Reformat some parts of the latex table
    latex_string = latex_string.replace('\\toprule','')
    latex_string = latex_string.replace('\\midrule','')
    latex_string = latex_string.replace('\\bottomrule','')

    # Write to a file
    file1 = open(latex_name,"w") 
    file1.write(latex_string) 
    file1.close() #to change file access modes 
  

In [9]:

def Atlantic_regions(interpolated_samples):
    # Get volume weight average with time slices
    interpolated_samples['weights'] = np.nan
    interpolated_samples['regions'] = np.nan    

    # NEA
    interpolated_samples.loc[((interpolated_samples['Lon'] < 20) | (interpolated_samples['Lon'] > (-33))) & (interpolated_samples['Lat'] > 0.1),
                            'weights'] = 4.3
    interpolated_samples.loc[((interpolated_samples['Lon'] < 20) | (interpolated_samples['Lon'] > (-33))) & (interpolated_samples['Lat'] > 0.1),
                            'regions'] = 'NEA'
    

    # NWA
    interpolated_samples.loc[(interpolated_samples['Lon'] < (-33)) & (interpolated_samples['Lon'] > (-180)) & (interpolated_samples['Lat'] > 0.1),
                             'weights'] = 4.9
    interpolated_samples.loc[(interpolated_samples['Lon'] < (-33)) & (interpolated_samples['Lon'] > (-180)) & (interpolated_samples['Lat'] > 0.1),
                             'regions'] = 'NWA'

    # SEA
    interpolated_samples.loc[((interpolated_samples['Lon'] < 30) | (interpolated_samples['Lon'] > (-14.6))) & (interpolated_samples['Lat'] < 0) & (interpolated_samples['Lat'] > -55),
                             'weights'] = 3.5
    interpolated_samples.loc[((interpolated_samples['Lon'] < 30) | (interpolated_samples['Lon'] > (-14.6))) & (interpolated_samples['Lat'] < 0) & (interpolated_samples['Lat'] > -55),
                             'regions'] = 'SEA'
    
    # SA
    interpolated_samples.loc[((interpolated_samples['Lon'] < 30) | (interpolated_samples['Lon'] > (-22))) & (interpolated_samples['Lat'] < -40) & (interpolated_samples['Lat'] > -55),
                            'weights'] = 0.7
    interpolated_samples.loc[((interpolated_samples['Lon'] < 30) | (interpolated_samples['Lon'] > (-22))) & (interpolated_samples['Lat'] < -40) & (interpolated_samples['Lat'] > -55),
                            'regions'] = 'SA'    

    # SWA
    interpolated_samples.loc[(interpolated_samples['Lon'] > (-60)) & (interpolated_samples['Lon'] < (-14.6)) & (interpolated_samples['Lat'] < 0) & (interpolated_samples['Lat'] > -55),
                            'weights'] = 5.0
    interpolated_samples.loc[(interpolated_samples['Lon'] > (-60)) & (interpolated_samples['Lon'] < (-14.6)) & (interpolated_samples['Lat'] < 0) & (interpolated_samples['Lat'] > -55),
                            'regions'] = 'SWA'    
    
    return(interpolated_samples)
    
def Pacific_regions(interpolated_samples):
    # Get volume weight average with time slices
    interpolated_samples['weights'] = np.nan
    interpolated_samples['regions'] = np.nan      

    # North
    interpolated_samples.loc[(interpolated_samples['Lat'] > 0) ,'weights'] = 21.2
    interpolated_samples.loc[(interpolated_samples['Lat'] > 0) ,'regions'] = 'NP'    

    # South
    interpolated_samples.loc[(interpolated_samples['Lat'] < 0) ,'weights'] = 23.9
    interpolated_samples.loc[(interpolated_samples['Lat'] < 0) ,'regions'] = 'SP'
    
    return(interpolated_samples)

def Indian_regions(interpolated_samples):
    # Get volume weight average with time slices # Use a single region for the Indian Ocean
    interpolated_samples['weights'] = 1
    interpolated_samples['regions'] = 'I'
    
    return(interpolated_samples)


In [10]:
def averaging_by_region(interpolated_samples):
    # group the cores based on the age and the region (indicated by weights)
    grouped_by_age_region = interpolated_samples.groupby(['lower', 'regions'])
    
    averaged_by_age_region = {}
    stdev_by_age_region = {}
    measurement_count = {}
    core_count = {}
    CI_by_age_region = {}
    
    for key, group in grouped_by_age_region:
        
        # find group means
        averaged_by_age_region.update({
            key: np.mean(group)
        })
        # find total number of cores
        core_count.update({
            key: len(group['count'].dropna())
        }) 
        # find total measurement count
        measurement_count.update({
            key: np.nansum(group['count'])
        })
        # find the standard deviation across the slices and cores
        stdev_by_age_region.update({
            key: np.std(group['d13C'])
        })
        # express variation as a confidence interval
        CI_by_age_region.update({
            key: 1.96 * np.std(group['d13C'])/(np.sqrt(len(group['count'].dropna()))) #CI of 95%, 1.96 is zscore
        })        
        
    # Convert dictionaries to dataframes
    averaged_by_age_region = pd.concat(averaged_by_age_region,axis=1).T.reset_index(drop=False).rename({'level_1':'regions'},axis=1).drop('level_0',axis=1)
    measurement_count = pd.DataFrame.from_dict(measurement_count,orient='index')
    core_count = pd.DataFrame.from_dict(core_count,orient='index')
    stdev_by_age_region = pd.DataFrame.from_dict(stdev_by_age_region,orient='index')
    CI_by_age_region = pd.DataFrame.from_dict(CI_by_age_region,orient='index')    
    
    # Add columns of number of cores and number of measurements to the dataframe
    averaged_by_age_region['measurement_count'] = list(measurement_count[0])
    averaged_by_age_region['core_count'] = list(core_count[0])  
    averaged_by_age_region['slice_stdev'] = list(stdev_by_age_region[0])
    averaged_by_age_region['CI'] = list(CI_by_age_region[0])
    
    # Now there is one values for each region (weight) and each year combination
    # Group by years and use weights to find the average d13C for that time period

    grouped_by_age = averaged_by_age_region.drop('regions',axis=1).groupby('lower')

    averaged_by_age = {}
    stdev_by_age = {}

    # find group means
    for key, group in grouped_by_age:

        # Find the normal average
        avg = np.mean(group.d13C)

        averaged_by_age.update({
            np.mean(group['lower']) : np.sum(group['d13C'] * group['weights'])/np.sum(group['weights'])
        })
        stdev_by_age.update({
            np.mean(group['lower']) : np.average((group.d13C-avg)**2,weights=group.weights)
        }) 


    averaged_by_age = pd.DataFrame.from_dict(averaged_by_age,orient='index').reset_index(drop=False).rename(columns={'index' : 'lower', 0 : 'd13C'})
    stdev_by_age = pd.DataFrame.from_dict(stdev_by_age,orient='index').reset_index(drop=False).rename(columns={'index' : 'lower', 0 : 'stdev'})

    averaged_by_age = pd.merge(left=averaged_by_age,right=stdev_by_age)
    averaged_by_age = averaged_by_age.sort_values('lower')
    
    return(averaged_by_age,averaged_by_age_region)


In [11]:

def sort_data(time_min,time_max,region_func,interp_data_name,notinterp_data_name,latex_name,location_filter):
    # Slice the data
    proxy_compilation = slicing_data(time_min,time_max,location_filter)
    
    # Interpolate the data and divide into a shallow and deep dataframe
    interpolated_samples, shallower_samples, deeper_samples = slice_interpolation(
        proxy_compilation,dividing_depth)
    
    # write latex table for paper
    make_latex_table(proxy_compilation,latex_name)
    
    # get region weights
    interpolated_samples = region_func(interpolated_samples)

    # Get regional average and ocean basin averages
    averaged_by_age, averaged_by_age_region = averaging_by_region(interpolated_samples)

    # drop weights (because I'm not using them now)
    interpolated_samples = interpolated_samples.drop('weights',axis=1)

    # Save the datasets
    averaged_by_age.to_csv(notinterp_data_name, index=False)
    interpolated_samples.to_csv(interp_data_name,index=False)
    
    return(averaged_by_age, averaged_by_age_region, interpolated_samples)


# Run the functions

In [12]:
# Set time periods
Hol_min = 2
Hol_max = 8
LIG_min = 118
LIG_max = 130

# set up other variables

# Holocene, Atlantic
Hol_atl_latex_name = "Figures/Holocene_cores_tables_Atlantic.tex"
Hol_atl_nointerp_data_name = "Data/Atlantic_Holocene_profile_PL.csv"
Hol_atl_interp_data_name = "/srv/ccrc/data06/z5145948/Moving_water_mass/Data/interpolated_Hol_Atlantic.csv"

# LIG, Atlantic
LIG_atl_latex_name = "Figures/LIG_cores_tables_Atlantic.tex"
LIG_atl_nointerp_data_name = "Data/Atlantic_LIG_profile_PL.csv"
LIG_atl_interp_data_name = "/srv/ccrc/data06/z5145948/Moving_water_mass/Data/interpolated_LIG_Atlantic.csv"

# Holocene, Pacific
Hol_pac_latex_name = "Figures/Holocene_cores_tables_Pacific.tex"
Hol_pac_nointerp_data_name = "Data/Pacific_Holocene_profile_PL.csv"
Hol_pac_interp_data_name = "/srv/ccrc/data06/z5145948/Moving_water_mass/Data/interpolated_Hol_Pacific.csv"

# LIG, Pacific
LIG_pac_latex_name = "Figures/LIG_cores_tables_Pacific.tex"
LIG_pac_nointerp_data_name = "Data/Pacific_LIG_profile_PL.csv"
LIG_pac_interp_data_name = "/srv/ccrc/data06/z5145948/Moving_water_mass/Data/interpolated_LIG_Pacific.csv"

# Holocene, Indian
Hol_ind_latex_name = "Figures/Holocene_cores_tables_Indian.tex"
Hol_ind_nointerp_data_name = "Data/Indian_Holocene_profile_PL.csv"
Hol_ind_interp_data_name = "/srv/ccrc/data06/z5145948/Moving_water_mass/Data/interpolated_Hol_Indian.csv"

# LIG, Indian
LIG_ind_latex_name = "Figures/LIG_cores_tables_Indian.tex"
LIG_ind_nointerp_data_name = "Data/Indian_LIG_profile_PL.csv"
LIG_ind_interp_data_name = "/srv/ccrc/data06/z5145948/Moving_water_mass/Data/interpolated_LIG_Indian.csv"

In [13]:
# Run function for each scenario (three oceans, two time periods)

interpolated_results_dict = {}
average_age_interpolated_results_dict = {}
average_age_region_interpolated_results_dict = {}

for label,time_min,time_max,region_func,interp_data_name,notinterp_data_name,latex_name,location_filter in zip(
    ['Hol_atl','LIG_atl','Hol_pac','LIG_pac','Hol_ind','LIG_ind'],
    [Hol_min,LIG_min,Hol_min,LIG_min,Hol_min,LIG_min],
    [Hol_max,LIG_max,Hol_max,LIG_max,Hol_max,LIG_max],
    [Atlantic_regions,Atlantic_regions,Pacific_regions,Pacific_regions,Indian_regions,Indian_regions],
    [Hol_atl_interp_data_name,LIG_atl_interp_data_name,Hol_pac_interp_data_name,LIG_pac_interp_data_name,Hol_ind_interp_data_name,LIG_ind_interp_data_name],
    [Hol_atl_nointerp_data_name,LIG_atl_nointerp_data_name,Hol_pac_nointerp_data_name,LIG_pac_nointerp_data_name,Hol_ind_nointerp_data_name,LIG_ind_nointerp_data_name],
    [Hol_atl_latex_name,LIG_atl_latex_name,Hol_pac_latex_name,LIG_pac_latex_name,Hol_ind_latex_name,LIG_ind_latex_name],
    ['Atlantic','Atlantic','Pacific','Pacific','Indian','Indian']):
    
    # Run function
    averaged_by_age, averaged_by_age_region, interpolated_samples = sort_data(
        time_min,time_max,region_func,interp_data_name,notinterp_data_name,latex_name,location_filter
    )
    
    # Add results to dictionary
    interpolated_results_dict.update({
        label : interpolated_samples
    })
    average_age_interpolated_results_dict.update({
        label : averaged_by_age
    })   
    average_age_region_interpolated_results_dict.update({
        label : averaged_by_age_region
    })     

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




In [14]:
# Create results dataframes from dictionaries
df_interpolated_results = pd.concat(interpolated_results_dict).reset_index(drop=True)
df_average_age_interpolated_results = pd.concat(average_age_interpolated_results_dict).reset_index().drop('level_1',axis=1).rename(columns=
                                                                                 {'level_0':'location'})
df_average_age_region_interpolated_results = pd.concat(average_age_region_interpolated_results_dict).reset_index().drop('level_1',axis=1).rename(columns=
                                                                                 {'level_0':'location'})

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """


In [15]:
# peak LIG and peak Holocene time periods
LIG_peak_min = 120
LIG_peak_max = 125
Hol_peak_min = 4
Hol_peak_max = 7

# find average results of peak period
df_LIG_peak = df_average_age_region_interpolated_results[(df_average_age_region_interpolated_results['lower'] > LIG_peak_min) & (df_average_age_region_interpolated_results['lower'] < LIG_peak_max)]
df_Hol_peak = df_average_age_region_interpolated_results[(df_average_age_region_interpolated_results['lower'] > Hol_peak_min) & (df_average_age_region_interpolated_results['lower'] < Hol_peak_max)]


# Peak Holocene and LIG summary

In [16]:
# Average over all time slices for LIG 
LIG_peak_averaged_dict = {}
for index,group in df_LIG_peak.groupby(['location','regions']):
    LIG_peak_averaged_dict.update({
        index:pd.DataFrame(np.mean(group)).T
    })
df_LIG_peak_averaged = pd.concat(LIG_peak_averaged_dict)
    
# Average over all time slices for Holocene
Hol_peak_averaged_dict = {}
for index,group in df_Hol_peak.groupby(['location','regions']):
    Hol_peak_averaged_dict.update({
        index:pd.DataFrame(np.mean(group)).T
    })    
df_Hol_peak_averaged = pd.concat(Hol_peak_averaged_dict)    
    
# Format dataframes for merge
df_Hol_peak_averaged = df_Hol_peak_averaged.reset_index().drop(['weights','lower','count','level_2','level_0'],axis=1)
df_Hol_peak_averaged = df_Hol_peak_averaged.rename(columns={'level_1':'Location'})
df_Hol_peak_averaged.set_index(['Location'],inplace=True)

df_LIG_peak_averaged = df_LIG_peak_averaged.reset_index().drop(['lower','count','level_2','level_0'],axis=1)
df_LIG_peak_averaged = df_LIG_peak_averaged.rename(columns={'level_1':'Location'})
df_LIG_peak_averaged.set_index(['Location'],inplace=True)

In [17]:
# Rearange order of columns
df_LIG_peak_averaged_sorted = df_LIG_peak_averaged[['d13C','measurement_count','core_count','slice_stdev','CI']]
df_Hol_peak_averaged_sorted = df_Hol_peak_averaged[['d13C','measurement_count','core_count','slice_stdev','CI']]


In [18]:
# Join results into a single table
combined_results = df_Hol_peak_averaged_sorted.join(df_LIG_peak_averaged_sorted,lsuffix=' Holocene',rsuffix=' LIG',sort=False)


In [19]:
combined_results


Unnamed: 0_level_0,d13C Holocene,measurement_count Holocene,core_count Holocene,slice_stdev Holocene,CI Holocene,d13C LIG,measurement_count LIG,core_count LIG,slice_stdev LIG,CI LIG
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
NEA,0.938611,72.5,38.5,0.19868,0.062757,0.761522,26.0,11.75,0.209845,0.123354
NWA,0.806101,28.5,14.0,0.238433,0.125678,0.658415,38.5,7.25,0.23039,0.173702
SA,-0.025208,6.5,3.0,0.17594,0.198458,0.011667,2.25,1.5,0.258489,0.429321
SEA,0.629537,10.5,9.5,0.254233,0.161938,0.53857,6.0,6.0,0.22653,0.18297
SWA,0.916708,6.0,4.5,0.422232,0.406669,0.476493,1.5,1.5,0.205076,0.337902
I,0.173,6.0,4.5,0.238893,0.223024,0.036607,3.0,2.75,0.206315,0.244906
NP,0.05104,12.0,7.0,0.215794,0.159862,-0.091236,5.0,3.5,0.267256,0.286405
SP,0.474761,12.5,4.0,0.263107,0.257845,0.136365,1.5,1.5,0.19679,0.332165


In [20]:
# Format the numbers
combined_results[['core_count Holocene','measurement_count Holocene',
                  'core_count LIG','measurement_count LIG']] = \
                    combined_results[['core_count Holocene','measurement_count Holocene',
                    'core_count LIG','measurement_count LIG']].astype(int)
combined_results = combined_results.round(2)

In [21]:
combined_results

Unnamed: 0_level_0,d13C Holocene,measurement_count Holocene,core_count Holocene,slice_stdev Holocene,CI Holocene,d13C LIG,measurement_count LIG,core_count LIG,slice_stdev LIG,CI LIG
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
NEA,0.94,72,38,0.2,0.06,0.76,26,11,0.21,0.12
NWA,0.81,28,14,0.24,0.13,0.66,38,7,0.23,0.17
SA,-0.03,6,3,0.18,0.2,0.01,2,1,0.26,0.43
SEA,0.63,10,9,0.25,0.16,0.54,6,6,0.23,0.18
SWA,0.92,6,4,0.42,0.41,0.48,1,1,0.21,0.34
I,0.17,6,4,0.24,0.22,0.04,3,2,0.21,0.24
NP,0.05,12,7,0.22,0.16,-0.09,5,3,0.27,0.29
SP,0.47,12,4,0.26,0.26,0.14,1,1,0.2,0.33


In [23]:
# Convert columns to multilevel index
df_latex = combined_results
df_latex.reset_index(drop=False,inplace=True)
df_latex.columns = pd.MultiIndex.from_arrays([['Text',
                             'Holocene','Holocene','Holocene','Holocene','Holocene',
                             'LIG','LIG','LIG','LIG','LIG',],
                            ['Location','d13C','measurement_count','core_count','slice_stdev','CI','d13C','measurement_count','core_count','slice_stdev','CI']])

In [24]:
# Convert to string of latex markdown
latex_string = df_latex.to_latex(index=False,longtable=True)

# Reformat some parts of the latex table
latex_string = latex_string.replace('\\toprule','')
latex_string = latex_string.replace('\\midrule','')
latex_string = latex_string.replace('\\bottomrule','')
latex_string = latex_string.replace('\\\\','\\\\ \hline')
# Change column names
latex_string = latex_string.replace('measurement\_count','\\thead{Number of \\\\ measurements}')
latex_string = latex_string.replace('core\_count','\\thead{Number of \\\\ cores}')
latex_string = latex_string.replace('slice\_stdev','\\thead{$\sigma$ \\\\ (\permil)}')
latex_string = latex_string.replace('CI','\\thead{Confidence Interval \\\\ (\permil)}')
latex_string = latex_string.replace('d13C','\\thead{$\delta^{13}$C \\\\ (\permil)}')
latex_string = latex_string.replace('Text','\\multicolumn{1}{l}{}')
latex_string = latex_string.replace('{5}{l}','{5}{|c|}')

# Add caption to latex table
caption = [
    '\caption{Region breakdown of $\delta^{13}$C data during the Holocene and LIG.',
    'Number of measures and number of cores in each region is provided,',
    'with $\sigma$ representing one standard deviation between time slices,',
    'and the associated 95\\% confidence interval (CI).}',
    '\label{regional_summary_tb}'
]

caption = ' '.join(caption)

latex_string = latex_string.replace('{lrrrrrrrrrr}','{lcccccccccc}\\hline')


print(latex_string)

latex_name = 'Figures/regional_summary_table_PL.tex'
# Write to a file
file1 = open(latex_name,"w") 
file1.write(latex_string) 
file1.close() #to change file access modes 

\begin{longtable}{lcccccccccc}\hline

    \multicolumn{1}{l}{} & \multicolumn{5}{|c|}{Holocene} & \multicolumn{5}{|c|}{LIG} \\ \hline
Location &     \thead{$\delta^{13}$C \\ (\permil)} & \thead{Number of \\ measurements} & \thead{Number of \\ cores} & \thead{$\sigma$ \\ (\permil)} &    \thead{Confidence Interval \\ (\permil)} &  \thead{$\delta^{13}$C \\ (\permil)} & \thead{Number of \\ measurements} & \thead{Number of \\ cores} & \thead{$\sigma$ \\ (\permil)} &    \thead{Confidence Interval \\ (\permil)} \\ \hline

\endhead

\multicolumn{11}{r}{{Continued on next page}} \\ \hline

\endfoot


\endlastfoot
     NEA &     0.94 &                72 &         38 &        0.20 &  0.06 &  0.76 &                26 &         11 &        0.21 &  0.12 \\ \hline
     NWA &     0.81 &                28 &         14 &        0.24 &  0.13 &  0.66 &                38 &          7 &        0.23 &  0.17 \\ \hline
      SA &    -0.03 &                 6 &          3 &        0.18 &  0.20 &  0.01 &        

In [None]:
df_latex