# Set up file 

In [1]:
import numpy as np
import pandas as pd
from netCDF4 import Dataset
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable  
import matplotlib
import scipy
import scipy.interpolate
import sys
sys.path.insert(0, '/srv/ccrc/data06/z5145948/Python/python_from_R/Holocene/sampled_models/plotting_files/')
from plott import plott
import scipy.interpolate
from mpl_toolkits.basemap import Basemap
matplotlib.use('agg')
from Cross_section import Cross_section
from Proxy_graph_masked import Proxy_graph
import Config
from Map_plot import Map_plot
from collections import Counter
import scipy.stats as stats
from pylab import *
rcParams['legend.numpoints'] = 1
from matplotlib.ticker import MaxNLocator
import time
from IPython import embed
import os
import ast
from matplotlib import gridspec

# Render figure text through LaTeX and load two extra LaTeX packages in the
# preamble used for that rendering.
matplotlib.rcParams['text.usetex'] = True
matplotlib.rcParams['text.latex.preamble'] = [
    r'\usepackage{wasysym}',
    r'\usepackage{textcomp}']

# Second alias for the same matplotlib module; used here only to set the
# default figure resolution.
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 1200

# Show figures inline in the notebook.
# NOTE(review): `matplotlib.use('agg')` above runs after pyplot has already
# been imported; the warning printed below this cell says that call is too late.
%matplotlib inline

because the backend has already been chosen;
matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.



In [2]:
# ---------------------------------------------------------------------------
# Standardised grids: latitude and depth levels, each read from a
# comma-delimited file.
# ---------------------------------------------------------------------------
lat_standardised = np.genfromtxt(
    '/srv/ccrc/data06/z5145948/Moving_water_mass/Data/latitude_levels.csv',
    delimiter=',',
)
dep_standardised = np.genfromtxt(
    '/srv/ccrc/data06/z5145948/Moving_water_mass/Data/depth_levels.csv',
    delimiter=',',
)

# Folder that figures for the paper are written to.
overleaf_loc = '/home/z5145948/Dropbox/Apps/Overleaf/Paper_2/Figures'

# Depth separating "shallow" from "deep" samples in every cell below
# (presumably metres, matching the core depth columns -- TODO confirm).
dividing_depth = 2500

# ---------------------------------------------------------------------------
# Colours: RGBA tuples for the scatter points, named colours for the lines.
# ---------------------------------------------------------------------------
alpha = 0.5
point_color_deep = (0, 0, 1, alpha)      # blue
point_color_shallow = (1, 0, 0, alpha)   # red
line_color_deep = 'cyan'
line_color_shallow = 'magenta'

# ---------------------------------------------------------------------------
# Fonts
# NOTE(review): 'normal' is not one of matplotlib's font family names;
# confirm whether a real family (e.g. 'sans-serif') was intended.
# ---------------------------------------------------------------------------
fontsize = 20
font = dict(family='normal', weight='normal', size=fontsize)
matplotlib.rc('font', **font)

# ---------------------------------------------------------------------------
# Plot geometry and regional split settings
# ---------------------------------------------------------------------------
point_size_power = 1
points_size_multiplier = 10
south_lat_max = -15
north_lat_min = 15
lw = 4
edgewidth = 0.5
size = 40
weighting = False

# Width of each age slice used when binning the proxy records.
slice_width = 1.0



In [3]:
# Function for reading one simulation's summary, outputs and samples

def read_data(folder):
    """Load one simulation's summary table, model output and proxy samples.

    Parameters
    ----------
    folder : str
        Path prefix of the simulation. The three inputs are found by plain
        string concatenation: ``folder + '_summary.txt'``,
        ``folder + 'output.nc'`` and ``folder + '_samples.txt'``.

    Returns
    -------
    tuple
        ``(proxy_simulations, samples, summary)`` where

        * ``proxy_simulations`` is the full contents of the netCDF variable
          ``'var1_1'``,
        * ``samples`` is the space-delimited samples table,
        * ``summary`` is the space-delimited summary table with the
          bookkeeping columns removed.

        In both tables ``run.no`` is shifted down by one so it can be used
        as a zero-based index into the netCDF output.
    """
    # Import simulation details (summary) as dataframe
    summary = pd.read_csv(folder + '_summary.txt', delimiter=' ')

    # Import simulation outputs. The `[:]` slice copies the variable into
    # memory, so the file can be closed as soon as the block ends (the
    # original left the handle open).
    with Dataset(folder + 'output.nc') as fh:
        proxy_simulations = fh.variables['var1_1'][:]

    # Import samples (proxy data)
    samples = pd.read_csv(folder + '_samples.txt', delimiter=' ')

    # Drop the summary columns that are not needed downstream
    summary = summary.drop(
        ['type', 'Row.names', 'reps', 'model', 'dataset', 'filename', 'success.rate'],
        axis=1,
    )

    # Convert run.no (1-based) to a netCDF file index (0-based)
    summary['run.no'] = summary['run.no'] - 1
    samples['run.no'] = samples['run.no'] - 1

    return (proxy_simulations, samples, summary)

In [4]:
# Define a function for reading and including cores that are in the Oliver compilation but not in the Peterson data set

def _read_prefixed_core_lines(folder_location, file_names, prefixes):
    """Return every line of each file with that file's prefix prepended."""
    rows = []
    for file_name, prefix in zip(file_names, prefixes):
        with open(folder_location + file_name) as f:
            rows.extend(prefix + line for line in f)
    return rows


def _core_table(rows, delimiter, columns, minn, maxx):
    """Split raw lines into the common core columns and keep minn < age < maxx."""
    table = pd.DataFrame([row.split(delimiter) for row in rows])
    table.columns = columns
    table = table[['Core', 'Location', 'Lat', 'Lon', 'Dep', 'd13C', 'age']]

    # d13C is the last field on each line, so it still carries the newline.
    table = table.assign(d13C=table['d13C'].str.rstrip())

    # The age strings are cast only for the comparison; the column itself is
    # left exactly as read.
    age = table['age'].astype(float)
    return table[(age > float(minn)) & (age < float(maxx))]


def Oliver_cores(minn, maxx,
                 folder_location='/srv/ccrc/data06/z5145948/Moving_water_mass/Data/Core_files/'):
    """Read the cores that are in the Oliver compilation but not in the Peterson data set.

    Two sets of text files are read from ``folder_location``:

    * comma-separated files, whose lines are prefixed here with the ocean
      basin so that each line splits into the 12 columns named below;
    * tab-separated files, whose lines are prefixed here with the core name,
      basin, latitude, longitude and depth so that each line splits into
      8 columns.

    Parameters
    ----------
    minn, maxx : number or numeric string
        Exclusive lower and upper age bounds; only rows with
        ``minn < age < maxx`` are kept.
    folder_location : str, optional
        Folder holding the core files. It is concatenated directly with the
        file names, so it must end with a path separator. Defaults to the
        location the notebook has always used.

    Returns
    -------
    pandas.DataFrame
        Columns ``Core, Location, Lat, Lon, Dep, d13C, age``. Rows with an
        empty ``d13C`` are dropped; ``d13C``, ``Lat``, ``Lon`` and ``Dep``
        are floats, while ``age`` keeps the string form read from the files
        (callers cast it themselves). The index is not reset.
    """
    # ---- comma-separated files -------------------------------------------
    file_mat = ['GeoB4403_2.txt',
                'GeoB1028_5.txt',
                'GeoB2109_1.txt',
                'GeoB3801_6.txt',
                'V22_38.txt',
                'V28_56.txt',
                'V27_20.txt',
                'RC12_339.txt',
                'V32_128.txt',
                'GIK16772_1.txt',
                'MD96_2080.txt',
                'MD06_3018.txt',
                'NEAP18K.txt',
                'KNR140_37JPC.txt']

    # Basin of each file above, in the same order (trailing comma included so
    # the prefix becomes the first comma-separated field of every line).
    location = ['Atlantic,', 'Atlantic,', 'Atlantic,', 'Atlantic,', 'Atlantic,',
                'Atlantic,', 'Atlantic,', 'Indian,', 'Pacific,', 'Atlantic,',
                'Atlantic,', 'Pacific,', 'Atlantic,', 'Atlantic,']

    oliver_data = _read_prefixed_core_lines(folder_location, file_mat, location)
    df = _core_table(
        oliver_data, ",",
        ['Location', 'Core', 'Lat', 'Lon', 'Dep', 'Core depth', 'age',
         'Species', 'pl1', 'pl2', 'd18O benthic', 'd13C'],
        minn, maxx,
    )

    # ---- tab-separated files ---------------------------------------------
    file_mat = ['CH69_K09.txt',
                'MD03_2664.txt',
                'MD95_2042.txt',
                'U1308.txt',
                'ODP1063.txt']

    # Core name, basin, latitude, longitude and depth for each file above.
    locations = ['CH69_K09\tAtlantic\t41.75\t-47.35\t4100\t',
                 'MD03_2664\tAtlantic\t57.439000\t-48.605800\t3442.0\t',
                 'MD95_2042\tAtlantic\t37.799833\t-10.166500\t3146.0\t',
                 'U1308\tAtlantic\t49.877760\t-24.238110\t3871.0\t',
                 'ODP1063\tAtlantic\t33.683333\t-57.616667\t4584\t']

    other_data = _read_prefixed_core_lines(folder_location, file_mat, locations)
    df2 = _core_table(
        other_data, "\t",
        ['Core', 'Location', 'Lat', 'Lon', 'Dep', 'Core Depth', 'age', 'd13C'],
        minn, maxx,
    )

    # ---- combine ----------------------------------------------------------
    results = pd.concat([df, df2])

    # Discard rows with no d13C measurement, then make the numeric columns
    # numeric.
    results = results[results['d13C'] != '']
    results = results.astype({'d13C': float, 'Lat': float, 'Lon': float, 'Dep': float})

    return(results)
    

In [5]:

def pl_cores(minn, maxx):

    names = ['Core', 'Location', 'Lat', 'Lon', 'Dep']

    # Read in the data
    indopac = pd.read_table("../Moving_water_mass/Data/Core_files/indopac_core_data_LS16.txt", delimiter = ',', names = names)
    atl = pd.read_table("../Moving_water_mass/Data/Core_files/atl_core_data_LS16.txt", delimiter = ',', names = names)
    add = pd.read_table("../Moving_water_mass/Data/Core_files/Additional_core_locations.txt", delimiter = ',', usecols = [0, 1, 2, 3, 4], names = names)

    # Join all into a single dataframe
    df = indopac.append(atl)
    df = df.append(add)
    df = df.reset_index(drop = True)

    df['d18O names'] = df['Core'] + '_ageLS16.txt'
    df['d13C names'] = df['Core'] + '_d13C.txt'

    # Loop over the dataset and interpolate each core
    i = 0
    results_dict = {}

    while i < df.count()[0]:

        try:
            df_d18O = pd.read_table('../Moving_water_mass/Data/Core_files/' + df.loc[i]['d18O names'], delim_whitespace = True, names = ['depth', 'age'], skip_blank_lines = True, na_values = 'NAN')
        except:
            i += 1
            continue
        try:
            df_d13C = pd.read_table('../Moving_water_mass/Data/Core_files/' + df.loc[i]['d13C names'], delim_whitespace = True, names = ['depth', 'd13C'], skip_blank_lines = True, na_values = 'NAN')
        except:
            i += 1
            continue

        df_d18O = df_d18O.dropna(subset = ['age']) 
        df_d13C = df_d13C.dropna(subset = ['d13C'])

        df_d18O = df_d18O.reset_index(drop = True)
        df_d13C = df_d13C.reset_index(drop = True)

        interp = scipy.interpolate.interp1d(df_d18O['depth'], df_d18O['age'], bounds_error = True)
        try:
            df_d13C['age'] = interp(df_d13C['depth'])
        except:
            try:
                interp2 = scipy.interpolate.interp1d(df_d18O['depth'], df_d18O['age'], bounds_error = False)
                df_d13C['age'] = interp2(df_d13C['depth'])
            except:
                i += 1
                continue

        df_d13C = df_d13C.dropna(subset = ['age'])
        df_d13C = df_d13C.reset_index(drop = True)

        df_d13C = df_d13C[df_d13C['age'] > minn]
        df_d13C = df_d13C[df_d13C['age'] < maxx]

        if len(df_d13C) > 0:
            df_results = df.drop(['d18O names', 'd13C names'], axis = 1)
            df_results = df_results.loc[df_results.index.repeat(len(df_d13C))].loc[[i]]

            df_d13C = df_d13C.drop(['depth'], axis = 1)

            df_results = df_results.reset_index(drop = True).join(df_d13C.reset_index(drop = True))
            results_dict.update({
                df_results.Core[0] : df_results.drop(['Core'], axis = 1)
            })

        i += 1
        
    df_results = pd.concat(results_dict).reset_index()
    df_results = df_results.rename(columns = {'level_0' : 'Core'})
    df_results = df_results.drop(['level_1'], axis = 1)

    return(df_results)
    

In [6]:
# Holocene age window passed to the core readers as (minn, maxx)
# (presumably in kyr BP, matching the core age models -- TODO confirm).
Hol_min, Hol_max = 2, 8


# Atlantic

##  Holocene Calculations

In [7]:
# Read the d13C records for the Holocene age window from both sources,
# using the functions defined above.
df_pl = pl_cores(Hol_min,Hol_max)

df_oliver = Oliver_cores(Hol_min,Hol_max)

# Join the two dataframes (pd.concat replaces DataFrame.append, which newer
# pandas releases no longer provide).
df_results = pd.concat([df_pl, df_oliver])

# Keep only the Atlantic cores, with depths made positive.
df = df_results.reset_index(drop = True)
df['Dep'] = abs(df['Dep'])

df_atl = df[df['Location'] == 'Atlantic']
df_atl = df_atl.reset_index(drop = True)
df_atl['age'] = df_atl.age.astype(float)

# Lower and upper edges of consecutive age slices of width `slice_width`.
lower = np.arange(Hol_min, Hol_max, slice_width)
upper = np.arange(Hol_min+slice_width, Hol_max+slice_width, slice_width)

proxy_compilation = {}

for low, up in zip(lower, upper):
    # Measurements with low <= age < up, grouped by core position.
    df_slice = df_atl[(df_atl['age'] >= low) & (df_atl['age'] < up)]
    grouped_slice = df_slice.groupby(['Lat', 'Lon', 'Dep'])

    slice_averaged = {}

    for key, group in grouped_slice:
        # Mean of each group plus the number of d13C values behind it,
        # stored under the name of the group's first core.
        group_averaged = group.mean()
        group_count = group.count()
        group_averaged['count'] = group_count['d13C']
        slice_averaged.update({
            group.Core.reset_index(drop=True)[0] : pd.DataFrame(group_averaged)
        })

    # A slice with no measurements has nothing to average; skip it rather
    # than let pd.concat fail on an empty dict.
    if not slice_averaged:
        continue

    slice_averaged = pd.concat(slice_averaged, axis=1).T
    slice_averaged = slice_averaged.drop(['age'],axis=1)

    proxy_compilation.update({
        low : slice_averaged.T
    })

# Stack all slices: after the transpose the index levels are
# (slice lower edge, core name, 0); the constant third level is dropped.
proxy_compilation = pd.concat(proxy_compilation,axis=1).T.reset_index(drop=False)
proxy_compilation = proxy_compilation.drop('level_2',axis=1)
proxy_compilation = proxy_compilation.rename(columns={'level_0' : 'lower', 'level_1' : 'Core', 'Dep' : 'Ocean_depth'})



In [8]:
# Core list for the LaTeX table in the paper: everything except the
# measurement columns, one row per distinct remaining combination.
latex_table_Hol = (
    proxy_compilation
    .drop(['d13C', 'count', 'lower'], axis=1)
    .drop_duplicates()
)

# The core name is not used by the cells that follow; remove it in place.
proxy_compilation.drop(columns=['Core'], inplace=True)


In [9]:
# Interpolate every record onto the full list of time slices.

samples_with_time_period = proxy_compilation

unique_lats = np.unique(samples_with_time_period['Lat'])
years_list = np.unique(samples_with_time_period.lower)

samples_with_time_period = samples_with_time_period.reset_index(drop=True)

interpolated_proxies = {}

for unique_lat in unique_lats:

    # Records are identified by latitude alone.
    # NOTE(review): two cores sharing a latitude would be treated as one.
    df_temp = samples_with_time_period[samples_with_time_period['Lat'] == unique_lat]

    try:
        # Linear interpolation of d13C against the slice's lower edge;
        # slices outside the record's range come back as NaN.
        interp = scipy.interpolate.interp1d(df_temp['lower'],
                                        df_temp['d13C'],
                                        bounds_error = False)
    except ValueError:
        # The record cannot be interpolated: keep it as it is, without
        # 'count', as the Pacific Holocene cell does. The `continue` matters:
        # without it the code below would run with the previous latitude's
        # interpolator and overwrite this entry.
        interpolated_proxies.update({
            unique_lat : df_temp.drop('count',axis=1)
        })
        continue

    single_proxy_interpolated = pd.DataFrame({'lower' : years_list, 'd13C' : interp(years_list)})

    # Repeat the record's location (taken from its first row) for every slice.
    location_repeated = pd.concat([df_temp.reset_index(0).loc[0,['Lat', 'Lon', 'Ocean_depth']]] * len(single_proxy_interpolated), axis=1).T
    single_proxy_interpolated = pd.concat([location_repeated.reset_index(drop=True), single_proxy_interpolated.reset_index(drop=True)],axis=1)

    interpolated_proxies.update({
        unique_lat : single_proxy_interpolated
    })

interpolated_samples = pd.concat(interpolated_proxies).reset_index(drop=True)
interpolated_samples_backup = interpolated_samples.copy()

# Merge the interpolated values with the original ones: '_merge' is 'both'
# for rows that match a row of proxy_compilation on all five keys and
# 'right_only' for rows that exist only after interpolation.
interpolated_samples = pd.merge(
    proxy_compilation, interpolated_samples, on=['Lat','Lon','Ocean_depth','d13C','lower'],
    indicator=True,how='right'
)

# Replace the keys 'both' or 'right_only' with colors, to set scatter plot fill
interpolated_samples.replace('right_only','none',inplace=True)

# Samples at exactly `dividing_depth` count as deep, matching the edge-colour
# rule below (with strict '<' and '>' on both sides they were dropped).
shallower_samples = interpolated_samples[interpolated_samples['Ocean_depth'] < dividing_depth].replace('both',str(point_color_shallow))
deeper_samples = interpolated_samples[interpolated_samples['Ocean_depth'] >= dividing_depth].replace('both',str(point_color_deep))

interpolated_samples = pd.concat([shallower_samples, deeper_samples])
edgecolor = [point_color_deep if x >= dividing_depth else point_color_shallow for x in interpolated_samples.Ocean_depth]
interpolated_samples['edgecolor'] = edgecolor

In [10]:

# Volume-weighted average for each time slice.
#
# Each sample gets the weight of the regional box it falls in. Samples that
# fall in no box keep a NaN weight and so are left out of the grouping below.
# Later assignments overwrite earlier ones where boxes overlap, so the order
# of the blocks matters.
#
# The longitude limits of the NEA, SEA and SA boxes are combined with `&`.
# They were previously joined with `|`, which is true for every longitude
# and so left those boxes without any longitude limit.

interpolated_samples['weights'] = np.nan

# NEA
interpolated_samples.loc[((interpolated_samples['Lon'] < 20) & (interpolated_samples['Lon'] > (-33))) & (interpolated_samples['Lat'] > 0.1),
                        'weights'] = 4.3

# NWA
interpolated_samples.loc[(interpolated_samples['Lon'] < (-33)) & (interpolated_samples['Lon'] > (-180)) & (interpolated_samples['Lat'] > 0.1),
                         'weights'] = 4.9

# SEA
interpolated_samples.loc[((interpolated_samples['Lon'] < 30) & (interpolated_samples['Lon'] > (-14.6))) & (interpolated_samples['Lat'] < 0) & (interpolated_samples['Lat'] > -55),
                         'weights'] = 3.5

# SA
interpolated_samples.loc[((interpolated_samples['Lon'] < 30) & (interpolated_samples['Lon'] > (-22))) & (interpolated_samples['Lat'] < -40) & (interpolated_samples['Lat'] > -55),
                        'weights'] = 0.7

# SWA
interpolated_samples.loc[(interpolated_samples['Lon'] > (-60)) & (interpolated_samples['Lon'] < (-14.6)) & (interpolated_samples['Lat'] < 0) & (interpolated_samples['Lat'] > -55),
                        'weights'] = 5.0

# group the cores based on the age and the region (indicated by weights)
grouped_by_age_region = interpolated_samples.groupby(['lower', 'weights'])

averaged_by_age_region = {}

# find group means
# NOTE(review): np.mean on a DataFrame is relied on here to give per-column
# means; confirm this still holds for the installed pandas version.
for key, group in grouped_by_age_region:
    averaged_by_age_region.update({
        key: np.mean(group)
    })

averaged_by_age_region = pd.concat(averaged_by_age_region,axis=1).T.reset_index(drop=True)

# Now there is one value for each region (weight) and each time slice.
# Group by slice and use the weights to find the average d13C for that slice.

grouped_by_age = averaged_by_age_region.groupby('lower')

averaged_by_age = {}
stdev_by_age = {}

for key, group in grouped_by_age:

    # Unweighted mean of the regional values
    avg = np.mean(group.d13C)

    # Weighted mean of the regional values
    averaged_by_age.update({
        np.mean(group['lower']) : np.sum(group['d13C'] * group['weights'])/np.sum(group['weights'])
    })
    # NOTE(review): despite the name, this is the weighted mean of squared
    # deviations from the unweighted mean `avg`; no square root is taken.
    stdev_by_age.update({
        np.mean(group['lower']) : np.average((group.d13C-avg)**2,weights=group.weights)
    })


averaged_by_age = pd.DataFrame.from_dict(averaged_by_age,orient='index').reset_index(drop=False).rename(columns={'index' : 'lower', 0 : 'd13C'})
stdev_by_age = pd.DataFrame.from_dict(stdev_by_age,orient='index').reset_index(drop=False).rename(columns={'index' : 'lower', 0 : 'stdev'})

# One row per slice with columns lower, d13C, stdev (merged on 'lower').
averaged_by_age = pd.merge(left=averaged_by_age,right=stdev_by_age)
averaged_by_age = averaged_by_age.sort_values('lower')


In [11]:
# Keep the Atlantic Holocene results under their own names, since the
# generic names are reused by the following sections.
averaged_by_age_Holocene_Atlantic = averaged_by_age.copy()
interpolated_samples_Holocene_Atlantic = interpolated_samples.copy()

# The saved sample table does not include the regional weights.
interpolated_samples = interpolated_samples.drop(columns=['weights'])

# Write both tables to disk without the index column.
averaged_by_age.to_csv(
    "Data/Atlantic_Holocene_profile_PL.csv",
    index=False,
)
interpolated_samples.to_csv(
    "/srv/ccrc/data06/z5145948/Moving_water_mass/Data/interpolated_Hol_Atlantic.csv",
    index=False,
)

## LIG Calculations

In [12]:
# Read the d13C records for the Last Interglacial (LIG) age window from both
# sources, using the functions defined above.

# Age range to look over
LIG_min = 118
LIG_max = 130

df_pl = pl_cores(LIG_min,LIG_max)

df_oliver = Oliver_cores(LIG_min,LIG_max)

# Join the two dataframes (pd.concat replaces DataFrame.append, which newer
# pandas releases no longer provide).
df_results = pd.concat([df_pl, df_oliver])

# Keep only the Atlantic cores, with depths made positive.
df = df_results.reset_index(drop = True)
df['Dep'] = abs(df['Dep'])

df_atl = df[df['Location'] == 'Atlantic']
df_atl = df_atl.reset_index(drop = True)
df_atl['age'] = df_atl.age.astype(float)

# Lower and upper edges of consecutive age slices of width `slice_width`.
lower = np.arange(LIG_min, LIG_max, slice_width)
upper = np.arange(LIG_min+slice_width, LIG_max+slice_width, slice_width)

proxy_compilation = {}

for low, up in zip(lower, upper):
    # Measurements with low <= age < up, grouped by core position.
    df_slice = df_atl[(df_atl['age'] >= low) & (df_atl['age'] < up)]
    grouped_slice = df_slice.groupby(['Lat', 'Lon', 'Dep'])

    slice_averaged = {}

    for key, group in grouped_slice:
        # Mean of each group plus the number of d13C values behind it,
        # stored under the name of the group's first core.
        group_averaged = group.mean()
        group_count = group.count()
        group_averaged['count'] = group_count['d13C']
        slice_averaged.update({
            group.Core.reset_index(drop=True)[0] : pd.DataFrame(group_averaged)
        })

    # A slice with no measurements has nothing to average; skip it rather
    # than let pd.concat fail on an empty dict.
    if not slice_averaged:
        continue

    slice_averaged = pd.concat(slice_averaged, axis=1).T
    slice_averaged = slice_averaged.drop(['age'],axis=1)

    proxy_compilation.update({
        low : slice_averaged.T
    })

# Stack all slices: after the transpose the index levels are
# (slice lower edge, core name, 0); the constant third level is dropped.
proxy_compilation = pd.concat(proxy_compilation,axis=1).T.reset_index(drop=False)
proxy_compilation = proxy_compilation.drop('level_2',axis=1)
proxy_compilation = proxy_compilation.rename(columns={'level_0' : 'lower', 'level_1' : 'Core', 'Dep' : 'Ocean_depth'})


In [13]:
# Core list for the LaTeX table in the paper: everything except the
# measurement columns, one row per distinct remaining combination.
latex_table_LIG = (
    proxy_compilation
    .drop(['d13C', 'count', 'lower'], axis=1)
    .drop_duplicates()
)

# The core name is not used by the cells that follow; remove it in place.
proxy_compilation.drop(columns=['Core'], inplace=True)

In [14]:
# Combine the Holocene and LIG core tables. The outer merge keeps every core
# and the indicator column '_merge' records which table(s) each row came from.
latex_table = latex_table_Hol.merge(latex_table_LIG,on=['Core','Lat','Lon','Ocean_depth'],how='outer',indicator=True)

# Format the table: readable period labels, readable headers, coordinates
# rounded to two decimals and stored as text, rows ordered by core name.
latex_table.replace(to_replace='both',value='Holocene & LIG',inplace=True)
latex_table.replace(to_replace='right_only',value='LIG',inplace=True)
latex_table.replace(to_replace='left_only',value='Holocene',inplace=True)
latex_table = latex_table.rename(columns={'Lat':'Latitude','Lon' : 'Longitude', 'Ocean_depth' : 'Depth (m)', '_merge':'Time Period'})
latex_table['Latitude'] = [str(round(x, 2)) for x in latex_table.Latitude]
latex_table['Longitude'] = [str(round(x, 2)) for x in latex_table.Longitude]
latex_table.sort_values(by='Core',inplace=True)

# Convert to a LaTeX longtable string
latex_string = latex_table.to_latex(index=False,longtable=True)

# Remove the booktabs rules from the generated table
latex_string = latex_string.replace('\\toprule','')
latex_string = latex_string.replace('\\midrule','')
latex_string = latex_string.replace('\\bottomrule','')

# Caption text. Raw strings keep the LaTeX backslashes literal; the previous
# plain strings relied on the invalid escape sequences '\c' and '\d' being
# passed through unchanged. The resulting text is identical.
caption = [
    r'\caption{Latitude, Longitude, and depth (m) coordinates for the Atlantic benthic foraminifera',
    r'$\delta^{13}$C cores. \textit{Time Period} refers to whether this core had',
    'Holocene, Last Interglacial, or Holocene and Last Interglacial data.}'
]

caption = ' '.join(caption)

# Insert the caption immediately before the end of the longtable
latex_string = latex_string.replace('\\end{longtable}',caption+'\\end{longtable}')

# Write to a file; the with-block closes it even if the write fails
with open("Figures/Cores_tables_Atlantic.tex","w") as file1:
    file1.write(latex_string)
  

In [15]:
# Interpolate every record onto the full list of time slices.

samples_with_time_period = proxy_compilation

unique_lats = np.unique(samples_with_time_period['Lat'])
years_list = np.unique(samples_with_time_period.lower)

samples_with_time_period = samples_with_time_period.reset_index(drop=True)

interpolated_proxies = {}

for unique_lat in unique_lats:

    # Records are identified by latitude alone.
    # NOTE(review): two cores sharing a latitude would be treated as one.
    df_temp = samples_with_time_period[samples_with_time_period['Lat'] == unique_lat]

    try:
        # Linear interpolation of d13C against the slice's lower edge;
        # slices outside the record's range come back as NaN.
        interp = scipy.interpolate.interp1d(df_temp['lower'],
                                        df_temp['d13C'],
                                        bounds_error = False)
    except ValueError:
        # The record cannot be interpolated: keep it as it is, without
        # 'count', as the Pacific Holocene cell does. The `continue` matters:
        # without it the code below would run with the previous latitude's
        # interpolator and overwrite this entry.
        interpolated_proxies.update({
            unique_lat : df_temp.drop('count',axis=1)
        })
        continue

    single_proxy_interpolated = pd.DataFrame({'lower' : years_list, 'd13C' : interp(years_list)})

    # Repeat the record's location (taken from its first row) for every slice.
    location_repeated = pd.concat([df_temp.reset_index(0).loc[0,['Lat', 'Lon', 'Ocean_depth']]] * len(single_proxy_interpolated), axis=1).T
    single_proxy_interpolated = pd.concat([location_repeated.reset_index(drop=True), single_proxy_interpolated.reset_index(drop=True)],axis=1)

    interpolated_proxies.update({
        unique_lat : single_proxy_interpolated
    })

interpolated_samples = pd.concat(interpolated_proxies).reset_index(drop=True)
interpolated_samples_backup = interpolated_samples.copy()

# Merge the interpolated values with the original ones: '_merge' is 'both'
# for rows that match a row of proxy_compilation on all five keys and
# 'right_only' for rows that exist only after interpolation.
interpolated_samples = pd.merge(
    proxy_compilation, interpolated_samples, on=['Lat','Lon','Ocean_depth','d13C','lower'],
    indicator=True,how='right'
)

# Replace the keys 'both' or 'right_only' with colors, to set scatter plot fill
interpolated_samples.replace('right_only','none',inplace=True)

# Samples at exactly `dividing_depth` count as deep, matching the edge-colour
# rule below (with strict '<' and '>' on both sides they were dropped).
shallower_samples = interpolated_samples[interpolated_samples['Ocean_depth'] < dividing_depth].replace('both',str(point_color_shallow))
deeper_samples = interpolated_samples[interpolated_samples['Ocean_depth'] >= dividing_depth].replace('both',str(point_color_deep))

interpolated_samples = pd.concat([shallower_samples, deeper_samples])
edgecolor = [point_color_deep if x >= dividing_depth else point_color_shallow for x in interpolated_samples.Ocean_depth]
interpolated_samples['edgecolor'] = edgecolor


In [16]:

# Volume-weighted average for each time slice.
#
# Each sample gets the weight of the regional box it falls in. Samples that
# fall in no box keep a NaN weight and so are left out of the grouping below.
# Later assignments overwrite earlier ones where boxes overlap, so the order
# of the blocks matters.
#
# The longitude limits of the NEA, SEA and SA boxes are combined with `&`.
# They were previously joined with `|`, which is true for every longitude
# and so left those boxes without any longitude limit.

interpolated_samples['weights'] = np.nan

# NEA
interpolated_samples.loc[((interpolated_samples['Lon'] < 20) & (interpolated_samples['Lon'] > (-33))) & (interpolated_samples['Lat'] > 0.1),
                        'weights'] = 4.3

# NWA
interpolated_samples.loc[(interpolated_samples['Lon'] < (-33)) & (interpolated_samples['Lon'] > (-180)) & (interpolated_samples['Lat'] > 0.1),
                         'weights'] = 4.9

# SEA
interpolated_samples.loc[((interpolated_samples['Lon'] < 30) & (interpolated_samples['Lon'] > (-14.6))) & (interpolated_samples['Lat'] < 0) & (interpolated_samples['Lat'] > -55),
                         'weights'] = 3.5

# SA
interpolated_samples.loc[((interpolated_samples['Lon'] < 30) & (interpolated_samples['Lon'] > (-22))) & (interpolated_samples['Lat'] < -40) & (interpolated_samples['Lat'] > -55),
                        'weights'] = 0.7

# SWA
interpolated_samples.loc[(interpolated_samples['Lon'] > (-60)) & (interpolated_samples['Lon'] < (-14.6)) & (interpolated_samples['Lat'] < 0) & (interpolated_samples['Lat'] > -55),
                        'weights'] = 5.0

# group the cores based on the age and the region (indicated by weights)
grouped_by_age_region = interpolated_samples.groupby(['lower', 'weights'])

averaged_by_age_region = {}

# find group means
# NOTE(review): np.mean on a DataFrame is relied on here to give per-column
# means; confirm this still holds for the installed pandas version.
for key, group in grouped_by_age_region:
    averaged_by_age_region.update({
        key: np.mean(group)
    })

averaged_by_age_region = pd.concat(averaged_by_age_region,axis=1).T.reset_index(drop=True)

# Now there is one value for each region (weight) and each time slice.
# Group by slice and use the weights to find the average d13C for that slice.

grouped_by_age = averaged_by_age_region.groupby('lower')

averaged_by_age = {}
stdev_by_age = {}

for key, group in grouped_by_age:

    # Unweighted mean of the regional values
    avg = np.mean(group.d13C)

    # Weighted mean of the regional values
    averaged_by_age.update({
        np.mean(group['lower']) : np.sum(group['d13C'] * group['weights'])/np.sum(group['weights'])
    })
    # NOTE(review): despite the name, this is the weighted mean of squared
    # deviations from the unweighted mean `avg`; no square root is taken.
    stdev_by_age.update({
        np.mean(group['lower']) : np.average((group.d13C-avg)**2,weights=group.weights)
    })


averaged_by_age = pd.DataFrame.from_dict(averaged_by_age,orient='index').reset_index(drop=False).rename(columns={'index' : 'lower', 0 : 'd13C'})
stdev_by_age = pd.DataFrame.from_dict(stdev_by_age,orient='index').reset_index(drop=False).rename(columns={'index' : 'lower', 0 : 'stdev'})

# One row per slice with columns lower, d13C, stdev (merged on 'lower').
averaged_by_age = pd.merge(left=averaged_by_age,right=stdev_by_age)
averaged_by_age = averaged_by_age.sort_values('lower')


In [17]:
# Keep the Atlantic LIG results under their own names, since the generic
# names are reused by the following sections.
averaged_by_age_LIG_Atlantic = averaged_by_age.copy()
interpolated_samples_LIG_Atlantic = interpolated_samples.copy()

# The saved sample table does not include the regional weights.
interpolated_samples = interpolated_samples.drop(columns=['weights'])

# Write both tables to disk without the index column.
averaged_by_age.to_csv(
    "Data/Atlantic_LIG_profile_PL.csv",
    index=False,
)
interpolated_samples.to_csv(
    "/srv/ccrc/data06/z5145948/Moving_water_mass/Data/interpolated_LIG_Atlantic.csv",
    index=False,
)


# Pacific

## Holocene

In [18]:
# Read the d13C records for the Holocene age window from both sources,
# using the functions defined above.
df_pl = pl_cores(Hol_min,Hol_max)

df_oliver = Oliver_cores(Hol_min,Hol_max)

# Join the two dataframes (pd.concat replaces DataFrame.append, which newer
# pandas releases no longer provide).
df_results = pd.concat([df_pl, df_oliver])

# Keep only the Pacific cores, with depths made positive.
df = df_results.reset_index(drop = True)
df['Dep'] = abs(df['Dep'])

df_pac = df[df['Location'] == 'Pacific']
df_pac = df_pac.reset_index(drop = True)
df_pac['age'] = df_pac.age.astype(float)

# Lower and upper edges of consecutive age slices of width `slice_width`.
lower = np.arange(Hol_min, Hol_max, slice_width)
upper = np.arange(Hol_min+slice_width, Hol_max+slice_width, slice_width)

proxy_compilation = {}

for low, up in zip(lower, upper):
    # Measurements with low <= age < up, grouped by core position.
    df_slice = df_pac[(df_pac['age'] >= low) & (df_pac['age'] < up)]
    grouped_slice = df_slice.groupby(['Lat', 'Lon', 'Dep'])

    slice_averaged = {}

    for key, group in grouped_slice:
        # Mean of each group plus the number of d13C values behind it,
        # stored under the name of the group's first core.
        group_averaged = group.mean()
        group_count = group.count()
        group_averaged['count'] = group_count['d13C']
        slice_averaged.update({
            group.Core.reset_index(drop=True)[0] : pd.DataFrame(group_averaged)
        })

    # A slice with no measurements has nothing to average; skip it rather
    # than let pd.concat fail on an empty dict.
    if not slice_averaged:
        continue

    slice_averaged = pd.concat(slice_averaged, axis=1).T
    slice_averaged = slice_averaged.drop(['age'],axis=1)

    proxy_compilation.update({
        low : slice_averaged.T
    })

# Stack all slices: after the transpose the index levels are
# (slice lower edge, core name, 0); the constant third level is dropped.
proxy_compilation = pd.concat(proxy_compilation,axis=1).T.reset_index(drop=False)
proxy_compilation = proxy_compilation.drop('level_2',axis=1)
proxy_compilation = proxy_compilation.rename(columns={'level_0' : 'lower', 'level_1' : 'Core', 'Dep' : 'Ocean_depth'})

In [19]:
# Core list for the LaTeX table in the paper: everything except the
# measurement columns, one row per distinct remaining combination.
latex_table_Hol = (
    proxy_compilation
    .drop(['d13C', 'count', 'lower'], axis=1)
    .drop_duplicates()
)

# The core name is not used by the cells that follow; remove it in place.
proxy_compilation.drop(columns=['Core'], inplace=True)


In [20]:
# Interpolate every record onto the full list of time slices.

samples_with_time_period = proxy_compilation

unique_lats = np.unique(samples_with_time_period['Lat'])
years_list = np.unique(samples_with_time_period.lower)

samples_with_time_period = samples_with_time_period.reset_index(drop=True)

interpolated_proxies = {}

for unique_lat in unique_lats:

    # Records are identified by latitude alone.
    # NOTE(review): two cores sharing a latitude would be treated as one.
    df_temp = samples_with_time_period[samples_with_time_period['Lat'] == unique_lat]

    try:
        # Linear interpolation of d13C against the slice's lower edge;
        # slices outside the record's range come back as NaN.
        interp = scipy.interpolate.interp1d(df_temp['lower'],
                                        df_temp['d13C'],
                                        bounds_error = False)
        single_proxy_interpolated = pd.DataFrame({'lower' : years_list, 'd13C' : interp(years_list)})

        # Repeat the record's location (taken from its first row) for every slice.
        location_repeated = pd.concat([df_temp.reset_index(0).loc[0,['Lat', 'Lon', 'Ocean_depth']]] * len(single_proxy_interpolated), axis=1).T
        single_proxy_interpolated = pd.concat([location_repeated.reset_index(drop=True), single_proxy_interpolated.reset_index(drop=True)],axis=1)

        interpolated_proxies.update({
            unique_lat : single_proxy_interpolated
        })

    except ValueError:
        # The record cannot be interpolated: keep it as it is, without 'count'.
        interpolated_proxies.update({
            unique_lat : df_temp.drop('count',axis=1)
        })

interpolated_samples = pd.concat(interpolated_proxies).reset_index(drop=True)
interpolated_samples_backup = interpolated_samples.copy()

# Merge the interpolated values with the original ones: '_merge' is 'both'
# for rows that match a row of proxy_compilation on all five keys and
# 'right_only' for rows that exist only after interpolation.
interpolated_samples = pd.merge(
    proxy_compilation, interpolated_samples, on=['Lat','Lon','Ocean_depth','d13C','lower'],
    indicator=True,how='right'
)

# Replace the keys 'both' or 'right_only' with colors, to set scatter plot fill
interpolated_samples.replace('right_only','none',inplace=True)

# Samples at exactly `dividing_depth` count as deep, matching the edge-colour
# rule below (with strict '<' and '>' on both sides they were dropped).
shallower_samples = interpolated_samples[interpolated_samples['Ocean_depth'] < dividing_depth].replace('both',str(point_color_shallow))
deeper_samples = interpolated_samples[interpolated_samples['Ocean_depth'] >= dividing_depth].replace('both',str(point_color_deep))

interpolated_samples = pd.concat([shallower_samples, deeper_samples])
edgecolor = [point_color_deep if x >= dividing_depth else point_color_shallow for x in interpolated_samples.Ocean_depth]
interpolated_samples['edgecolor'] = edgecolor

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




In [21]:

# Volume-weighted average for each time slice.
#
# The Pacific is split into two regions by the sign of the latitude; a
# sample at exactly 0 keeps a NaN weight.

interpolated_samples['weights'] = np.nan

# North
interpolated_samples.loc[interpolated_samples['Lat'] > 0, 'weights'] = 21.2

# South
interpolated_samples.loc[interpolated_samples['Lat'] < 0, 'weights'] = 23.9

# One group per (time slice, region); the weight value identifies the region.
grouped_by_age_region = interpolated_samples.groupby(['lower', 'weights'])

# Mean of every group
averaged_by_age_region = {}
for key, group in grouped_by_age_region:
    averaged_by_age_region[key] = np.mean(group)

averaged_by_age_region = (
    pd.concat(averaged_by_age_region, axis=1)
    .T
    .reset_index(drop=True)
)

# There is now one value per region and time slice. Group by slice and
# combine the regional values using the weights.
grouped_by_age = averaged_by_age_region.groupby('lower')

averaged_by_age = {}
stdev_by_age = {}

for key, group in grouped_by_age:

    # Unweighted mean of the regional values
    avg = np.mean(group.d13C)

    # Weighted mean of the regional values
    averaged_by_age[np.mean(group['lower'])] = (
        np.sum(group['d13C'] * group['weights']) / np.sum(group['weights'])
    )
    # NOTE(review): despite the name, this is the weighted mean of squared
    # deviations from the unweighted mean `avg`; no square root is taken.
    stdev_by_age[np.mean(group['lower'])] = np.average(
        (group.d13C - avg) ** 2, weights=group.weights
    )

averaged_by_age = (
    pd.DataFrame.from_dict(averaged_by_age, orient='index')
    .reset_index(drop=False)
    .rename(columns={'index' : 'lower', 0 : 'd13C'})
)
stdev_by_age = (
    pd.DataFrame.from_dict(stdev_by_age, orient='index')
    .reset_index(drop=False)
    .rename(columns={'index' : 'lower', 0 : 'stdev'})
)

# One row per slice with columns lower, d13C, stdev (merged on 'lower').
averaged_by_age = pd.merge(left=averaged_by_age, right=stdev_by_age)
averaged_by_age = averaged_by_age.sort_values('lower')


In [22]:
# Keep the Pacific Holocene results under their own names, since the generic
# names are reused by the following sections.
averaged_by_age_Holocene_Pacific = averaged_by_age.copy()
interpolated_samples_Holocene_Pacific = interpolated_samples.copy()

# The saved sample table does not include the regional weights.
interpolated_samples = interpolated_samples.drop(columns=['weights'])

# Write both tables to disk without the index column.
averaged_by_age.to_csv(
    "Data/Pacific_Holocene_profile_PL.csv",
    index=False,
)
interpolated_samples.to_csv(
    "/srv/ccrc/data06/z5145948/Moving_water_mass/Data/interpolated_Hol_Pacific.csv",
    index=False,
)

## LIG

In [23]:
# Read the d13C records for the Last Interglacial (LIG) age window from both
# sources, using the functions defined above.
df_pl = pl_cores(LIG_min,LIG_max)

df_oliver = Oliver_cores(LIG_min,LIG_max)

# Join the two dataframes (pd.concat replaces DataFrame.append, which newer
# pandas releases no longer provide).
df_results = pd.concat([df_pl, df_oliver])

# Keep only the Pacific cores, with depths made positive.
df = df_results.reset_index(drop = True)
df['Dep'] = abs(df['Dep'])

df_pac = df[df['Location'] == 'Pacific']
df_pac = df_pac.reset_index(drop = True)
df_pac['age'] = df_pac.age.astype(float)

# Lower and upper edges of consecutive age slices of width `slice_width`.
lower = np.arange(LIG_min, LIG_max, slice_width)
upper = np.arange(LIG_min+slice_width, LIG_max+slice_width, slice_width)

proxy_compilation = {}

for low, up in zip(lower, upper):
    # Measurements with low <= age < up, grouped by core position.
    df_slice = df_pac[(df_pac['age'] >= low) & (df_pac['age'] < up)]
    grouped_slice = df_slice.groupby(['Lat', 'Lon', 'Dep'])

    slice_averaged = {}

    for key, group in grouped_slice:
        # Mean of each group plus the number of d13C values behind it,
        # stored under the name of the group's first core.
        group_averaged = group.mean()
        group_count = group.count()
        group_averaged['count'] = group_count['d13C']
        slice_averaged.update({
            group.Core.reset_index(drop=True)[0] : pd.DataFrame(group_averaged)
        })

    # A slice with no measurements has nothing to average; skip it rather
    # than let pd.concat fail on an empty dict.
    if not slice_averaged:
        continue

    slice_averaged = pd.concat(slice_averaged, axis=1).T
    slice_averaged = slice_averaged.drop(['age'],axis=1)

    proxy_compilation.update({
        low : slice_averaged.T
    })

# Stack all slices: after the transpose the index levels are
# (slice lower edge, core name, 0); the constant third level is dropped.
proxy_compilation = pd.concat(proxy_compilation,axis=1).T.reset_index(drop=False)
proxy_compilation = proxy_compilation.drop('level_2',axis=1)
proxy_compilation = proxy_compilation.rename(columns={'level_0' : 'lower', 'level_1' : 'Core', 'Dep' : 'Ocean_depth'})


In [24]:
# List of unique LIG cores (name and position) for the LaTeX table in the
# paper: drop the per-slice measurement columns, then de-duplicate.
latex_table_LIG = proxy_compilation.drop(['d13C', 'count', 'lower'], axis=1).drop_duplicates()

# The core name is not carried into the rest of the analysis
proxy_compilation = proxy_compilation.drop(['Core'], axis=1)

In [25]:
# Combine the Holocene and LIG core lists into one table. indicator=True adds
# a '_merge' column recording which list(s) each core came from.
latex_table = latex_table_Hol.merge(latex_table_LIG,on=['Core','Lat','Lon','Ocean_depth'],how='outer',indicator=True)

# Format the table: turn the merge indicator into a readable time period
latex_table.replace(to_replace='both',value='Holocene & LIG',inplace=True)
latex_table.replace(to_replace='right_only',value='LIG',inplace=True)
latex_table.replace(to_replace='left_only',value='Holocene',inplace=True)
latex_table = latex_table.rename(columns={'Lat':'Latitude','Lon' : 'Longitude', 'Ocean_depth' : 'Depth (m)', '_merge':'Time Period'})
# Coordinates are printed with two decimals
latex_table['Latitude'] = [str(round(x, 2)) for x in latex_table.Latitude]
latex_table['Longitude'] = [str(round(x, 2)) for x in latex_table.Longitude]
latex_table.sort_values(by='Core',inplace=True)

# Convert to string of latex markdown
latex_string = latex_table.to_latex(index=False,longtable=True)

# Strip the booktabs rules from the generated table
latex_string = latex_string.replace('\\toprule','')
latex_string = latex_string.replace('\\midrule','')
latex_string = latex_string.replace('\\bottomrule','')

# Add caption to latex table. Raw strings keep the LaTeX backslashes literal
# without relying on invalid escape sequences such as '\c' and '\d'.
caption = [
    r'\caption{Latitude, Longitude, and depth (m) coordinates for the Pacific benthic foraminifera',
    r'$\delta^{13}$C cores. \textit{Time Period} refers to whether this core had',
    r'Holocene, Last Interglacial, or Holocene and Last Interglacial data.}'
]

caption = ' '.join(caption)

# The caption is inserted just before the end of the longtable environment
latex_string = latex_string.replace('\\end{longtable}',caption+'\\end{longtable}')

# Write to a file; the context manager closes it even if the write fails
with open("Figures/Cores_tables_Pacific.tex","w") as tex_file:
    tex_file.write(latex_string)
  

In [26]:
# Interpolate each core's d13C onto the full list of time slices, then flag
# every row as measured or interpolated and attach the plotting colours.

samples_with_time_period = proxy_compilation

unique_lats = np.unique(samples_with_time_period['Lat'])
years_list = np.unique(samples_with_time_period.lower)

samples_with_time_period = samples_with_time_period.reset_index(drop=True)

# Columns shared by interpolated and non-interpolated records
sample_columns = ['Lat', 'Lon', 'Ocean_depth', 'lower', 'd13C']

interpolated_proxies = {}

# NOTE(review): cores are told apart by latitude alone, so two cores sharing a
# latitude (different longitude or depth) are interpolated as one record.
for unique_lat in unique_lats:
    
    # get a single proxy
    df_temp = samples_with_time_period[samples_with_time_period['Lat'] == unique_lat]

    try:
        # interpolate the dataset; with bounds_error=False, slices outside the
        # core's own age range come back as NaN instead of raising
        interp = scipy.interpolate.interp1d(df_temp['lower'],
                                        df_temp['d13C'],
                                        bounds_error = False)
    except ValueError:
        # No interpolator could be built for this core (e.g. too few time
        # slices): keep its measured values as they are and move on. Without
        # the `continue`, the code below would reuse the interpolator of the
        # previous latitude (or fail with NameError on the first one) and
        # overwrite this entry.
        interpolated_proxies[unique_lat] = df_temp[sample_columns]
        continue
    
    single_proxy_interpolated = pd.DataFrame({'lower' : years_list, 'd13C' : interp(years_list)})
    
    # Repeat the position of the core (taken from its first row) for every slice
    location_repeated = pd.concat([df_temp.reset_index(0).loc[0,['Lat', 'Lon', 'Ocean_depth']]] * len(single_proxy_interpolated), axis=1).T
    single_proxy_interpolated = pd.concat([location_repeated.reset_index(drop=True), single_proxy_interpolated.reset_index(drop=True)],axis=1)    
    
    interpolated_proxies[unique_lat] = single_proxy_interpolated
    
interpolated_samples = pd.concat(interpolated_proxies).reset_index(drop=True)
interpolated_samples_backup = interpolated_samples.copy()

# Combine the interpolated and not interpolated dataframes together so that
# '_merge' tells them apart: 'both' = measured value, 'right_only' = value
# that only exists after interpolation.
interpolated_samples = pd.merge(
    proxy_compilation, interpolated_samples, on=['Lat','Lon','Ocean_depth','d13C','lower'],
    indicator=True,how='right'
)

# Replace the keys 'both' or 'right_only' with colors, to set scatter plot fill
interpolated_samples.replace('right_only','none',inplace=True)

# Split at the dividing depth. Samples at exactly dividing_depth count as deep,
# matching the edge colour rule below; the former strict '>' dropped them.
shallower_samples = interpolated_samples[interpolated_samples['Ocean_depth'] < dividing_depth].replace('both',str(point_color_shallow))
deeper_samples = interpolated_samples[interpolated_samples['Ocean_depth'] >= dividing_depth].replace('both',str(point_color_deep))

# pd.concat replaces DataFrame.append (deprecated, removed in pandas 2.0)
interpolated_samples = pd.concat([shallower_samples, deeper_samples])
edgecolor = [point_color_deep if x >= dividing_depth else point_color_shallow for x in interpolated_samples.Ocean_depth]
interpolated_samples['edgecolor'] = edgecolor


In [27]:

# Get volume weight average with time slices
# Each sample is tagged with the weight of its hemisphere; the weight also
# serves as the region label in the first groupby below.

interpolated_samples['weights'] = np.nan

# North
# NOTE(review): 21.2 + 23.9 = 45.1, the Pacific weight used in the global
# average at the end of this notebook -- presumably these are the North and
# South Pacific shares of that volume; confirm the source of the numbers.
interpolated_samples.loc[(interpolated_samples['Lat'] > 0) ,'weights'] = 21.2

# South
interpolated_samples.loc[(interpolated_samples['Lat'] < 0) ,'weights'] = 23.9
# NOTE(review): a sample at exactly Lat == 0 matches neither mask and keeps a
# NaN weight; groupby presumably drops such rows -- confirm this is intended.

# group the cores based on the age and the region (indicated by weights)
grouped_by_age_region = interpolated_samples.groupby(['lower', 'weights'])

averaged_by_age_region = {}

# find group means
# NOTE(review): np.mean on a DataFrame group defers to pandas' column-wise
# mean; how non-numeric columns are treated depends on the pandas version --
# confirm before upgrading pandas.
for key, group in grouped_by_age_region:
    averaged_by_age_region.update({
        key: np.mean(group)
    })
    
# One row per (time slice, region); the tuple keys are discarded by reset_index
averaged_by_age_region = pd.concat(averaged_by_age_region,axis=1).T.reset_index(drop=True)

# Now there is one value for each region (weight) and each year combination
# Group by years and use weights to find the average d13C for that time period

grouped_by_age = averaged_by_age_region.groupby('lower')

averaged_by_age = {}
stdev_by_age = {}

# find group means
for key, group in grouped_by_age:
    
    # Find the normal (unweighted) average of the regional means
    avg = np.mean(group.d13C)
    
    # Weighted mean of the regional means, keyed by the time slice
    averaged_by_age.update({
        np.mean(group['lower']) : np.sum(group['d13C'] * group['weights'])/np.sum(group['weights'])
    })
    # NOTE(review): this is the weighted mean of squared deviations, i.e. a
    # variance, not a standard deviation (no square root is taken), and the
    # deviations are measured from the unweighted mean 'avg' rather than from
    # the weighted mean stored above. The column is nevertheless named 'stdev'.
    stdev_by_age.update({
        np.mean(group['lower']) : np.average((group.d13C-avg)**2,weights=group.weights)
    }) 
    
    
# Turn both dictionaries into two-column frames keyed by 'lower'
averaged_by_age = pd.DataFrame.from_dict(averaged_by_age,orient='index').reset_index(drop=False).rename(columns={'index' : 'lower', 0 : 'd13C'})
stdev_by_age = pd.DataFrame.from_dict(stdev_by_age,orient='index').reset_index(drop=False).rename(columns={'index' : 'lower', 0 : 'stdev'})

# The merge has no explicit key, so it joins on the shared column ('lower')
averaged_by_age = pd.merge(left=averaged_by_age,right=stdev_by_age)
averaged_by_age = averaged_by_age.sort_values('lower')


In [28]:
# Keep Pacific/LIG snapshots of the working frames, because the generic names
# (averaged_by_age, interpolated_samples) are reused for every ocean and time
# period further down the notebook.
interpolated_samples_LIG_Pacific = interpolated_samples.copy()
averaged_by_age_LIG_Pacific = averaged_by_age.copy()

# Save the datasets; the per-sample file is written without the 'weights'
# column (the snapshot taken above still carries it).
averaged_by_age.to_csv("Data/Pacific_LIG_profile_PL.csv", index=False)
interpolated_samples = interpolated_samples.drop(['weights'], axis=1)
interpolated_samples.to_csv("/srv/ccrc/data06/z5145948/Moving_water_mass/Data/interpolated_LIG_Pacific.csv",index=False)


# Indian

## Holocene

In [29]:
# Build the Holocene Indian Ocean proxy compilation: one row per
# (time slice, core) holding the slice-mean d13C and the number of samples.

# Read both source compilations with the loader functions defined earlier
df_pl = pl_cores(Hol_min,Hol_max)

df_oliver = Oliver_cores(Hol_min,Hol_max)

# Join the dataframes. pd.concat replaces DataFrame.append, which is
# deprecated and was removed in pandas 2.0.
df_results = pd.concat([df_pl, df_oliver])

# Get only the Indian Ocean cores, with depths stored as positive numbers
df = df_results.reset_index(drop = True)
df['Dep'] = abs(df['Dep'])

df_ind = df[df['Location'] == 'Indian']
df_ind = df_ind.reset_index(drop = True)
df_ind['age'] = df_ind.age.astype(float)

# Slice the data into windows [low, up) of width slice_width. The upper edges
# are derived from the lower ones so both arrays always have the same length.
lower = np.arange(Hol_min, Hol_max, slice_width)
upper = lower + slice_width

proxy_compilation = {}

for low, up in zip(lower, upper):
    df_slice = df_ind[(df_ind['age'] >= low) & (df_ind['age'] < up)]
    # A core is identified by its position; all samples of one core that fall
    # inside the window are averaged together.
    grouped_slice = df_slice.groupby(['Lat', 'Lon', 'Dep'])

    slice_averaged = {}

    for key, group in grouped_slice:
        group_averaged = group.mean()
        # Number of d13C measurements behind the slice mean
        group_averaged['count'] = group['d13C'].count()
        # Keyed by the name of the first core in the group
        slice_averaged.update({
            group.Core.reset_index(drop=True)[0] : pd.DataFrame(group_averaged)
        })

    if not slice_averaged:
        # No Indian Ocean core has data in this window; pd.concat would raise
        # on an empty dict, so the window is left out of the compilation.
        continue

    slice_averaged = pd.concat(slice_averaged, axis=1).T
    slice_averaged = slice_averaged.drop(['age'],axis=1)
    
    proxy_compilation.update({
        low : slice_averaged.T
    })

# Stack all windows. The three index levels are (window lower edge, core name,
# the integer column label introduced by pd.DataFrame above); the last one
# carries no information and is dropped.
proxy_compilation = pd.concat(proxy_compilation,axis=1).T.reset_index(drop=False)
proxy_compilation = proxy_compilation.drop('level_2',axis=1)
proxy_compilation = proxy_compilation.rename(columns={'level_0' : 'lower', 'level_1' : 'Core', 'Dep' : 'Ocean_depth'})

In [30]:
# List of unique Holocene cores (name and position) for the LaTeX table in the
# paper: drop the per-slice measurement columns, then de-duplicate.
latex_table_Hol = proxy_compilation.drop(['d13C', 'count', 'lower'], axis=1).drop_duplicates()

# The core name is not carried into the rest of the analysis
proxy_compilation = proxy_compilation.drop(['Core'], axis=1)

In [31]:
# Interpolate each core's d13C onto the full list of time slices, then flag
# every row as measured or interpolated and attach the plotting colours.

samples_with_time_period = proxy_compilation

unique_lats = np.unique(samples_with_time_period['Lat'])
years_list = np.unique(samples_with_time_period.lower)

samples_with_time_period = samples_with_time_period.reset_index(drop=True)

# Columns shared by interpolated and non-interpolated records
sample_columns = ['Lat', 'Lon', 'Ocean_depth', 'lower', 'd13C']

interpolated_proxies = {}

# NOTE(review): cores are told apart by latitude alone, so two cores sharing a
# latitude (different longitude or depth) are interpolated as one record.
for unique_lat in unique_lats:
    
    # get a single proxy
    df_temp = samples_with_time_period[samples_with_time_period['Lat'] == unique_lat]

    try:
        # interpolate the dataset; with bounds_error=False, slices outside the
        # core's own age range come back as NaN instead of raising
        interp = scipy.interpolate.interp1d(df_temp['lower'],
                                        df_temp['d13C'],
                                        bounds_error = False)
    except ValueError:
        # No interpolator could be built for this core (e.g. too few time
        # slices): keep its measured values as they are and move on. Without
        # the `continue`, the code below would reuse the interpolator of the
        # previous latitude (or fail with NameError on the first one) and
        # overwrite this entry.
        interpolated_proxies[unique_lat] = df_temp[sample_columns]
        continue
    
    single_proxy_interpolated = pd.DataFrame({'lower' : years_list, 'd13C' : interp(years_list)})
    
    # Repeat the position of the core (taken from its first row) for every slice
    location_repeated = pd.concat([df_temp.reset_index(0).loc[0,['Lat', 'Lon', 'Ocean_depth']]] * len(single_proxy_interpolated), axis=1).T
    single_proxy_interpolated = pd.concat([location_repeated.reset_index(drop=True), single_proxy_interpolated.reset_index(drop=True)],axis=1)    
    
    interpolated_proxies[unique_lat] = single_proxy_interpolated
    
interpolated_samples = pd.concat(interpolated_proxies).reset_index(drop=True)
interpolated_samples_backup = interpolated_samples.copy()

# Combine the interpolated and not interpolated dataframes together so that
# '_merge' tells them apart: 'both' = measured value, 'right_only' = value
# that only exists after interpolation.
interpolated_samples = pd.merge(
    proxy_compilation, interpolated_samples, on=['Lat','Lon','Ocean_depth','d13C','lower'],
    indicator=True,how='right'
)

# Replace the keys 'both' or 'right_only' with colors, to set scatter plot fill
interpolated_samples.replace('right_only','none',inplace=True)

# Split at the dividing depth. Samples at exactly dividing_depth count as deep,
# matching the edge colour rule below; the former strict '>' dropped them.
shallower_samples = interpolated_samples[interpolated_samples['Ocean_depth'] < dividing_depth].replace('both',str(point_color_shallow))
deeper_samples = interpolated_samples[interpolated_samples['Ocean_depth'] >= dividing_depth].replace('both',str(point_color_deep))

# pd.concat replaces DataFrame.append (deprecated, removed in pandas 2.0)
interpolated_samples = pd.concat([shallower_samples, deeper_samples])
edgecolor = [point_color_deep if x >= dividing_depth else point_color_shallow for x in interpolated_samples.Ocean_depth]
interpolated_samples['edgecolor'] = edgecolor


In [32]:

# Get volume weight average with time slices.
# Use a single region for the Indian Ocean: every sample gets the same weight,
# so the weighted mean below reduces to the plain mean of each time slice.

interpolated_samples['weights'] = 1

# group the cores based on the age and the region (indicated by weights)
grouped_by_age_region = interpolated_samples.groupby(['lower', 'weights'])

averaged_by_age_region = {}

# find group means
# NOTE(review): np.mean on a DataFrame group defers to pandas' column-wise
# mean; how non-numeric columns are treated depends on the pandas version --
# confirm before upgrading pandas.
for key, group in grouped_by_age_region:
    averaged_by_age_region.update({
        key: np.mean(group)
    })
    
# One row per (time slice, region); the tuple keys are discarded by reset_index
averaged_by_age_region = pd.concat(averaged_by_age_region,axis=1).T.reset_index(drop=True)

# Now there is one value for each region (weight) and each year combination
# Group by years and use weights to find the average d13C for that time period

grouped_by_age = averaged_by_age_region.groupby('lower')

averaged_by_age = {}
stdev_by_age = {}

# find group means
for key, group in grouped_by_age:
    
    # Find the normal (unweighted) average of the regional means
    avg = np.mean(group.d13C)
    
    # Weighted mean of the regional means, keyed by the time slice
    averaged_by_age.update({
        np.mean(group['lower']) : np.sum(group['d13C'] * group['weights'])/np.sum(group['weights'])
    })
    # NOTE(review): this is the weighted mean of squared deviations, i.e. a
    # variance, not a standard deviation (no square root is taken). With the
    # single weight set above there appears to be only one regional mean per
    # time slice, so the deviation from 'avg' -- and hence 'stdev' -- looks to
    # be 0 for every slice; confirm this is what the paper should report.
    stdev_by_age.update({
        np.mean(group['lower']) : np.average((group.d13C-avg)**2,weights=group.weights)
    }) 
    
    
# Turn both dictionaries into two-column frames keyed by 'lower'
averaged_by_age = pd.DataFrame.from_dict(averaged_by_age,orient='index').reset_index(drop=False).rename(columns={'index' : 'lower', 0 : 'd13C'})
stdev_by_age = pd.DataFrame.from_dict(stdev_by_age,orient='index').reset_index(drop=False).rename(columns={'index' : 'lower', 0 : 'stdev'})

# The merge has no explicit key, so it joins on the shared column ('lower')
averaged_by_age = pd.merge(left=averaged_by_age,right=stdev_by_age)
averaged_by_age = averaged_by_age.sort_values('lower')


In [33]:
# Keep Indian/Holocene snapshots of the working frames, because the generic
# names (averaged_by_age, interpolated_samples) are reused for every ocean
# and time period further down the notebook.
interpolated_samples_Holocene_Indian = interpolated_samples.copy()
averaged_by_age_Holocene_Indian = averaged_by_age.copy()

# Save the datasets; the per-sample file is written without the 'weights'
# column (the snapshot taken above still carries it).
averaged_by_age.to_csv("Data/Indian_Holocene_profile_PL.csv", index=False)
interpolated_samples = interpolated_samples.drop(['weights'], axis=1)
interpolated_samples.to_csv("/srv/ccrc/data06/z5145948/Moving_water_mass/Data/interpolated_Holocene_Indian.csv",index=False)



## LIG

In [34]:
# Build the Last Interglacial (LIG) Indian Ocean proxy compilation: one row
# per (time slice, core) holding the slice-mean d13C and the sample count.

# Read both source compilations with the loader functions defined earlier
df_pl = pl_cores(LIG_min,LIG_max)

df_oliver = Oliver_cores(LIG_min,LIG_max)

# Join the dataframes. pd.concat replaces DataFrame.append, which is
# deprecated and was removed in pandas 2.0.
df_results = pd.concat([df_pl, df_oliver])

# Get only the Indian Ocean cores, with depths stored as positive numbers
df = df_results.reset_index(drop = True)
df['Dep'] = abs(df['Dep'])

df_ind = df[df['Location'] == 'Indian']
df_ind = df_ind.reset_index(drop = True)
df_ind['age'] = df_ind.age.astype(float)

# Slice the data into windows [low, up) of width slice_width. The upper edges
# are derived from the lower ones so both arrays always have the same length.
lower = np.arange(LIG_min, LIG_max, slice_width)
upper = lower + slice_width

proxy_compilation = {}

for low, up in zip(lower, upper):
    df_slice = df_ind[(df_ind['age'] >= low) & (df_ind['age'] < up)]
    # A core is identified by its position; all samples of one core that fall
    # inside the window are averaged together.
    grouped_slice = df_slice.groupby(['Lat', 'Lon', 'Dep'])

    slice_averaged = {}

    for key, group in grouped_slice:
        group_averaged = group.mean()
        # Number of d13C measurements behind the slice mean
        group_averaged['count'] = group['d13C'].count()
        # Keyed by the name of the first core in the group
        slice_averaged.update({
            group.Core.reset_index(drop=True)[0] : pd.DataFrame(group_averaged)
        })

    if not slice_averaged:
        # No Indian Ocean core has data in this window; pd.concat would raise
        # on an empty dict, so the window is left out of the compilation.
        continue

    slice_averaged = pd.concat(slice_averaged, axis=1).T
    slice_averaged = slice_averaged.drop(['age'],axis=1)
    
    proxy_compilation.update({
        low : slice_averaged.T
    })

# Stack all windows. The three index levels are (window lower edge, core name,
# the integer column label introduced by pd.DataFrame above); the last one
# carries no information and is dropped.
proxy_compilation = pd.concat(proxy_compilation,axis=1).T.reset_index(drop=False)
proxy_compilation = proxy_compilation.drop('level_2',axis=1)
proxy_compilation = proxy_compilation.rename(columns={'level_0' : 'lower', 'level_1' : 'Core', 'Dep' : 'Ocean_depth'})


In [35]:
# List of unique LIG cores (name and position) for the LaTeX table in the
# paper: drop the per-slice measurement columns, then de-duplicate.
latex_table_LIG = proxy_compilation.drop(['d13C', 'count', 'lower'], axis=1).drop_duplicates()

# The core name is not carried into the rest of the analysis
proxy_compilation = proxy_compilation.drop(['Core'], axis=1)

In [36]:
# Combine the Holocene and LIG core lists into one table. indicator=True adds
# a '_merge' column recording which list(s) each core came from.
latex_table = latex_table_Hol.merge(latex_table_LIG,on=['Core','Lat','Lon','Ocean_depth'],how='outer',indicator=True)

# Format the table: turn the merge indicator into a readable time period
latex_table.replace(to_replace='both',value='Holocene & LIG',inplace=True)
latex_table.replace(to_replace='right_only',value='LIG',inplace=True)
latex_table.replace(to_replace='left_only',value='Holocene',inplace=True)
latex_table = latex_table.rename(columns={'Lat':'Latitude','Lon' : 'Longitude', 'Ocean_depth' : 'Depth (m)', '_merge':'Time Period'})
# Coordinates are printed with two decimals
latex_table['Latitude'] = [str(round(x, 2)) for x in latex_table.Latitude]
latex_table['Longitude'] = [str(round(x, 2)) for x in latex_table.Longitude]
latex_table.sort_values(by='Core',inplace=True)

# Convert to string of latex markdown
latex_string = latex_table.to_latex(index=False,longtable=True)

# Strip the booktabs rules from the generated table
latex_string = latex_string.replace('\\toprule','')
latex_string = latex_string.replace('\\midrule','')
latex_string = latex_string.replace('\\bottomrule','')

# Add caption to latex table. Raw strings keep the LaTeX backslashes literal
# without relying on invalid escape sequences such as '\c' and '\d'.
caption = [
    r'\caption{Latitude, Longitude, and depth (m) coordinates for the Indian benthic foraminifera',
    r'$\delta^{13}$C cores. \textit{Time Period} refers to whether this core had',
    r'Holocene, Last Interglacial, or Holocene and Last Interglacial data.}'
]

caption = ' '.join(caption)

# The caption is inserted just before the end of the longtable environment
latex_string = latex_string.replace('\\end{longtable}',caption+'\\end{longtable}')

# Write to a file; the context manager closes it even if the write fails
with open("Figures/Cores_tables_Indian.tex","w") as tex_file:
    tex_file.write(latex_string)
  

In [37]:
# Interpolate each core's d13C onto the full list of time slices, then flag
# every row as measured or interpolated and attach the plotting colours.

samples_with_time_period = proxy_compilation

unique_lats = np.unique(samples_with_time_period['Lat'])
years_list = np.unique(samples_with_time_period.lower)

samples_with_time_period = samples_with_time_period.reset_index(drop=True)

# Columns shared by interpolated and non-interpolated records
sample_columns = ['Lat', 'Lon', 'Ocean_depth', 'lower', 'd13C']

interpolated_proxies = {}

# NOTE(review): cores are told apart by latitude alone, so two cores sharing a
# latitude (different longitude or depth) are interpolated as one record.
for unique_lat in unique_lats:
    
    # get a single proxy
    df_temp = samples_with_time_period[samples_with_time_period['Lat'] == unique_lat]

    try:
        # interpolate the dataset; with bounds_error=False, slices outside the
        # core's own age range come back as NaN instead of raising
        interp = scipy.interpolate.interp1d(df_temp['lower'],
                                        df_temp['d13C'],
                                        bounds_error = False)
    except ValueError:
        # No interpolator could be built for this core (e.g. too few time
        # slices): keep its measured values as they are and move on. Without
        # the `continue`, the code below would reuse the interpolator of the
        # previous latitude (or fail with NameError on the first one) and
        # overwrite this entry.
        interpolated_proxies[unique_lat] = df_temp[sample_columns]
        continue
    
    single_proxy_interpolated = pd.DataFrame({'lower' : years_list, 'd13C' : interp(years_list)})
    
    # Repeat the position of the core (taken from its first row) for every slice
    location_repeated = pd.concat([df_temp.reset_index(0).loc[0,['Lat', 'Lon', 'Ocean_depth']]] * len(single_proxy_interpolated), axis=1).T
    single_proxy_interpolated = pd.concat([location_repeated.reset_index(drop=True), single_proxy_interpolated.reset_index(drop=True)],axis=1)    
    
    interpolated_proxies[unique_lat] = single_proxy_interpolated
    
interpolated_samples = pd.concat(interpolated_proxies).reset_index(drop=True)
interpolated_samples_backup = interpolated_samples.copy()

# Combine the interpolated and not interpolated dataframes together so that
# '_merge' tells them apart: 'both' = measured value, 'right_only' = value
# that only exists after interpolation.
interpolated_samples = pd.merge(
    proxy_compilation, interpolated_samples, on=['Lat','Lon','Ocean_depth','d13C','lower'],
    indicator=True,how='right'
)

# Replace the keys 'both' or 'right_only' with colors, to set scatter plot fill
interpolated_samples.replace('right_only','none',inplace=True)

# Split at the dividing depth. Samples at exactly dividing_depth count as deep,
# matching the edge colour rule below; the former strict '>' dropped them.
shallower_samples = interpolated_samples[interpolated_samples['Ocean_depth'] < dividing_depth].replace('both',str(point_color_shallow))
deeper_samples = interpolated_samples[interpolated_samples['Ocean_depth'] >= dividing_depth].replace('both',str(point_color_deep))

# pd.concat replaces DataFrame.append (deprecated, removed in pandas 2.0)
interpolated_samples = pd.concat([shallower_samples, deeper_samples])
edgecolor = [point_color_deep if x >= dividing_depth else point_color_shallow for x in interpolated_samples.Ocean_depth]
interpolated_samples['edgecolor'] = edgecolor


In [38]:

# Get volume weight average with time slices.
# Every sample gets the same weight here (a single region), so the weighted
# mean below reduces to the plain mean of each time slice.

interpolated_samples['weights'] = 1

# group the cores based on the age and the region (indicated by weights)
grouped_by_age_region = interpolated_samples.groupby(['lower', 'weights'])

averaged_by_age_region = {}

# find group means
# NOTE(review): np.mean on a DataFrame group defers to pandas' column-wise
# mean; how non-numeric columns are treated depends on the pandas version --
# confirm before upgrading pandas.
for key, group in grouped_by_age_region:
    averaged_by_age_region.update({
        key: np.mean(group)
    })
    
# One row per (time slice, region); the tuple keys are discarded by reset_index
averaged_by_age_region = pd.concat(averaged_by_age_region,axis=1).T.reset_index(drop=True)

# Now there is one value for each region (weight) and each year combination
# Group by years and use weights to find the average d13C for that time period

grouped_by_age = averaged_by_age_region.groupby('lower')

averaged_by_age = {}
stdev_by_age = {}

# find group means
for key, group in grouped_by_age:
    
    # Find the normal (unweighted) average of the regional means
    avg = np.mean(group.d13C)
    
    # Weighted mean of the regional means, keyed by the time slice
    averaged_by_age.update({
        np.mean(group['lower']) : np.sum(group['d13C'] * group['weights'])/np.sum(group['weights'])
    })
    # NOTE(review): this is the weighted mean of squared deviations, i.e. a
    # variance, not a standard deviation (no square root is taken). With the
    # single weight set above there appears to be only one regional mean per
    # time slice, so the deviation from 'avg' -- and hence 'stdev' -- looks to
    # be 0 for every slice; confirm this is what the paper should report.
    stdev_by_age.update({
        np.mean(group['lower']) : np.average((group.d13C-avg)**2,weights=group.weights)
    }) 
    
    
# Turn both dictionaries into two-column frames keyed by 'lower'
averaged_by_age = pd.DataFrame.from_dict(averaged_by_age,orient='index').reset_index(drop=False).rename(columns={'index' : 'lower', 0 : 'd13C'})
stdev_by_age = pd.DataFrame.from_dict(stdev_by_age,orient='index').reset_index(drop=False).rename(columns={'index' : 'lower', 0 : 'stdev'})

# The merge has no explicit key, so it joins on the shared column ('lower')
averaged_by_age = pd.merge(left=averaged_by_age,right=stdev_by_age)
averaged_by_age = averaged_by_age.sort_values('lower')


In [39]:
# Keep Indian/LIG snapshots of the working frames, because the generic names
# (averaged_by_age, interpolated_samples) are reused for every ocean and time
# period in this notebook.
interpolated_samples_LIG_Indian = interpolated_samples.copy()
averaged_by_age_LIG_Indian = averaged_by_age.copy()

# Save the datasets; the per-sample file is written without the 'weights'
# column (the snapshot taken above still carries it).
averaged_by_age.to_csv("Data/Indian_LIG_profile_PL.csv", index=False)
interpolated_samples = interpolated_samples.drop(['weights'], axis=1)
interpolated_samples.to_csv("/srv/ccrc/data06/z5145948/Moving_water_mass/Data/interpolated_LIG_Indian.csv",index=False)


# Global

## Holocene

In [40]:
# Reformat the individual ocean dataframes so that they can be combined into a
# single dataframe: give the d13C/stdev columns a basin suffix and index each
# frame by the time slice ('lower') so the frames align when concatenated.
for _basin_profile, _basin_suffix in [
    (averaged_by_age_Holocene_Indian, 'ind'),
    (averaged_by_age_Holocene_Pacific, 'pac'),
    (averaged_by_age_Holocene_Atlantic, 'atl'),
    (averaged_by_age_LIG_Indian, 'ind'),
    (averaged_by_age_LIG_Pacific, 'pac'),
    (averaged_by_age_LIG_Atlantic, 'atl'),
]:
    # The frames are modified in place because later cells use these names
    _basin_profile.rename(
        columns={'d13C': 'd13C_' + _basin_suffix, 'stdev': 'stdev_' + _basin_suffix},
        inplace=True,
    )
    # Guarded so that running this cell a second time is a no-op instead of a
    # KeyError ('lower' is no longer a column once it has become the index).
    if 'lower' in _basin_profile.columns:
        _basin_profile.set_index('lower', inplace=True)

In [41]:
# Put the three basins side by side, one frame per time period. The frames
# are aligned on their index, which is then moved back into a regular column.
global_ocean_slices_Holocene = pd.concat(
    [
        averaged_by_age_Holocene_Indian,
        averaged_by_age_Holocene_Pacific,
        averaged_by_age_Holocene_Atlantic,
    ],
    axis=1,
).reset_index()

global_ocean_slices_LIG = pd.concat(
    [
        averaged_by_age_LIG_Indian,
        averaged_by_age_LIG_Pacific,
        averaged_by_age_LIG_Atlantic,
    ],
    axis=1,
).reset_index()

In [42]:
# Global d13C and 'stdev' per time slice as the weighted mean of the three
# basin values. Basin weights: Atlantic 18.4, Pacific 45.1, Indian 14.8.
# NOTE(review): presumably ocean volumes, going by the wording used elsewhere
# in this notebook -- confirm the source and units of these numbers.
for _ocean_slices in (global_ocean_slices_Holocene, global_ocean_slices_LIG):
    for _quantity in ('d13C', 'stdev'):
        # A slice missing from any one basin yields NaN for the global value
        _ocean_slices[_quantity] = (
            _ocean_slices[_quantity + '_atl']*18.4 +
            _ocean_slices[_quantity + '_pac']*45.1 +
            _ocean_slices[_quantity + '_ind']*14.8
        )/(18.4+45.1+14.8)

In [44]:
# Write the global profiles of both time periods to disk
for _global_profile, _csv_path in (
    (global_ocean_slices_Holocene, "Data/Global_Holocene_profile_PL.csv"),
    (global_ocean_slices_LIG, "Data/Global_LIG_profile_PL.csv"),
):
    _global_profile.to_csv(_csv_path, index=False)
