In [1]:
""" Import libraries """

import pandas as pd
import matplotlib.pyplot as plt
from gui_functions import load_spreadsheets_from_folder, select_columns_gui, filter_dataframe_by_checkboxes

In [2]:
""" Load spreadsheets and create Pandas dataframe """

# Build the working dataframe with the project helper (its behaviour is
# defined in gui_functions, not in this notebook).
df = load_spreadsheets_from_folder()

# Report which columns were loaded, then the overall size of the table.
print(df.columns.tolist())
print(f'the dataframe contains {len(df)} lines and {len(df.columns)} columns')

['Taxon', 'Family', 'Animal', 'Material', 'Geographic_location', 'Country', 'Age', 'Formation', 'Environment', 'Captive_wild', 'd18Oc', 'd18OcSD', 'd18OcStd', 'd13Cc', 'd13CcSD', 'd18Owmoy', 'Water_source', 'Min_LAT', 'Max_LAT', 'Min_LONG', 'Max_LONG', 'Notes', 'DOI', 'RefShort', 'source_file', 'Collection_number', 'd13Corg', 'd15Norg', 'd18OwSD', 'd13Cfood', 'd15Nfood', 'Laying_year', 'Laying_month', 'Ootaxon', 'D47c', 'D47cSD', 'Fe/Ca', 'Li/Ca', 'Mg/Ca', 'Mn/Ca', 'SR/Ca', 'd13CfoodSD', 'd18Oalb', 'd18OalbSD', 'Mn(ppm)', 'Fe(ppm)', 'D48c', 'D48cSD', 'Min_Lat', 'Max_Lat', 'Min_Long', 'Max_Long', 'WebLink', 'Sr(ppm)', 'MgO(%)', 'd13Calb', 'd13CalbSD', 'd15Nalb', 'd15NalbSD', 'd34Salb', 'd34SalbSD']
the dataframe contains 1218 lines and 61 columns


In [3]:
""" Select only relevant columns """

# Narrow the full table down to the chosen columns via the project helper.
# NOTE(review): the selection presumably comes from manual GUI input, so the
# result is not reproducible from code alone -- confirm against gui_functions.
df_select = select_columns_gui(df)

# Report the retained columns and the resulting table size.
print(df_select.columns.tolist())
print(f'the dataframe contains {len(df_select)} lines and {len(df_select.columns)} columns')

# Last expression of the cell: show the dtype of every retained column.
df_select.dtypes

['Animal', 'Age', 'Environment', 'd18Oc', 'd18OcStd', 'd13Cc']
the dataframe contains 1218 lines and 6 columns


Animal          object
Age             object
Environment     object
d18Oc          float64
d18OcStd        object
d13Cc          float64
dtype: object

In [None]:
""" Filter dataframe by various checkboxes """

# Apply the project's checkbox-based filter to the column-selected table.
# NOTE(review): the "Dataset cleaning" cell further down starts again from
# df_select, so filtered_df is only previewed and never used downstream --
# confirm whether the cleaning step should read filtered_df instead.
filtered_df = filter_dataframe_by_checkboxes(df_select)

In [None]:
# Preview the filtered table. A bare last expression lets the notebook use the
# DataFrame's rich, row-truncated rendering instead of the plain-text dump
# that print() produces.
filtered_df

In [None]:
""" Dataset cleaning """

# Columns this cell reads; the scatter-plot cell below needs the same ones.
required_columns = ['Age', 'Animal', 'd18Oc', 'd18OcStd', 'd18Owmoy']
missing_columns = [col for col in required_columns if col not in df_select.columns]
if missing_columns:
    # Fail early with an actionable message instead of an opaque KeyError
    # raised from inside dropna(); the exception type is unchanged.
    raise KeyError(
        f"df_select is missing required column(s) {missing_columns}; "
        "re-run the column selection cell and include them"
    )

# Select only extant animals

df_extant = df_select[df_select['Age'] == 'present-day']

# Remove entries with missing values
# .copy() makes df_OCisotopes an independent frame, so the in-place conversion
# below neither touches df_select nor triggers SettingWithCopyWarning.

df_OCisotopes = df_extant.dropna(axis=0, subset=['d18Oc', 'd18Owmoy', 'Animal']).copy()

# Convert V-PDB d18O values to V-SMOW scale
# The row mask is computed once and reused on both sides of the assignment.
# NOTE(review): 'd18OcStd' still reads 'V-PDB' for the converted rows; the
# label used for V-SMOW in this dataset is not visible here, so it is left
# untouched -- confirm whether the label should be updated as well.

is_vpdb = df_OCisotopes['d18OcStd'] == 'V-PDB'
df_OCisotopes.loc[is_vpdb, 'd18Oc'] = 1.03092 * df_OCisotopes.loc[is_vpdb, 'd18Oc'] + 30.92
df_OCisotopes.dtypes

In [None]:
""" Showing dataset as plots """

# Scatter plot of two variables (x, y) and category separator (marker style)

x_axis = 'd18Oc'
y_axis = 'd18Owmoy'
marker_separator = 'Animal'

# Define a list of markers to cycle through
markers = ['o', 's', 'D', '^', '*', 'x', 'P', 'H']
group_values = df_OCisotopes[marker_separator].unique()

fig, ax = plt.subplots()

# One scatter call per category; the marker list wraps around when there are
# more categories than markers.
for i, group_value in enumerate(group_values):
    marker = markers[i % len(markers)]
    subset = df_OCisotopes[df_OCisotopes[marker_separator] == group_value]
    ax.scatter(subset[x_axis], subset[y_axis], marker=marker, label=f"{group_value}")

# Label the figure so it can be read on its own when the notebook is skimmed.
ax.set_xlabel(x_axis)
ax.set_ylabel(y_axis)
ax.set_title(f"{y_axis} vs {x_axis} by {marker_separator}")
ax.legend(title=marker_separator)
plt.show()