In [None]:
""" Import libraries """

import pandas as pd
import tkinter as tk
from tkinter import ttk
import matplotlib.pyplot as plt
from spreadsheet_loader import load_spreadsheets_from_folder

In [None]:
""" Load spreadsheets and create Pandas dataframe """

# Build the working DataFrame through the project loader, then echo its first
# rows followed by its (rows, columns) shape.
df = load_spreadsheets_from_folder()
print(df.head(), df.shape, sep="\n")

In [None]:
""" Select only relevant columns """

def select_columns_gui(df, max_button_width=15, max_window_height=500, min_button_padding=20):
    """Let the user pick DataFrame columns by clicking toggle buttons in a Tk window.

    One button is created per column of ``df``. Clicking a button toggles that
    column in or out of the selection (selected buttons are drawn sunken and
    light blue). Pressing "Submit" or closing the window ends the dialog; in
    both cases the current selection is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are offered for selection.
    max_button_width : int, default 15
        Value passed as the ``width`` option of every column button.
    max_window_height : int, default 500
        Height of the window in pixels.
    min_button_padding : int, default 20
        Pixels added to the estimated text width of the longest column name
        when estimating how many buttons fit in one row.

    Returns
    -------
    pandas.DataFrame
        ``df[selected]`` where ``selected`` lists the chosen columns in the
        order they were clicked. With nothing selected (or a frame that has no
        columns) the result has no columns.

    Notes
    -----
    The call blocks inside the Tk main loop until the window is dismissed.
    Buttons are tracked by column label, so labels are assumed to be unique.
    """
    column_names = list(df.columns)
    selected_cols = []

    if not column_names:
        # Nothing to offer: skip the window instead of failing on max() of an
        # empty sequence below.
        return df[selected_cols]

    def toggle_column(col_name):
        """Flip the selection state of one column and restyle its button."""
        if col_name in selected_cols:
            selected_cols.remove(col_name)
            # Restore the platform's real default colour; a hard-coded
            # "SystemButtonFace" only exists on Windows and raises TclError
            # elsewhere.
            buttons[col_name].config(relief="raised", bg=default_bg)
        else:
            selected_cols.append(col_name)
            buttons[col_name].config(relief="sunken", bg="lightblue")

    # --- Layout sizing logic ---
    max_col_len = max(len(str(col)) for col in column_names)
    # Rough estimate of 7 px per character; clamped to >= 1 so the integer
    # division below can never divide by zero.
    button_pixel_width = max(1, max_col_len * 7 + min_button_padding)
    buttons_per_row = max(1, min(6, 1000 // button_pixel_width))  # at most 6 per row
    total_width = min(1000, buttons_per_row * button_pixel_width + 50)

    # --- Main window ---
    root = tk.Tk()
    try:
        root.title("Select Columns")
        root.geometry(f"{total_width}x{max_window_height}")
        # Closing via the window manager would destroy the root and make the
        # final destroy() raise; treat it like "Submit" instead.
        root.protocol("WM_DELETE_WINDOW", root.quit)

        # --- Submit button, fixed at bottom ---
        # Packed first so the expanding scroll area cannot push it out of view
        # when the window is short.
        submit_frame = tk.Frame(root)
        submit_frame.pack(side="bottom", fill="x")
        submit_btn = tk.Button(submit_frame, text="Submit", command=root.quit)
        submit_btn.pack(pady=10)
        # Default button background on this platform, used when deselecting.
        default_bg = submit_btn.cget("bg")

        # --- Scrollable area: canvas + vertical scrollbar ---
        outer_frame = tk.Frame(root)
        outer_frame.pack(side="top", fill="both", expand=True)

        canvas = tk.Canvas(outer_frame, width=total_width)
        scrollbar = ttk.Scrollbar(outer_frame, orient="vertical", command=canvas.yview)
        canvas.configure(yscrollcommand=scrollbar.set)

        scrollbar.pack(side="right", fill="y")
        canvas.pack(side="left", fill="both", expand=True)

        # The buttons live in a frame embedded in the canvas so they can scroll.
        scrollable_frame = tk.Frame(canvas)
        canvas.create_window((0, 0), window=scrollable_frame, anchor="nw")

        def update_scrollregion(event):
            """Keep the scrollable extent in sync with the button frame size."""
            canvas.configure(scrollregion=canvas.bbox("all"))

        scrollable_frame.bind("<Configure>", update_scrollregion)

        # --- Fill with buttons, laid out row by row ---
        buttons = {}
        for i, col in enumerate(column_names):
            row, col_pos = divmod(i, buttons_per_row)
            btn = tk.Button(
                scrollable_frame,
                text=str(col),
                width=max_button_width,
                # Bind the current label as a default so each button toggles
                # its own column rather than the last one in the loop.
                command=lambda c=col: toggle_column(c),
            )
            btn.grid(row=row, column=col_pos, padx=5, pady=5)
            buttons[col] = btn

        # --- Final setup ---
        root.update_idletasks()
        canvas.configure(scrollregion=canvas.bbox("all"))

        root.mainloop()
    finally:
        # Always tear the window down, including when setup fails part-way.
        root.destroy()

    return df[selected_cols]



# Open the column picker (blocks until the window is dismissed) and keep only
# the columns chosen there.
# NOTE: the cleaning cell further down reads 'Age', 'd18Oc', 'd18Owmoy',
# 'Animal' and 'd18OcStd' from df_select, so all of those must be selected.
df_select = select_columns_gui(df)
print(df_select)


In [None]:
""" Dataset cleaning """

# Select only extant animals
df_extant = df_select[df_select['Age'] == 'present-day']

# Remove entries with missing values in the columns used below.
# The explicit .copy() makes df_OCisotopes an independent frame, so the
# in-place conversion further down writes to it unambiguously instead of to a
# slice of df_select (which triggers SettingWithCopyWarning).
df_OCisotopes = df_extant.dropna(axis=0, subset=['d18Oc', 'd18Owmoy', 'Animal']).copy()

print(df_OCisotopes)

# Convert V-PDB d18O values to V-SMOW scale
# NOTE(review): 'd18OcStd' is left untouched, so converted rows are still
# labelled 'V-PDB' afterwards; applying this conversion a second time to the
# same frame would convert them again.
is_vpdb = df_OCisotopes['d18OcStd'] == 'V-PDB'
df_OCisotopes.loc[is_vpdb, 'd18Oc'] = 1.03092 * df_OCisotopes.loc[is_vpdb, 'd18Oc'] + 30.92


# Scatter plot of two variables, one marker style per animal group
x = 'd18Oc'
Y = 'd18Owmoy'

# Markers are reused cyclically when there are more groups than markers
markers = ['o', 's', 'D', '^', '*', 'x', 'P', 'H']
group_values = df_OCisotopes['Animal'].unique()

fig, ax = plt.subplots()

for i, Animal in enumerate(group_values):
    marker = markers[i % len(markers)]
    subset = df_OCisotopes[df_OCisotopes['Animal'] == Animal]
    ax.scatter(subset[x], subset[Y], marker=marker, label=f"{Animal}")

# Label the axes with the plotted column names so the figure stands alone
ax.set_xlabel(x)
ax.set_ylabel(Y)
ax.legend(title='Animal')
plt.show()