In [None]:
# histogram/dotplot (Credit to Microsoft Copilot)
# Note to self: figure out how the heck this works
# To do: something ratios
import dash
import pandas as pd
import numpy as np
from dash import Dash, dcc, html, Input, Output, State, ctx, ALL
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import os
import signal

%run -i assets/lists.ipynb

# Every CSV below is read with the same parser options.
_CSV_KWARGS = {"comment": "#", "low_memory": False}

# Planetary systems tables: the plain one and the one with estimated values
pl_s = pd.read_csv('assets/Planetary_Systems.csv', **_CSV_KWARGS)
pl_es = pd.read_csv('assets/Planetary_Systems_Estimated.csv', **_CSV_KWARGS)

# Atmospheric spectroscopy list, used by one of the data filters
at_s = pd.read_csv('assets/Atmospheric_Spectroscopy.csv', **_CSV_KWARGS)

# Solar-system rows that can optionally be appended to the dataset
solar_planets = pd.read_csv('assets/Solar_Values.csv', **_CSV_KWARGS)

# The Dash application object that the layout and callbacks attach to
app = Dash(
    __name__,
    external_stylesheets=['assets/style.css'],
)

# Database columns that can be plotted, grouped by category.
# Each entry is a dropdown option: 'label' is the text shown to the user and
# 'value' is the column name in the planetary systems table. Every label
# quotes the column name it maps to, so label and value must stay in sync.
planet_features = {
    'Planet (Compare Planets only)': [
        {'label': "Planet Orbital Period (days, pl_orbper, Recommended)", 'value': "pl_orbper"},
        {'label': "Planet Orbit Semi-Major Axis (au, pl_orbsmax)", 'value': "pl_orbsmax"},
        {'label': "Planet Epoch of Periastron (days, pl_orbtper)", 'value': "pl_orbtper"},
        {'label': "Planet Argument of Periastron (deg, pl_orblper)", 'value': "pl_orblper"},
        {'label': "Planet Projected Obliquity (deg, pl_projobliq)", 'value': "pl_projobliq"},
        {'label': "Planet True Obliquity (deg, pl_trueobliq)", 'value': "pl_trueobliq"},
        {'label': "Planet Radius (Earth radius, pl_rade, Recommended)", 'value': "pl_rade"},
        {'label': "Planet Mass (Earth mass, pl_bmasse, Recommended)", 'value': "pl_bmasse"},
        {'label': "Planet Density (g/cm^3, pl_dens, Recommended)", 'value': "pl_dens"},
        {'label': "Planet Orbital Eccentricity (pl_orbeccen)", 'value': "pl_orbeccen"},
        {'label': "Planet Insolation Flux (Earth flux, pl_insol)", 'value': "pl_insol"},
        {'label': "Planet Equilibrium Temperature (K, pl_eqt, Recommended)", 'value': "pl_eqt"},
        {'label': "Planet Transit Duration (hrs, pl_trandur)", 'value': "pl_trandur"},
        {'label': "Planet Transit Midpoint (days, pl_tranmid)", 'value': "pl_tranmid"},
        {'label': "Planet Transit Depth (%, pl_trandep)", 'value': "pl_trandep"},
        {'label': "Planet Impact Parameter (pl_imppar)", 'value': "pl_imppar"},
        {'label': "Planet Occultation Depth (%, pl_occdep)", 'value': "pl_occdep"},
        {'label': "Planet Radial Velocity Amplitude (m/s, pl_rvamp)", 'value': "pl_rvamp"},
        {'label': "Discovery Year (disc_year)", 'value': "disc_year"},
        {'label': "Last Update (rowupdate)", 'value': "rowupdate"},
    ],
    'Stellar': [
        {'label': "Stellar Effective Temperature (K, st_teff, Recommended)", 'value': "st_teff"},
        {'label': "Stellar Radius (Solar radius, st_rad, Recommended)", 'value': "st_rad"},
        {'label': "Stellar Mass (Solar mass, st_mass, Recommended)", 'value': "st_mass"},
        {'label': "Stellar Density (g/cm^3, st_dens)", 'value': "st_dens"},
        {'label': "Stellar Surface Gravity (log10(cm/s^2), st_logg)", 'value': "st_logg"},
        {'label': "Stellar Age (Gyr, st_age)", 'value': "st_age"},
        {'label': "Stellar Rotational Period (days, st_rotp)", 'value': "st_rotp"},
        {'label': "Stellar Rotational Velocity (km/s, st_vsin)", 'value': "st_vsin"},
        {'label': "Stellar Radial Velocity (km/s, st_radv)", 'value': "st_radv"},
        {'label': "Stellar Metallicity (dex, st_met, Recommended)", 'value': "st_met"},
        {'label': "Stellar Luminosity (log10(Solar), st_lum)", 'value': "st_lum"},
    ],
    'System': [
        {'label': "System Parallax (mas, sy_plx)", 'value': "sy_plx"},
        {'label': "System Distance from Earth (pc, sy_dist)", 'value': "sy_dist"},
        {'label': "System # Stars (sy_snum)", 'value': "sy_snum"},
        {'label': "System # Planets (sy_pnum)", 'value': "sy_pnum"},
        {'label': "System # Moons (sy_mnum)", 'value': "sy_mnum"},
        {'label': "System u (Sloan) Magnitude (sy_umag, ~354 nm)", 'value': "sy_umag"},
        {'label': "System B (Johnson) Magnitude (sy_bmag, ~442 nm)", 'value': "sy_bmag"},
        {'label': "System g (Sloan) Magnitude (sy_gmag, ~475 nm)", 'value': "sy_gmag"},
        {'label': "System V (Johnson) Magnitude (sy_vmag, ~540 nm)", 'value': "sy_vmag"},
        {'label': "System Kepler Magnitude (sy_kepmag, ~600 nm)", 'value': "sy_kepmag"},
        {'label': "System r (Sloan) Magnitude (sy_rmag, ~622 nm)", 'value': "sy_rmag"},
        {'label': "System Gaia Magnitude (sy_gaiamag, ~673 nm)", 'value': "sy_gaiamag"},
        {'label': "System i (Sloan) Magnitude (sy_imag, ~763 nm)", 'value': "sy_imag"},
        {'label': "System I (Cousins) Magnitude (sy_icmag, ~786.5 nm)", 'value': "sy_icmag"},
        {'label': "System TESS Magnitude (sy_tmag, ~800 nm)", 'value': "sy_tmag"},
        {'label': "System z (Sloan) Magnitude (sy_zmag, ~905 nm)", 'value': "sy_zmag"},
        {'label': "System J (2MASS) Magnitude (sy_jmag, ~1.25 μm)", 'value': "sy_jmag"},
        {'label': "System H (2MASS) Magnitude (sy_hmag, ~1.65 μm)", 'value': "sy_hmag"},
        {'label': "System Ks (2MASS) Magnitude (sy_kmag, ~2.15 μm)", 'value': "sy_kmag"},
        {'label': "System W1 (WISE) Magnitude (sy_w1mag, ~3.4 μm)", 'value': "sy_w1mag"},
        {'label': "System W2 (WISE) Magnitude (sy_w2mag, ~4.6 μm)", 'value': "sy_w2mag"},
        {'label': "System W3 (WISE) Magnitude (sy_w3mag, ~12 μm)", 'value': "sy_w3mag"},
        {'label': "System W4 (WISE) Magnitude (sy_w4mag, ~22 μm)", 'value': "sy_w4mag"},
    ]
}
# Flatten planet_features into one option list for the feature dropdown.
# Each category contributes a disabled "--- name ---" header row followed
# by its own options, in dictionary order.
dropdown_options = []
for category, options in planet_features.items():
    dropdown_options += [
        {
            'label': f'--- {category} ---',
            'value': f'header-{category}',
            'disabled': True
        },
        *options,
    ]


# Page layout: filters, feature/grouping selectors, axis and range options,
# the graphs themselves, data-source links and the client-side stores.
app.layout = html.Div([
    html.H1("Histogram/\"Dotplot\""),

    # ---- Data filters ----
    html.H2("Data filters:"),
    dcc.Checklist(
        id='filter-checklist',
        options=[
            {'label': 'Has atmospheric data (Compare Planets only)', 'value': 'atmoData'},
            {'label': 'Default parameter set', 'value': 'default'},
            {'label': 'No controversial flag', 'value': 'noControv'},
            {'label': 'Water to Metal Density (Compare Planets only)', 'value': 'densRange'},
            {'label': 'In Target Star Catalog', 'value': 'target'},
            {'label': 'Include Solar System', 'value': 'solar'},
        ],
        value=['default', 'noControv'],  # Default values to filter by
        className="checkbox-container"
    ),
    # Optional host-star filter: a toggle plus a free-text name box
    html.Div([
        dcc.Checklist(
            id='host-star-toggle',
            options=[{'label': 'Filter by host star: ', 'value': 'enable'}],
            value=[],  # Start unchecked
            className="checkbox-container"
        ),
        dcc.Input(
            id='host-star-input',
            type='text',
            placeholder='Enter host star name',
            debounce=True,
            className="textbox-style"
        ),
    ], className="checkbox-container", style={'display': 'flex', 'alignItems': 'center', 'justifyContent': 'center', 'gap': '10px'}),

    html.Details([
        html.Summary("Planet type (Compare Planets only)"),
        dcc.Checklist(
            id='pltype-checklist',
            options=[
                {'label': 'Terrestrial', 'value': 'terrestrial'},
                {'label': 'Super-Earth', 'value': 'super_earth'},
                {'label': 'Neptune-like', 'value': 'neptune_like'},
                {'label': 'Gas Giant', 'value': 'gas_giant'},
                {'label': 'Unknown', 'value': 'unknown'},
                {'label': 'TBA', 'value': 'tba'},
            ],
            value=['terrestrial', 'super_earth', 'neptune_like', 'gas_giant', 'unknown', 'tba'],  # Default values to filter by
            className="checkbox-container"
        ),
    ]),

    html.Details([
        html.Summary("Discovery method (Compare Planets only)"),
        dcc.Checklist(
            id='discmethod-checklist',
            options=[
                {'label': 'Transit', 'value': 'Transit'},
                {'label': 'Transit Timing Variations', 'value': 'Transit Timing Variations'},
                {'label': 'Eclipse Timing Variations', 'value': 'Eclipse Timing Variations'},
                {'label': 'Orbital Brightness Modulation', 'value': 'Orbital Brightness Modulation'},
                {'label': 'Radial Velocity', 'value': 'Radial Velocity'},
                {'label': 'Astrometry', 'value': 'Astrometry'},
                {'label': 'Imaging', 'value': 'Imaging'},
                {'label': 'Disc Kinematics', 'value': 'Disc Kinematics'},
                {'label': 'Microlensing', 'value': 'Microlensing'},
                {'label': 'Pulsar Timing', 'value': 'Pulsar Timing'},
                {'label': 'Pulsation Timing Variations', 'value': 'Pulsation Timing Variations'},
                {'label': 'Known Since Antiquity', 'value': 'Known Since Antiquity'},
                {'label': 'Unknown', 'value': 'null'},
            ],
            value=['Transit', 'Radial Velocity', 'Imaging', 'Eclipse Timing Variations', 'Microlensing', 'Pulsar Timing', 'Pulsation Timing Variations', 'Orbital Brightness Modulation', 'Transit Timing Variations', 'Astrometry', 'Disc Kinematics', 'Known Since Antiquity', 'null'],  # automatically selects all
            className="checkbox-container"
        ),
    ]),

    html.Details([
        html.Summary("Discovery locale (Compare Planets only)"),
        dcc.Checklist(
            id='disclocale-checklist',
            options=[
                {'label': 'Space', 'value': 'Space'},
                {'label': 'Ground', 'value': 'Ground'},
                {'label': 'Multiple Locales', 'value': 'Multiple Locales'},
                {'label': 'Unknown', 'value': 'null'},
            ],
            value=['Space', 'Ground', 'Multiple Locales', 'null'],  # Default values to filter by
            className="checkbox-container"
        ),
    ]),

    html.Details([
        html.Summary("Harvard spectral classes"),
        dcc.Checklist(
            id='teff-checklist',
            options=[
                {'label': 'O type star (>33000 K)', 'value': 'O'},
                {'label': 'B type star (10000-33000 K)', 'value': 'B'},
                {'label': 'A type star (7300-10000 K)', 'value': 'A'},
                {'label': 'F type star (6000-7300 K)', 'value': 'F'},
                {'label': 'G type star (5300-6000 K)', 'value': 'G'},
                {'label': 'K type star (3900-5300 K, recommended)', 'value': 'K'},
                {'label': 'M type star (2300-3900 K)', 'value': 'M'},
                {'label': 'L type star (1300-2500 K)', 'value': 'L'},
                {'label': 'T type star (700-1300 K)', 'value': 'T'},
                {'label': 'Y type star (<700 K)', 'value': 'Y'},
                {'label': 'Wolf-Rayet star (>30000 K)', 'value': 'W'},
                {'label': 'White Dwarf (~5000-100000+ K)', 'value': 'D'},
                {'label': 'Unknown', 'value': 'null'},
            ],
            value=['O', 'B', 'A', 'F', 'G', 'K', 'M', 'null'],  # Default values to filter by
            className="checkbox-container"
        ),
    ]),

    html.Details([
        html.Summary("Yerkes spectral classes"),
        dcc.Checklist(
            id='lum-checklist',
            options=[
                # {'label': 'Hypergiant (0)', 'value': '0'},
                # {'label': 'Supergiant (I)', 'value': 'I'},
                {'label': 'Bright Giant (II)', 'value': 'II'},
                {'label': 'Giant (III)', 'value': 'III'},
                {'label': 'Subgiant (IV)', 'value': 'IV'},
                {'label': 'Main-sequence/Dwarf (V, recommended)', 'value': 'V'},
                {'label': 'Subdwarf (VI)', 'value': 'VI'},
                # {'label': 'White Dwarf (VII)', 'value': 'VII'},
                {'label': 'Unknown', 'value': 'null'},
            ],
            # NOTE(review): '0', 'I' and 'VII' are selected here although their
            # checkboxes are commented out above - presumably so those classes
            # are not filtered out by default; confirm this is intended.
            value=['0', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'null'],  # Default values to filter by
            className="checkbox-container"
        ),
    ]),

    html.Details([
        html.Summary("Stellar metallicity ratio"),
        dcc.Checklist(
            id='met-checklist',
            options=[
                {'label': 'Iron abundance (recommended)', 'value': '[Fe/H]'},
                {'label': 'General metal content', 'value': '[M/H]'},
                {'label': 'Unknown', 'value': 'null'},
            ],
            value=['[Fe/H]', '[M/H]', 'null'],  # Default values to filter by
            className="checkbox-container"
        ),
    ]),

    # ---- Feature to plot ----
    html.H2("Select a feature:"),
    dcc.Dropdown(
        id='feature-dropdown',
        options=dropdown_options,
        value="pl_dens",  # Default value
        className="dropdown-container",
        placeholder="Choose a planetary feature"
    ),

    # ---- Column used to colour/group the data ----
    html.H2("Group by:"),
    dcc.RadioItems(
        id="colorcode-radioitems",
        options=[
            {'label': 'Planet Type (Compare Planets only)', 'value': 'pl_type'},
            {'label': 'Harvard Spec. Class', 'value': 'st_teffclass'},
            {'label': 'Yerkes Spec. Class', 'value': 'st_lumclass'},
            {'label': 'Stellar Metallicity', 'value': 'st_metratio'},
            {'label': 'Discovery Method (Compare Planets only)', 'value': 'discoverymethod'},
            {'label': 'Discovery Locale (Compare Planets only)', 'value': 'disc_locale'},
            # {'label': 'Host Star', 'value': 'hostname'}, # too much computational power.
        ],
        value="pl_type",
        className="checkbox-container"
    ),

    html.H2("Extra options:"),
    dcc.Checklist(
        id='extra-checklist',
        options=[
            {'label': html.Span(['Simplify planet types and discovery methods (Compare Planets only)'], style={'fontWeight': 'bold'}), 'value': 'simple'},
            {'label': html.Span(['Estimate missing values'], style={'fontWeight': 'bold'}), 'value': 'estimate'},
            {'label': html.Span(['Compare stars'], style={'fontWeight': 'bold'}), 'value': 'suns'},
            {'label': html.Span(['Compare histogram bar proportions?'], style={'fontWeight': 'bold'}), 'value': 'segment'},
        ],
        value=[],  # Start unchecked
        className="checkbox-container"
    ),

    # ---- Axis scales ----
    html.H2("Scale Types:"),
    html.H3("Feature (dotplot)"),
    dcc.RadioItems(
        id="scalex-radioitems",
        options=[
            {'label': 'Linear', 'value': 'linear'},
            {'label': 'Log', 'value': 'log'}
        ],
        value='linear',
        className="checkbox-container"
    ),
    html.H3("Count (histogram)"),
    dcc.RadioItems(
        id="scaley-radioitems",
        options=[
            {'label': 'Linear', 'value': 'linear'},
            {'label': 'Log', 'value': 'log'}
        ],
        value='linear',
        className="checkbox-container"
    ),

    # ---- Value range and binning ----
    html.H2("Min Value:"),
    dcc.Input(
        id="min-value-input",
        type="number",
        value=0,
        debounce=True,  # Optional: only trigger callback on blur/enter
        style={"width": "100px"}
    ),

    html.H2("Max Value:"),
    dcc.Input(
        id="max-value-input",
        type="number",
        value=100,
        debounce=True,
        style={"width": "100px"}
    ),

    html.H2("Number of Histogram Bins:"),
    dcc.Input(
        id="hist-bin-input",
        type="number",
        value=100,
        step=1,
        debounce=True,
        style={"width": "100px"}
    ),
    html.Div([
        html.Button('Update Graph', id='update-button', n_clicks=1),
    ], style={"marginTop": "12px"}),

    # ---- Graphs: both views are always in the page; the tab callback
    # toggles their 'display' style instead of re-rendering them ----
    dcc.Tabs(id="tabs", value="tab-1", children=[
        dcc.Tab(label="Histogram", value="tab-1", className="tab"),
        dcc.Tab(label="Dotplot", value="tab-2", className="tab"),
    ]),
    html.Div(id="tabs-content", children=[
        html.Div(id="histograms", style={"display": "block"}, children=[
            dcc.Graph(id="big_hist", figure={}),
            dcc.Graph(id="mini_hist", figure={})
        ]),
        dcc.Graph(id="dotplot", figure={}, style={"display": "none"}),
    ]),

    # Summary statistics / error text written by the graph callback
    html.P('NaN info:', id='naninfo-box'),

    html.P("Data sourced from:"),
    html.Div([
        html.A("https://www.doi.org/10.26133/NEA12", href="https://www.doi.org/10.26133/NEA12", target="_blank"),
        html.Br(),
        html.A("https://www.doi.org/10.26133/NEA36", href="https://www.doi.org/10.26133/NEA36", target="_blank"),
        html.Br(),
        html.A("https://science.nasa.gov/exoplanets/exoplanet-catalog/", href="https://science.nasa.gov/exoplanets/exoplanet-catalog/", target="_blank"),
        html.Br(),
        html.A("https://science.nasa.gov/exoplanets/target-star-catalog/", href="https://science.nasa.gov/exoplanets/target-star-catalog/", target="_blank"),
    ]),

    html.Button("Reset Settings", id="reset-button", n_clicks=0),

    # Client-side stores used by the settings callbacks
    dcc.Store(id='settings-store', storage_type='local'),
    dcc.Store(id='rehydration-complete', data=False),
    dcc.Store(id='init-flag', data=False)

# Dash style dictionaries take camelCased CSS property names
], style={'color': 'black', 'fontFamily': 'Arial', 'backgroundColor': 'white', 'padding': '20px', 'textAlign': 'center'})

def _error_figures(message):
    """Return three empty figures plus ``message``: the tuple shape ``update_graph`` uses for errors."""
    return go.Figure(), go.Figure(), go.Figure(), message


def _filter_by_choices(df, column, choices):
    """Keep rows whose ``column`` value is in ``choices``.

    When the sentinel ``'null'`` is among ``choices``, rows with a missing
    value in ``column`` are kept as well.
    """
    mask = df[column].isin(choices)
    if 'null' in choices:
        mask = mask | df[column].isna()
    return df[mask]


def update_graph(update, filters, extra, scalex, scaley, feature, no_bins, startoggle, starinput, colorcode, pltype, discmethod, disclocale, teff, lum, met, min_value, max_value):
    """Filter the dataset and build the histogram, per-category histogram and dotplot.

    Parameters:
        update: unused (the caller passes the update button's click count).
        filters: selected values of the data-filter checklist
            ('atmoData', 'default', 'noControv', 'densRange', 'target', 'solar').
        extra: selected extra options ('simple', 'estimate', 'suns', 'segment').
        scalex: x-axis type of the dotplot ('linear' or 'log').
        scaley: y-axis type of the count histograms ('linear' or 'log');
            not applied to the percentage charts drawn in 'segment' mode.
        feature: name of the column to plot.
        no_bins: number of histogram bins.
        startoggle: truthy when the host-star filter is enabled.
        starinput: host star name to match (case-insensitive); may be None.
        colorcode: name of the column used to group/colour the data.
        pltype, discmethod, disclocale, teff, lum, met: allowed values for the
            matching categorical columns; the value 'null' in discmethod,
            disclocale, teff, lum or met also keeps rows with a missing value.
        min_value, max_value: inclusive range kept for ``feature``.

    Returns:
        (histogram, mini_hist, dotplot, info). On success ``info`` is an
        ``html.Pre`` with NaN count and summary statistics; on a handled error
        the three figures are empty and ``info`` is an "ERROR: ..." string.

    Raises:
        dash.exceptions.PreventUpdate: if ``feature`` is None.
    """
    from itertools import cycle  # local import: only needed for fallback colours

    # ---- input validation ----
    if feature is None:
        raise dash.exceptions.PreventUpdate
    compare_stars = 'suns' in extra
    planet_feature_values = {option['value'] for option in planet_features['Planet (Compare Planets only)']}
    if compare_stars and feature in planet_feature_values:
        return _error_figures("ERROR: Cannot use planet features when using star data.")
    # An emptied number input arrives as None, which cannot be compared
    if min_value is None or max_value is None:
        return _error_figures("ERROR: minimum and maximum values must both be set")
    if max_value <= min_value:
        return _error_figures("ERROR: maximum is less than or equal to minimum")
    if compare_stars and colorcode in ("pl_type", "discoverymethod", "disc_locale"):
        return _error_figures(f"ERROR: Cannot group by {colorcode} when using star data.")

    # ---- load dataset ----
    df = (pl_es if 'estimate' in extra else pl_s).copy()
    if 'solar' in filters:
        # When comparing stars only the first solar row is appended
        solar_rows = solar_planets.iloc[[0]] if compare_stars else solar_planets
        df = pd.concat([df, solar_rows], ignore_index=True)  # type: ignore

    # Normalise is_solar to a real boolean column once, so it can be OR-ed
    # into filter masks directly (missing values count as "not solar").
    if "is_solar" not in df.columns:
        df["is_solar"] = False
    df["is_solar"] = df["is_solar"].map(lambda x: bool(x) if pd.notnull(x) else False).astype(bool)

    # Date features are plotted as whole seconds since 1970-01-01.
    # Plain column assignment replaces the column (and its dtype) outright.
    if feature in ("rowupdate", "releasedate") and feature in df.columns:
        epoch = pd.Timestamp("1970-01-01")
        df[feature] = (
            pd.to_datetime(df[feature], errors="coerce")
            .subtract(epoch)
            .dt.total_seconds()
            .astype('Int64')  # Pandas nullable integer dtype
        )

    # ---- planet-only filters ----
    if not compare_stars:
        if 'atmoData' in filters:
            df = df[df['pl_name'].isin(at_s['pl_name']) | df["is_solar"]]
        if 'densRange' in filters:
            # Only keeps planets with a density between water and metallic iron
            df = df[(df['pl_dens'] > 1) & (df['pl_dens'] < 5.6)]
        df = df[df['pl_type'].isin(pltype)].copy()  # copy: columns are reassigned below
        if 'simple' in extra:
            df["pl_type"] = df["pl_type"].replace({"super_earth": "terrestrial", "neptune_like": "gas_giant"})
            df["discoverymethod"] = df["discoverymethod"].replace({"Transit Timing Variations": "Transit", "Eclipse Timing Variations": "Transit", "Orbital Brightness Modulation": "Transit", "Astrometry": "Radial Velocity", "Pulsation Timing Variations": "Pulsar Timing", "Disc Kinematics": "Imaging"})
        if 'null' in discmethod:
            df = _filter_by_choices(df, 'discoverymethod', discmethod)
        else:
            df = df[df['discoverymethod'].isin(discmethod) | df["is_solar"]]
        df = _filter_by_choices(df, 'disc_locale', disclocale)

    # ---- filters that apply to planets and stars alike ----
    df = _filter_by_choices(df, 'st_teffclass', teff)
    df = _filter_by_choices(df, 'st_lumclass', lum)
    if 'default' in filters:
        df = df[df['default_flag'] == True]
    if 'noControv' in filters:
        df = df[df['pl_controv_flag'] == False]
    if 'target' in filters:
        df = df[df['hostname'].isin(target_stars) | df["is_solar"]]  # type: ignore
    df = _filter_by_choices(df, 'st_metratio', met)
    if startoggle and starinput and starinput.strip():
        # Normalize both sides for safe matching
        df = df[df['hostname'].str.lower() == starinput.strip().lower()]

    # Count missing values BEFORE the range filter: the comparisons below are
    # False for NaN, so no missing value survives past this point.
    nan_count = int(df[feature].isna().sum())
    rows_before_range = len(df)
    naninfo = f"{nan_count} NaNs"
    df = df[(df[feature] >= min_value) & (df[feature] <= max_value)]

    # ---- keep the plotted feature plus the columns used for grouping/hover ----
    if compare_stars:
        df = df[[feature, "hostname", "is_solar", "st_spectype", "st_teffclass", "st_lumclass", "st_metratio"]]

        def get_mode(series):
            mode_vals = series.mode()
            if not mode_vals.empty:
                return mode_vals.iloc[0]  # Return the first mode (if multiple)
            return np.nan

        # One row per host star, each column reduced to its most common value
        df = df.groupby("hostname", as_index=False).agg(get_mode)
    else:
        df = df[[
            "pl_name", "pl_type",
            "discoverymethod", "disc_refname", "disc_locale", "disc_facility", "disc_telescope", "disc_instrument",
            "hostname", "is_solar", "st_spectype", "st_teffclass", "st_lumclass", "st_metratio",
            feature,
        ]].copy()

    # Returns empty graphs if filtered dataset is empty
    if df.empty:
        return _error_figures("ERROR: filtered dataset is empty")

    # Handle NaNs in colorcode by assigning 'unknown'
    df[colorcode] = df[colorcode].fillna("unknown")

    # Summary statistics plus warnings
    naninfo += f"\nTotal entries: {len(df)}\nMean: {df[feature].mean()}\nMedian: {df[feature].median()}\nVariance: {df[feature].var()}"
    if nan_count > rows_before_range / 2:
        naninfo += "\nWARNING: More than 50 percent of entries in this dataset are missing this feature."

    # Fixed colours for known category values
    color_map = {
        "terrestrial": "blue", "super_earth": "red", "neptune_like": "green", "gas_giant": "purple", "tba": "black",
        "II": "yellow", "III": "orange", "IV": "green", "V": "blue", "VI": "black",
        "B": "darkblue", "A": "royalblue", "F": "seagreen", "G": "yellow", "K": "orange", "M": "red",
        "Transit": "royalblue", "Transit Timing Variations": "slateblue", "Eclipse Timing Variations": "mediumslateblue", "Orbital Brightness Modulation": "cornflowerblue", "Radial Velocity": "crimson", "Astrometry": "firebrick", "Imaging": "seagreen", "Disc Kinematics": "mediumseagreen", "Microlensing": "darkgoldenrod", "Pulsar Timing": "mediumvioletred", "Pulsation Timing Variations": "orchid", "Known Since Antiquity": "black",
        "Space": "deepskyblue", "Ground": "sienna", "Multiple Locales": "mediumorchid",
        "[Fe/H]": "steelblue", "[M/H]": "darkcyan",
        "unknown": "gray",
    }

    # Discrete grouping columns get "value (count)" legend labels
    is_discrete = not pd.api.types.is_numeric_dtype(df[colorcode])
    if is_discrete:
        label_counts = df[colorcode].value_counts().to_dict()
        group_keys = df[colorcode].unique()

        # e.g. "terrestrial" -> "terrestrial (42)"
        labeled_with_counts = {key: f"{key} ({label_counts.get(key, 0)})" for key in group_keys}
        df["colorcode_labeled"] = df[colorcode].map(labeled_with_counts)
        colorcode_use = "colorcode_labeled"

        # Values without a fixed colour take the next Plotly palette colour;
        # cycling means the palette cannot run out.
        fallback_colors = cycle(px.colors.qualitative.Plotly)
        color_map_labeled = {
            labeled_with_counts[key]: color_map[key] if key in color_map else next(fallback_colors)
            for key in group_keys
        }
    else:
        # For continuous or numeric colorcodes, no relabeling
        colorcode_use = colorcode
        color_map_labeled = color_map
    discrete_map = color_map_labeled if is_discrete else None

    # Solar-system entries are drawn three times larger in the dotplot
    df["is_solar"] = df["is_solar"].map(lambda x: bool(x) if pd.notnull(x) else False)
    df["marker_size"] = df["is_solar"].apply(lambda x: 3 if x else 1)

    # columns for hovertext
    if compare_stars:
        hover_cols = ['hostname']
    else:
        hover_cols = ['pl_name', 'hostname', 'discoverymethod', 'disc_refname', 'disc_locale', 'disc_facility', 'disc_telescope', 'disc_instrument']

    # Axis label: the dropdown label of the feature, or the raw column name
    feature_label = next(
        (opt['label'] for opt in dropdown_options if not opt.get('disabled') and opt['value'] == feature),
        feature  # fallback if not found
    )

    legend_style = dict(
        font=dict(size=20),
        itemsizing='constant'  # Keeps marker size consistent
    )

    # ---- histograms ----
    if 'segment' in extra:
        # Stacked bars showing each group's share (in percent) of every bin
        bin_edges = np.histogram_bin_edges(df[feature], bins=no_bins)
        df['bin_label'] = pd.cut(df[feature], bins=bin_edges, include_lowest=True)
        df['bin_label_str'] = df['bin_label'].apply(lambda interval: f"{interval.left:.1f}-{interval.right:.1f}")

        # Count within each bin and group, then normalise each bin to 0-100 %
        grouped = df.groupby(['bin_label_str', colorcode_use]).size().unstack(fill_value=0)
        normalized = grouped.div(grouped.sum(axis=1), axis=0) * 100

        # Plain list: make_subplots is given list-like titles, not a pandas Index
        facet_values = list(normalized.columns)
        bin_labels = normalized.index.astype(str)

        histogram = go.Figure()
        mini_hist = make_subplots(rows=1, cols=len(facet_values), shared_yaxes=True, subplot_titles=facet_values)
        for col_no, group_label in enumerate(facet_values, start=1):
            group_color = color_map_labeled.get(group_label, 'gray')
            histogram.add_trace(go.Bar(
                name=group_label,
                x=bin_labels,
                y=normalized[group_label],
                marker_color=group_color
            ))
            mini_hist.add_trace(
                go.Bar(
                    x=normalized.index,
                    y=normalized[group_label],
                    name=group_label,
                    marker_color=group_color,
                    showlegend=False
                ),
                row=1,
                col=col_no
            )

        # The y axis shows percentages here, so it is titled '%', not "Count"
        histogram.update_layout(
            barmode='stack',
            yaxis=dict(title=dict(text='%'), range=[0, 100]),
            xaxis=dict(title=dict(text=feature_label)),
            legend=legend_style
        )
        mini_hist.update_layout(
            yaxis=dict(title=dict(text='%'), range=[0, 100]),
            xaxis=dict(title=dict(text=feature_label)),
            title='Per Category'
        )
    else:
        histogram = px.histogram(df, x=feature, nbins=no_bins, color=colorcode_use, color_discrete_map=discrete_map)
        histogram.update_layout(yaxis_type=scaley, legend=legend_style)
        histogram.update_xaxes(title_text=feature_label)

        # Facet on the column actually in use; "colorcode_labeled" only
        # exists when the grouping column was relabelled above.
        mini_hist = px.histogram(df, x=feature, nbins=no_bins, facet_col=colorcode_use, color=colorcode_use, color_discrete_map=discrete_map, title="Per Category")
        mini_hist.update_layout(showlegend=False, yaxis_type=scaley)
        for annotation in mini_hist.layout.annotations:
            # Facet titles read "column=value"; keep only the value
            annotation.text = annotation.text.split('=')[-1]
        mini_hist.update_xaxes(title_text=feature_label)

    # ---- dotplot ----
    dotplot = px.scatter(df, x=feature, color=colorcode_use, color_discrete_map=discrete_map, size="marker_size", hover_data=hover_cols)
    dotplot.update_layout(xaxis_type=scalex, legend=legend_style)
    dotplot.update_traces(
        hoverlabel=dict(
            font=dict(color="black"),
            bgcolor="white",
        ),
    )
    dotplot.update_xaxes(title_text=feature_label)

    return histogram, mini_hist, dotplot, html.Pre(naninfo)

# change graphs based on selected tab
@app.callback(
    [Output('histograms', 'style'),
     Output('dotplot', 'style'),],
    [Input("tabs", "value")]
)
def update_tabs(tab):
    """Show the graph container that belongs to the selected tab.

    Returns the styles for the 'histograms' container and the 'dotplot'
    graph, in that order. "tab-2" shows the dotplot; any other value
    (including "tab-1") shows the histograms.
    """
    visible = {"display": "block"}
    hidden = {"display": "none"}
    if tab == "tab-2":
        return [hidden, visible]
    return [visible, hidden]
    
# Reset the min/max value inputs to the data range of the selected feature
@app.callback(
    [Output('min-value-input', 'value', allow_duplicate=True),
     Output('max-value-input', 'value', allow_duplicate=True)],
    [Input('feature-dropdown', 'value'),
     Input('extra-checklist', 'value'),
     Input('update-button', 'n_clicks')],
    prevent_initial_call="initial_duplicate"
)
def update_sliders(feature, extra, n_clicks):
    """Return the (min, max) range of ``feature`` for the two range inputs.

    The range comes from the estimated table when 'estimate' is in ``extra``
    and from the plain table otherwise. The minimum is rounded down and the
    maximum rounded up to whole numbers. Date features ("rowupdate",
    "releasedate") are converted to seconds since 1970-01-01 first.

    Raises:
        dash.exceptions.PreventUpdate: when the update button triggered the
            callback (so a range typed by the user is not overwritten), when
            no feature is selected or ``extra`` is not a list, or when the
            column is absent or holds no usable values.
    """
    triggers = [t['prop_id'] for t in dash.callback_context.triggered]

    # Suppress only if triggered by update-button
    if any(t.startswith('update-button.') for t in triggers):
        raise dash.exceptions.PreventUpdate

    if not feature or not isinstance(extra, list):
        raise dash.exceptions.PreventUpdate

    source = pl_es if 'estimate' in extra else pl_s
    if feature not in source.columns:
        raise dash.exceptions.PreventUpdate

    # Only the one column is needed, so the table itself is never copied
    values = source[feature]
    if feature in ("rowupdate", "releasedate"):
        epoch = pd.Timestamp("1970-01-01")
        # Unparseable dates become NaN here and are dropped below
        values = pd.to_datetime(values, errors="coerce").subtract(epoch).dt.total_seconds()
    values = values.dropna()
    if values.empty:
        # Nothing to derive a range from; leave the inputs untouched
        raise dash.exceptions.PreventUpdate

    # Plain Python floats for the response
    feature_min = float(np.floor(values.min()))
    feature_max = float(np.ceil(values.max()))

    return feature_min, feature_max

# Update graphs based on user input or stored settings
@app.callback(
    [Output('big_hist', 'figure'),
     Output('mini_hist', 'figure'),
     Output('dotplot', 'figure'),
     Output('naninfo-box', 'children'),
     Output('settings-store', 'data', allow_duplicate=True)],
    [Input('update-button', 'n_clicks'),
     Input('settings-store', 'data')],
    [State('filter-checklist', 'value'),
     State('extra-checklist', 'value'),
     State('scalex-radioitems', 'value'),
     State('scaley-radioitems', 'value'),
     State('feature-dropdown', 'value'),
     State("hist-bin-input", "value"),
     State('host-star-toggle', 'value'),
     State('host-star-input', 'value'),
     State('colorcode-radioitems', 'value'),
     State('pltype-checklist', 'value'),
     State('discmethod-checklist', 'value'),
     State('disclocale-checklist', 'value'),
     State('teff-checklist', 'value'),
     State('lum-checklist', 'value'),
     State('met-checklist', 'value'),
     State('min-value-input', 'value'),
     State('max-value-input', 'value'),],
    prevent_initial_call="initial_duplicate"
)
def update_figures(n_clicks, settings, *states):
    """Redraw the figures from the current controls or from stored settings.

    Args:
        n_clicks: Click count of ``update-button``; forwarded to
            ``update_graph`` when the button triggered the callback.
        settings: Current ``settings-store`` data (a dict of saved control
            values, or a falsy value when nothing is stored).
        *states: The ``State`` values in the order declared in the decorator.

    Returns:
        ``(histogram, mini_hist, dotplot, naninfo, store_data)``. On a button
        click ``store_data`` is the dict of control values to persist; when
        triggered by the store it is ``dash.no_update``.

    Raises:
        dash.exceptions.PreventUpdate: If nothing triggered the callback, if
            the store triggered it but holds no settings, or if the trigger
            is not one of the two declared inputs.
    """
    # Check if the callback was triggered by a button click or settings change
    callback_ctx = dash.callback_context
    if not callback_ctx.triggered:
        raise dash.exceptions.PreventUpdate

    trigger = callback_ctx.triggered[0]['prop_id']

    if trigger == 'update-button.n_clicks':
        # Use current inputs
        histogram, mini_hist, dotplot, naninfo = update_graph(n_clicks, *states)
        # zip() stops at the shorter sequence, so the two trailing states
        # (the min/max inputs) are not stored; unpack_settings recomputes
        # them when the settings are read back.
        stored_keys = [
            'filter', 'extra', 'scalex', 'scaley', 'feature',
            'bin', 'hosttoggle', 'hostinput', 'colorcode', 'pltype',
            'discmethod', 'disclocale', 'teff', 'lum', 'met',
        ]
        settings_dict = dict(zip(stored_keys, states))
        return histogram, mini_hist, dotplot, naninfo, settings_dict

    if trigger == 'settings-store.data':
        # Use stored settings, not states
        if not settings:
            raise dash.exceptions.PreventUpdate
        histogram, mini_hist, dotplot, naninfo = update_graph(0, *unpack_settings(settings))
        return histogram, mini_hist, dotplot, naninfo, dash.no_update

    # Falling off the end would hand None to a five-output callback; leave
    # every output untouched instead.
    raise dash.exceptions.PreventUpdate

# Restore settings from local storage when the app starts
@app.callback(
    [Output('filter-checklist', 'value'),
     Output('extra-checklist', 'value'),
     Output('scalex-radioitems', 'value'),
     Output('scaley-radioitems', 'value'),
     Output('feature-dropdown', 'value'),
     Output("hist-bin-input", "value"),
     Output('host-star-toggle', 'value'),
     Output('host-star-input', 'value'),
     Output('colorcode-radioitems', 'value'),
     Output('pltype-checklist', 'value'),
     Output('discmethod-checklist', 'value'),
     Output('disclocale-checklist', 'value'),
     Output('teff-checklist', 'value'),
     Output('lum-checklist', 'value'),
     Output('met-checklist', 'value'),
     Output('init-flag', 'data')],
    Input('settings-store', 'data'),
    State('init-flag', 'data'),
    prevent_initial_call="initial_duplicate"
)
def restore_settings(settings, already_initialized):
    """Copy stored settings back into the controls, once.

    Args:
        settings: ``settings-store`` data; a dict keyed by setting name.
        already_initialized: Current ``init-flag`` data; truthy once the
            controls have been restored.

    Returns:
        A tuple with one value per control output, in decorator order,
        followed by ``True`` for ``init-flag``.

    Raises:
        dash.exceptions.PreventUpdate: If there are no stored settings or the
            controls were already restored.
        KeyError: If the stored dict lacks one of the expected keys.
    """
    if already_initialized or not settings:
        raise dash.exceptions.PreventUpdate

    # Same order as the control Outputs declared in the decorator.
    control_keys = (
        'filter', 'extra', 'scalex', 'scaley',
        'feature',
        'bin',
        'hosttoggle', 'hostinput', 'colorcode',
        'pltype', 'discmethod', 'disclocale',
        'teff', 'lum', 'met',
    )
    restored = [settings[key] for key in control_keys]

    # The trailing True sets init-flag so later store changes are ignored.
    return (*restored, True)

# Unpack settings from the stored dictionary
def unpack_settings(settings):
    """Turn a stored settings dict into the positional arguments for a redraw.

    The min/max range is not stored; it is recomputed here from the selected
    feature column of ``pl_es`` (when ``'estimate'`` is in
    ``settings['extra']``) or ``pl_s`` (otherwise).

    Args:
        settings: Dict with the keys ``filter``, ``extra``, ``scalex``,
            ``scaley``, ``feature``, ``bin``, ``hosttoggle``, ``hostinput``,
            ``colorcode``, ``pltype``, ``discmethod``, ``disclocale``,
            ``teff``, ``lum`` and ``met``.

    Returns:
        A 17-tuple: the fifteen stored values in the order listed above,
        followed by the column minimum rounded down and the column maximum
        rounded up. For ``rowupdate``/``releasedate`` the range is in whole
        seconds since 1970-01-01.

    Raises:
        KeyError: If a required key is missing from ``settings`` or the
            feature is not a column of the selected table.
    """
    feature = settings['feature']

    # Only one column is needed, so read it directly instead of copying the
    # whole table. dropna() returns a new Series, so the shared source frame
    # is never modified.
    source = pl_es if 'estimate' in settings['extra'] else pl_s
    values = source[feature].dropna()

    # Convert date features to seconds since epoch if applicable
    if feature in ("rowupdate", "releasedate"):
        # errors="coerce" turns unparseable entries into NaT; those must be
        # dropped before the integer cast, which cannot represent NaN.
        epoch = pd.Timestamp("1970-01-01")
        values = (
            pd.to_datetime(values, errors="coerce")
            .dropna()
            .subtract(epoch)
            .dt.total_seconds()
            .astype(int)
        )

    # Recompute min/max based on feature and extra
    min_val = np.floor(values.min())
    max_val = np.ceil(values.max())

    return (
        settings['filter'], settings['extra'], settings['scalex'], settings['scaley'],
        feature, settings['bin'], settings['hosttoggle'], settings['hostinput'],
        settings['colorcode'], settings['pltype'], settings['discmethod'], settings['disclocale'],
        settings['teff'], settings['lum'], settings['met'],
        min_val, max_val
    )

# Clear the store when the reset button is clicked
@app.callback(
    Output('settings-store', 'data', allow_duplicate=True),
    Input('reset-button', 'n_clicks'),
    prevent_initial_call=True,
)
def clear_store(n):
    """Empty the settings store when the reset button is clicked.

    Args:
        n: Click count of ``reset-button``; unused.

    Returns:
        ``None``, which is written to ``settings-store.data``.
    """
    return None

if __name__ == '__main__':
    # Announce the address before starting the server: when this file is run
    # as a plain script, app.run() does not return until the server stops, so
    # a message printed after it would only show up at shutdown.
    print("running on localhost:8053")
    app.run(debug=True, port=8053)
