In [None]:
# PCA (Credit to Microsoft Copilot)
# Note to self: figure out how the heck this works.
import pandas as pd
import numpy as np
from dash import Dash, dcc, html, Input, Output, ctx, MATCH, ALL
import plotly.graph_objects as go
import plotly.express as px

# Imports exoplanet classification lists
%run -i assets/lists.ipynb

# imports Planetary systems csv and adds pl_type column based on classification lists
pl_s = pd.read_csv('assets/Planetary_Systems.csv', comment="#", low_memory=False)
pl_s["pl_type"] = pl_s["pl_name"].apply(
    lambda x: "terrestrial" if x in terrestrial # type: ignore
    else "super_earth" if x in super_earths # type: ignore
    else "unknown" if x in unknown # type: ignore
    else "neptune_like" if x in neptune_like # type: ignore
    else "gas_giant" if x in gas_giants # type: ignore
    else "none"
)

# imports atmospheric list for one filter.
at_s = pd.read_csv('assets/Atmospheric_Spectroscopy.csv', comment="#", low_memory=False)

# Dash app
app = Dash(__name__, external_stylesheets=["assets/style.css"])

# Database columns offered as selectable features, grouped by the heading
# they are shown under. 'value' is the CSV column name, 'label' the checkbox text.
planet_features = {
    'Planet': [
        {'label': "Orbital Period (pl_orbper)", 'value': "pl_orbper"}, # in days (Not in values dataset)
        {'label': "Orbit Semi-Major Axis (pl_orbsmax)", 'value': "pl_orbsmax"}, # in Astronomical Units (Not in values dataset)
        {'label': "Epoch of Periastron (pl_orbtper)", 'value': "pl_orbtper"}, # in days - it is a time, not an angle (Not in values dataset)
        {'label': "Argument of Periastron (pl_orblper)", 'value': "pl_orblper"}, # in degrees (Not in values dataset)
        {'label': "Proj. Obliquity (pl_projobliq)", 'value': "pl_projobliq"}, # in degrees (Not in values dataset)
        {'label': "True Obliquity (pl_trueobliq)", 'value': "pl_trueobliq"}, # in degrees (Not in values dataset)
        {'label': "Radius (pl_rade)", 'value': "pl_rade"}, # in Earth radii (Not in values dataset)
        {'label': "Mass (pl_bmasse)", 'value': "pl_bmasse"}, # estimation, in Earth masses (Not in values dataset)
        {'label': "Density (pl_dens)", 'value': "pl_dens"}, # in g/cm^3 (Not in values dataset)
        {'label': "Orbital Eccentricity (pl_orbeccen)", 'value': "pl_orbeccen"}, # (Not in values dataset)
        {'label': "Insol. Flux (pl_insol)", 'value': "pl_insol"}, # in Earth flux (Not in values dataset)
        {'label': "Equil. Temp. (pl_eqt)", 'value': "pl_eqt"},  # in Kelvin (Not in values dataset)
        {'label': "Transit Duration (pl_trandur)", 'value': "pl_trandur"}, # in hours (Not in values dataset)
        {'label': "Transit Midpoint (pl_tranmid)", 'value': "pl_tranmid"}, # in days (Not in values dataset)
        {'label': "Transit Depth (pl_trandep)", 'value': "pl_trandep"}, # percentage (Not in values dataset)
        {'label': "Impact Parameter (pl_imppar)", 'value': "pl_imppar"}, # (Not in values dataset)
        {'label': "Occultation Depth (pl_occdep)", 'value': "pl_occdep"}, # percentage (Not in values dataset)
        {'label': "Rad. Velocity Amplitude (pl_rvamp)", 'value': "pl_rvamp"}, # in m/s (Not in values dataset)
        {'label': "Discovery Year (disc_year)", 'value': "disc_year"},
        # {'label': "Last Update", 'value': "rowupdate"}, # last update of parameters
        # {'label': "Public Release Date", 'value': "releasedate"}, # date publicly released
    ],
    'Stellar': [
        {'label': "Effec. Temp. (st_teff)", 'value': "st_teff"}, # in Kelvin
        {'label': "Radius (st_rad)", 'value': "st_rad"}, # in Solar radii
        {'label': "Mass (st_mass)", 'value': "st_mass"},
        {'label': "Density (st_dens)", 'value': "st_dens"},
        {'label': "Surface Grav. (st_logg)", 'value': "st_logg"},
        {'label': "Age (st_age)", 'value': "st_age"}, # in gigayears
        {'label': "Rot. Period (st_rotp)", 'value': "st_rotp"},
        {'label': "Rot. Velocity (st_vsin)", 'value': "st_vsin"},
        {'label': "Rad. Velocity (st_radv)", 'value': "st_radv"},
        {'label': "Metallicity (st_met)", 'value': "st_met"},
        {'label': "Luminosity (st_lum)", 'value': "st_lum"},
    ],
    'System': [
        {'label': "Parallax (sy_plx)", 'value': "sy_plx"},
        {'label': "Dist from Earth (sy_dist)", 'value': "sy_dist"}, # in parsecs
        {'label': "u (Sloan) Magnitude (sy_umag, ~354 nm)", 'value': "sy_umag"},
        {'label': "B (Johnson) Magnitude (sy_bmag, ~442 nm)", 'value': "sy_bmag"},
        {'label': "g (Sloan) Magnitude (sy_gmag, ~475 nm)", 'value': "sy_gmag"},
        {'label': "V (Johnson) Magnitude (sy_vmag, ~540 nm)", 'value': "sy_vmag"},
        {'label': "Kepler Magnitude (sy_kepmag, ~600 nm)", 'value': "sy_kepmag"},
        {'label': "r (Sloan) Magnitude (sy_rmag, ~622 nm)", 'value': "sy_rmag"},
        {'label': "Gaia Magnitude (sy_gaiamag, ~673 nm)", 'value': "sy_gaiamag"},
        {'label': "i (Sloan) Magnitude (sy_imag, ~763 nm)", 'value': "sy_imag"},
        {'label': "I (Cousins) Magnitude (sy_icmag, ~786.5 nm)", 'value': "sy_icmag"},
        {'label': "TESS Magnitude (sy_tmag, ~800 nm)", 'value': "sy_tmag"},
        {'label': "z (Sloan) Magnitude (sy_zmag, ~905 nm)", 'value': "sy_zmag"},
        {'label': "J (2MASS) Magnitude (sy_jmag, ~1.25 μm)", 'value': "sy_jmag"},
        {'label': "H (2MASS) Magnitude (sy_hmag, ~1.65 μm)", 'value': "sy_hmag"},
        {'label': "Ks (2MASS) Magnitude (sy_kmag, ~2.15 μm)", 'value': "sy_kmag"},
        {'label': "W1 (WISE) Magnitude (sy_w1mag, ~3.4 μm)", 'value': "sy_w1mag"},
        {'label': "W2 (WISE) Magnitude (sy_w2mag, ~4.6 μm)", 'value': "sy_w2mag"},
        {'label': "W3 (WISE) Magnitude (sy_w3mag, ~12 μm)", 'value': "sy_w3mag"},
        {'label': "W4 (WISE) Magnitude (sy_w4mag, ~22 μm)", 'value': "sy_w4mag"},
    ]
}
# Pre-ticked features, one list per group, in the same order as planet_features
# (the layout indexes this list by the group's position).
initial_values = [['pl_orbper'], [], ["sy_plx", "sy_dist", "sy_bmag", "sy_vmag", "sy_jmag", "sy_hmag", "sy_kmag", "sy_gaiamag", "sy_tmag"]]

# Page layout: data filters, one feature checklist per group, colour-coding and
# NaN-handling options, then four graphs (one per tab). All four graphs stay in
# the DOM; the update_tabs callback only toggles their 'display' style.
app.layout = html.Div([
    html.H1("PCA Analysis"),

    html.H2("Data filters:"),
    dcc.Checklist(
        id='filter-checklist',
        options=[
            {'label': 'Has atmospheric data', 'value': 'atmoData'},
            {'label': 'Default parameter set', 'value': 'default'},
            {'label': 'No controversial flag', 'value': 'noControv'}
        ],
        value=['default', 'noControv'],  # Default values to filter by
        className="checkbox-container"
    ),
    dcc.Checklist(
        id='simplification-checkbox',
        options=[
            {'label': html.Span(['Simplify planet types?'], style={'fontWeight': 'bold'}), 'value': 'box'},
        ],
        className="checkbox-container"
    ),

    html.H2("Select Features:"),
    # One checklist per feature group; the pattern-matching id lets the callback
    # collect every group's selection with ALL.
    *[
        html.Div([
            html.H3(group),
            dcc.Checklist(
                id={'type': 'feature-checklist', 'index': i},
                options=options,
                value=initial_values[i],
                className="checkbox-container"
            )
        ]) for i, (group, options) in enumerate(planet_features.items())
    ],

    html.H2("Color coding:"),
    dcc.RadioItems(
        id="colorcode-radioitems",
        options=[
            {'label': 'Planet Type', 'value': 'pl_type'},
            {'label': 'Next Principal Component', 'value': 'PC'},
            # {'label': 'Host Star', 'value': 'hostname'}, # too much computational power.
        ],
        value="pl_type",
        className="checkbox-container"
    ),

    html.H2("How to handle NaN values:"),
    dcc.RadioItems(
        id="nanhandle-radioitems",
        options=[
            {'label': 'Set to 0', 'value': 'zero'},
            {'label': 'Set to mean', 'value': 'mean'},
            {'label': 'Set to median', 'value': 'median'},
        ],
        value="zero",
        className="checkbox-container"
    ),

    dcc.Tabs(id="tabs", value="tab-1", children=[
        dcc.Tab(label="3D PCA Scatter Plot", value="tab-1", className="tab"),
        dcc.Tab(label="2D PCA Scatter Plot", value="tab-2", className="tab"),
        dcc.Tab(label="Explained Variance", value="tab-3", className="tab"),
        dcc.Tab(label="Feature Loadings Heatmap", value="tab-4", className="tab"),
    ]),
    html.Div(id="tabs-content", children=[
        dcc.Graph(id="graph-3d", figure={}, style={"display": "block"}),
        dcc.Graph(id="graph-2d", figure={}, style={"display": "none"}),
        dcc.Graph(id="variance-bar", figure={}, style={"display": "none"}),
        dcc.Graph(id="loadings-heatmap", figure={}, style={"display": "none"}),
    ]),  # This will hold the selected graph

    html.P('NaN info:', id='naninfo-box')
# Dash style dictionaries use camelCase keys (fontFamily, textAlign), not CSS hyphenated names.
], style={'color': 'black', 'fontFamily': 'Arial', 'backgroundColor': 'white', 'padding': '20px', 'textAlign': 'center'})

@app.callback(
    [Output('graph-3d', 'figure'),
     Output('graph-2d', 'figure'),
     Output('variance-bar', 'figure'),
     Output('loadings-heatmap', 'figure'),
     Output('naninfo-box', 'children')],
    [Input('filter-checklist', 'value'),
     Input('simplification-checkbox', 'value'),
     Input({'type': 'feature-checklist', 'index': ALL}, 'value'),
     Input('colorcode-radioitems', 'value'),
     Input('nanhandle-radioitems', 'value')]
)
def update_graph(filters, simple, feature_vals, colorcode, nanhandle):
    """Run PCA on the selected features and rebuild all four figures.

    Args:
        filters: values ticked in 'filter-checklist' ('atmoData', 'default', 'noControv').
        simple: value of 'simplification-checkbox'; when truthy, super_earth is
            merged into terrestrial and neptune_like into gas_giant.
        feature_vals: one list of selected column names per feature checklist.
        colorcode: 'pl_type', or 'PC' to colour by the next unused principal
            component (PC4 in the 3D plot, PC3 in the 2D plot).
        nanhandle: 'mean' or 'median' impute each feature with its own column
            statistic; any other value (normally 'zero') imputes 0.

    Returns:
        (3D scatter, 2D scatter, explained-variance chart, loadings heatmap,
        NaN summary). On invalid input the four figures are empty and the last
        element is an error message.
    """
    n_components = 4  # PC1-PC3 are plotted, PC4 is used for colour coding
    pc_names = [f"PC{i + 1}" for i in range(n_components)]
    # Non-feature columns kept only so they show up in the hover box
    hover_cols = ["pl_name", "pl_type", "discoverymethod", "disc_refname", "disc_locale",
                  "disc_facility", "disc_telescope", "disc_instrument", "hostname"]

    def _error(message):
        """Empty figures plus an error message, matching the five callback outputs."""
        return go.Figure(), go.Figure(), go.Figure(), go.Figure(), message

    filters = filters or []
    # Combines all feature checkboxes into one list.
    features = sorted({val for group in feature_vals for val in (group or [])})

    # Apply filters (boolean indexing never modifies the global pl_s)
    df = pl_s
    if 'atmoData' in filters:
        df = df[df['pl_name'].isin(at_s['PL_NAME'])]
    if 'default' in filters:
        df = df[df['default_flag'] == True]
    if 'noControv' in filters:
        df = df[df['pl_controv_flag'] == False]

    # Returns empty graphs and error message for certain anomalies
    missing_features = [f for f in features if f not in df.columns]
    if missing_features:
        return _error("ERROR: Missing columns " + ", ".join(missing_features))
    if df.empty:
        return _error('ERROR: filtered dataset is empty')
    if len(df) < 2:
        return _error('ERROR: at least two planets are needed after filtering')
    if len(features) < n_components:
        # Fewer features than components cannot yield PC1..PC4
        return _error(f'ERROR: not enough features selected (need at least {n_components})')

    # Private copy with a fresh 0..n-1 index: keeps pl_s untouched and lets the
    # PCA coordinates be attached row-for-row further down.
    df = df[hover_cols + features].reset_index(drop=True)
    if simple:
        df['pl_type'] = df['pl_type'].replace({'super_earth': 'terrestrial', 'neptune_like': 'gas_giant'})

    X = df[features].to_numpy(dtype=float)

    # Gets information on NaNs in each column
    nan_mask = np.isnan(X)
    nan_counts = nan_mask.sum(axis=0)
    most_nans_index = np.argmax(nan_counts)
    naninfo = ["NaN info:"]
    for i, (col_name, count) in enumerate(zip(features, nan_counts)):
        suffix = " (highest)" if i == most_nans_index else ""
        naninfo.append(f"{col_name}: {count} NaNs{suffix}")
    nanstring = "\n".join(naninfo)

    # Impute NaNs per feature; a statistic taken over the whole matrix would
    # mix unrelated units (e.g. Kelvin with parsecs).
    if nanhandle == 'mean':
        fill_values = np.nanmean(X, axis=0)
    elif nanhandle == 'median':
        fill_values = np.nanmedian(X, axis=0)
    else:
        fill_values = np.zeros(X.shape[1])
    # A column with no data at all has no mean/median; fall back to 0
    fill_values = np.where(np.isnan(fill_values), 0.0, fill_values)
    X_cleaned = np.where(nan_mask, fill_values, X)

    # PCA-required data normalization; constant columns keep std 1 to avoid 0/0
    X_std = X_cleaned.std(axis=0)
    X_std[X_std == 0] = 1.0
    X_normalized = (X_cleaned - X_cleaned.mean(axis=0)) / X_std

    # Covariance matrix and its eigen-decomposition, sorted by descending eigenvalue
    cov_matrix = np.cov(X_normalized, rowvar=False)
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
    order = np.argsort(eigenvalues)[::-1]
    eigenvectors = eigenvectors[:, order]
    eigenvalues = np.clip(eigenvalues[order], 0, None)  # remove tiny negative round-off
    total_variance = eigenvalues.sum()
    if total_variance > 0:
        explained_variance_ratio = eigenvalues / total_variance
    else:
        explained_variance_ratio = np.zeros_like(eigenvalues)

    # Project onto the leading components
    loadings = eigenvectors[:, :n_components]
    X_pca = np.dot(X_normalized, loadings)

    # Attach PCA columns using df's own index so every planet keeps its coordinates
    df_pca = pd.concat([df, pd.DataFrame(X_pca, columns=pc_names, index=df.index)], axis=1)
    df_pca = df_pca.dropna(subset=["pl_name"])  # Remove rows where pl_name is missing

    # Everything except the PC columns and pl_type goes into the hover box
    exclude_cols = set(pc_names) | {"pl_type"}
    hover_data_dict = {col: True for col in df_pca.columns if col not in exclude_cols}

    # With PC colour coding each plot uses the first component it does not plot
    colorcode_3d = "PC4" if colorcode == "PC" else colorcode
    colorcode_2d = "PC3" if colorcode == "PC" else colorcode
    hoverlabel = dict(font=dict(color="black"), bgcolor="white")

    # **Graph 1: 3D PCA Scatter Plot**
    fig_3d = px.scatter_3d(df_pca, x="PC1", y="PC2", z="PC3", color=colorcode_3d, hover_data=hover_data_dict)
    fig_3d.update_traces(hoverlabel=hoverlabel, marker=dict(size=2))

    # **Graph 2: 2D PCA Scatter Plot**
    fig_2d = px.scatter(df_pca, x="PC1", y="PC2", color=colorcode_2d, hover_data=hover_data_dict)
    fig_2d.update_traces(hoverlabel=hoverlabel)

    # **Graph 3: Explained Variance Bar Chart**
    df_eigen = pd.DataFrame({
        "Principal Component": [f"PC{i+1}" for i in range(len(explained_variance_ratio))],
        "Explained Variance Ratio": explained_variance_ratio,
        "Cumulative Variance Ratio": np.cumsum(explained_variance_ratio)
    })
    fig_variance = px.bar(df_eigen, x="Principal Component", y="Explained Variance Ratio", text_auto=True)
    fig_variance.add_scatter(x=df_eigen["Principal Component"], y=df_eigen["Cumulative Variance Ratio"], mode="lines+markers", name="Cumulative Variance")

    # **Graph 4: Feature Loadings Heatmap** (features ordered by |PC1 loading|)
    df_loadings = pd.DataFrame(loadings, index=features, columns=pc_names)
    df_loadings = df_loadings.reindex(df_loadings["PC1"].abs().sort_values(ascending=False).index)
    fig_loadings = px.imshow(df_loadings, labels={"x": "Principal Component", "y": "Feature", "color": "Contribution",})

    return fig_3d, fig_2d, fig_variance, fig_loadings, html.Pre(nanstring)

@app.callback(
    [Output("graph-3d", "style"),
     Output("graph-2d", "style"),
     Output("variance-bar", "style"),
     Output("loadings-heatmap", "style")],
    Input("tabs", "value"),
)
def update_tabs(tab):
    """Show the graph belonging to the selected tab and hide the other three.

    Returns four style dicts, ordered like the callback outputs. An
    unrecognised tab value falls back to showing the first graph.
    """
    graph_position = {"tab-1": 0, "tab-2": 1, "tab-3": 2, "tab-4": 3}
    visible = graph_position.get(tab, 0)
    return [
        {"display": "block" if position == visible else "none"}
        for position in range(len(graph_position))
    ]
    
if __name__ == "__main__":
    # Start the Dash development server for the PCA app on port 8051
    app.run(port=8051, debug=True)

# Hypothesis: Is stellar age related to the differentiation of planets?
# Move to Streamlit (must move to python base first)
# Understand in context of star system (too computationally taxing, apparently).
# Put this after the starmaps, suggestions:

In [None]:
# UMAP (Credit to Microsoft Copilot)
# Note to self: figure out how the heck this works.
# For UMAP, use 2-10 neighbors (local) and 30-100 (global). Put after PCA
import pandas as pd
import numpy as np
from dash import Dash, dcc, html, Input, Output, State, ctx, MATCH, ALL
import plotly.graph_objects as go
import plotly.express as px
import umap
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Imports exoplanet classification lists
%run -i assets/lists.ipynb

# imports Planetary systems csv and adds pl_type column based on classification lists
pl_s = pd.read_csv('assets/Planetary_Systems.csv', comment="#", low_memory=False)
pl_s["pl_type"] = pl_s["pl_name"].apply(
    lambda x: "terrestrial" if x in terrestrial # type: ignore
    else "super_earth" if x in super_earths # type: ignore
    else "unknown" if x in unknown # type: ignore
    else "neptune_like" if x in neptune_like # type: ignore
    else "gas_giant" if x in gas_giants # type: ignore
    else "none"
)

# imports atmospheric list for one filter.
at_s = pd.read_csv('assets/Atmospheric_Spectroscopy.csv', comment="#", low_memory=False)

# Dash app
app = Dash(__name__, external_stylesheets=["assets/style.css"])

# Database columns offered as selectable features, grouped by the heading
# they are shown under. 'value' is the CSV column name, 'label' the checkbox text.
planet_features = {
    'Planet': [
        {'label': "Orbital Period (pl_orbper)", 'value': "pl_orbper"}, # in days (Not in values dataset)
        {'label': "Orbit Semi-Major Axis (pl_orbsmax)", 'value': "pl_orbsmax"}, # in Astronomical Units (Not in values dataset)
        {'label': "Epoch of Periastron (pl_orbtper)", 'value': "pl_orbtper"}, # in days - it is a time, not an angle (Not in values dataset)
        {'label': "Argument of Periastron (pl_orblper)", 'value': "pl_orblper"}, # in degrees (Not in values dataset)
        {'label': "Proj. Obliquity (pl_projobliq)", 'value': "pl_projobliq"}, # in degrees (Not in values dataset)
        {'label': "True Obliquity (pl_trueobliq)", 'value': "pl_trueobliq"}, # in degrees (Not in values dataset)
        {'label': "Radius (pl_rade)", 'value': "pl_rade"}, # in Earth radii (Not in values dataset)
        {'label': "Mass (pl_bmasse)", 'value': "pl_bmasse"}, # estimation, in Earth masses (Not in values dataset)
        {'label': "Density (pl_dens)", 'value': "pl_dens"}, # in g/cm^3 (Not in values dataset)
        {'label': "Orbital Eccentricity (pl_orbeccen)", 'value': "pl_orbeccen"}, # (Not in values dataset)
        {'label': "Insol. Flux (pl_insol)", 'value': "pl_insol"}, # in Earth flux (Not in values dataset)
        {'label': "Equil. Temp. (pl_eqt)", 'value': "pl_eqt"},  # in Kelvin (Not in values dataset)
        {'label': "Transit Duration (pl_trandur)", 'value': "pl_trandur"}, # in hours (Not in values dataset)
        {'label': "Transit Midpoint (pl_tranmid)", 'value': "pl_tranmid"}, # in days (Not in values dataset)
        {'label': "Transit Depth (pl_trandep)", 'value': "pl_trandep"}, # percentage (Not in values dataset)
        {'label': "Impact Parameter (pl_imppar)", 'value': "pl_imppar"}, # (Not in values dataset)
        {'label': "Occultation Depth (pl_occdep)", 'value': "pl_occdep"}, # percentage (Not in values dataset)
        {'label': "Rad. Velocity Amplitude (pl_rvamp)", 'value': "pl_rvamp"}, # in m/s (Not in values dataset)
        {'label': "Discovery Year (disc_year)", 'value': "disc_year"},
        # {'label': "Last Update", 'value': "rowupdate"}, # last update of parameters
        # {'label': "Public Release Date", 'value': "releasedate"}, # date publicly released
    ],
    'Stellar': [
        {'label': "Effec. Temp. (st_teff)", 'value': "st_teff"}, # in Kelvin
        {'label': "Radius (st_rad)", 'value': "st_rad"}, # in Solar radii
        {'label': "Mass (st_mass)", 'value': "st_mass"},
        {'label': "Density (st_dens)", 'value': "st_dens"},
        {'label': "Surface Grav. (st_logg)", 'value': "st_logg"},
        {'label': "Age (st_age)", 'value': "st_age"}, # in gigayears
        {'label': "Rot. Period (st_rotp)", 'value': "st_rotp"},
        {'label': "Rot. Velocity (st_vsin)", 'value': "st_vsin"},
        {'label': "Rad. Velocity (st_radv)", 'value': "st_radv"},
        {'label': "Metallicity (st_met)", 'value': "st_met"},
        {'label': "Luminosity (st_lum)", 'value': "st_lum"},
    ],
    'System': [
        {'label': "Parallax (sy_plx)", 'value': "sy_plx"},
        {'label': "Dist from Earth (sy_dist)", 'value': "sy_dist"}, # in parsecs
        {'label': "u (Sloan) Magnitude (sy_umag, ~354 nm)", 'value': "sy_umag"},
        {'label': "B (Johnson) Magnitude (sy_bmag, ~442 nm)", 'value': "sy_bmag"},
        {'label': "g (Sloan) Magnitude (sy_gmag, ~475 nm)", 'value': "sy_gmag"},
        {'label': "V (Johnson) Magnitude (sy_vmag, ~540 nm)", 'value': "sy_vmag"},
        {'label': "Kepler Magnitude (sy_kepmag, ~600 nm)", 'value': "sy_kepmag"},
        {'label': "r (Sloan) Magnitude (sy_rmag, ~622 nm)", 'value': "sy_rmag"},
        {'label': "Gaia Magnitude (sy_gaiamag, ~673 nm)", 'value': "sy_gaiamag"},
        {'label': "i (Sloan) Magnitude (sy_imag, ~763 nm)", 'value': "sy_imag"},
        {'label': "I (Cousins) Magnitude (sy_icmag, ~786.5 nm)", 'value': "sy_icmag"},
        {'label': "TESS Magnitude (sy_tmag, ~800 nm)", 'value': "sy_tmag"},
        {'label': "z (Sloan) Magnitude (sy_zmag, ~905 nm)", 'value': "sy_zmag"},
        {'label': "J (2MASS) Magnitude (sy_jmag, ~1.25 μm)", 'value': "sy_jmag"},
        {'label': "H (2MASS) Magnitude (sy_hmag, ~1.65 μm)", 'value': "sy_hmag"},
        {'label': "Ks (2MASS) Magnitude (sy_kmag, ~2.15 μm)", 'value': "sy_kmag"},
        {'label': "W1 (WISE) Magnitude (sy_w1mag, ~3.4 μm)", 'value': "sy_w1mag"},
        {'label': "W2 (WISE) Magnitude (sy_w2mag, ~4.6 μm)", 'value': "sy_w2mag"},
        {'label': "W3 (WISE) Magnitude (sy_w3mag, ~12 μm)", 'value': "sy_w3mag"},
        {'label': "W4 (WISE) Magnitude (sy_w4mag, ~22 μm)", 'value': "sy_w4mag"},
    ]
}
# Pre-ticked features, one list per group, in the same order as planet_features
# (the layout indexes this list by the group's position).
initial_values = [['pl_rade'], ['st_mass'], ["sy_plx"]]

# Page layout: data filters, one feature checklist per group, NaN-handling and
# neighbour-count controls, an explicit "Update Graph" button, and the UMAP plot.
app.layout = html.Div([
    html.H1("UMAP Analysis"),

    html.H2("Data filters:"),
    dcc.Checklist(
        id='filter-checklist',
        options=[
            {'label': 'Has atmospheric data', 'value': 'atmoData'},
            {'label': 'Default parameter set', 'value': 'default'},
            {'label': 'No controversial flag', 'value': 'noControv'}
        ],
        value=['default', 'noControv'],  # Default values to filter by
        className="checkbox-container"
    ),
    dcc.Checklist(
        id='simplification-checkbox',
        options=[
            {'label': html.Span(['Simplify planet types?'], style={'fontWeight': 'bold'}), 'value': 'box'},
        ],
        className="checkbox-container"
    ),

    html.H2("Select Features:"),
    # One checklist per feature group; the pattern-matching id lets the callback
    # collect every group's selection with ALL.
    *[
        html.Div([
            html.H3(group),
            dcc.Checklist(
                id={'type': 'feature-checklist', 'index': i},
                options=options,
                value=initial_values[i],
                className="checkbox-container"
            )
        ]) for i, (group, options) in enumerate(planet_features.items())
    ],

    html.H2("How to handle NaN values:"),
    dcc.RadioItems(
        id="nanhandle-radioitems",
        options=[
            {'label': 'Set to 0', 'value': 'zero'},
            {'label': 'Set to mean', 'value': 'mean'},
            {'label': 'Set to median', 'value': 'median'},
        ],
        value="zero",
        className="checkbox-container"
    ),

    html.H2("Neighbors:"),
    dcc.Slider(
        id="neighbors-slider",
        min=2,
        max=100,
        step=1,
        value=15,
        # Marks must lie inside [min, max]: label both ends and every multiple of 10
        marks={2: "2", **{i: str(i) for i in range(10, 101, 10)}},
        tooltip={"placement": "bottom", "always_visible": True}
    ),

    # n_clicks starts at 1; the graph callback only takes this button as Input
    html.Button('Update Graph', id='update-button', n_clicks=1),

    dcc.Graph(id="graph", figure={}),

    html.P('NaN info:', id='naninfo-box')
# Dash style dictionaries use camelCase keys (fontFamily, textAlign), not CSS hyphenated names.
], style={'color': 'black', 'fontFamily': 'Arial', 'backgroundColor': 'white', 'padding': '20px', 'textAlign': 'center'})

@app.callback(
    [Output('graph', 'figure'),
     Output('naninfo-box', 'children')],
     Input('update-button', 'n_clicks'),
    [State('filter-checklist', 'value'),
     State('simplification-checkbox', 'value'),
     State({'type': 'feature-checklist', 'index': ALL}, 'value'),
     State('nanhandle-radioitems', 'value'),
     State('neighbors-slider', 'value')]
)
def update_graph(update, filters, simple, feature_vals, nanhandle, neighbors):
    """Rebuild the UMAP scatter plot when the 'Update Graph' button is clicked.

    The selected features are imputed, standardised, reduced to their first two
    principal components and then embedded in 2D with UMAP.

    Args:
        update: click count of 'update-button' (only used for logging).
        filters: values ticked in 'filter-checklist' ('atmoData', 'default', 'noControv').
        simple: value of 'simplification-checkbox'; when truthy, super_earth is
            merged into terrestrial and neptune_like into gas_giant.
        feature_vals: one list of selected column names per feature checklist.
        nanhandle: 'mean' or 'median' impute each feature with its own column
            statistic; any other value (normally 'zero') imputes 0.
        neighbors: n_neighbors passed to UMAP.

    Returns:
        (figure, NaN summary). On invalid input the figure is empty and the
        second element is an error message.
    """
    print(f"running graph {update}")

    # Non-feature columns kept only so they show up in the hover box
    hover_cols = ["pl_name", "pl_type", "discoverymethod", "disc_refname", "disc_locale",
                  "disc_facility", "disc_telescope", "disc_instrument"]

    def _error(message):
        """Empty figure plus an error message, matching the two callback outputs."""
        return go.Figure(), message

    filters = filters or []
    # Combines all feature checkboxes into one list.
    features = sorted({val for group in feature_vals for val in (group or [])})

    # Apply filters (boolean indexing never modifies the global pl_s)
    df = pl_s
    if 'atmoData' in filters:
        df = df[df['pl_name'].isin(at_s['PL_NAME'])]
    if 'default' in filters:
        df = df[df['default_flag'] == True]
    if 'noControv' in filters:
        df = df[df['pl_controv_flag'] == False]

    # Returns an empty graph and error message for certain anomalies
    missing_features = [f for f in features if f not in df.columns]
    if missing_features:
        return _error("ERROR: Missing columns " + ", ".join(missing_features))
    if df.empty:
        return _error('ERROR: filtered dataset is empty')
    if len(df) < 2:
        return _error('ERROR: at least two planets are needed after filtering')
    if len(features) <= 2:
        return _error('ERROR: not enough features selected')

    # Private copy with a fresh 0..n-1 index: keeps pl_s untouched and lets the
    # embedding be attached row-for-row further down.
    df = df[hover_cols + features].reset_index(drop=True)
    if simple:
        df['pl_type'] = df['pl_type'].replace({'super_earth': 'terrestrial', 'neptune_like': 'gas_giant'})

    X = df[features].to_numpy(dtype=float)

    # Gets information on NaNs in each column
    nan_mask = np.isnan(X)
    nan_counts = nan_mask.sum(axis=0)
    most_nans_index = np.argmax(nan_counts)
    naninfo = ["NaN info:"]
    for i, (col_name, count) in enumerate(zip(features, nan_counts)):
        suffix = " (highest)" if i == most_nans_index else ""
        naninfo.append(f"{col_name}: {count} NaNs{suffix}")
    nanstring = "\n".join(naninfo)

    # Impute NaNs per feature; a statistic taken over the whole matrix would
    # mix unrelated units (e.g. Kelvin with parsecs).
    if nanhandle == 'mean':
        fill_values = np.nanmean(X, axis=0)
    elif nanhandle == 'median':
        fill_values = np.nanmedian(X, axis=0)
    else:
        fill_values = np.zeros(X.shape[1])
    # A column with no data at all has no mean/median; fall back to 0
    fill_values = np.where(np.isnan(fill_values), 0.0, fill_values)
    X_cleaned = np.where(nan_mask, fill_values, X)

    # PCA-required data normalization; constant columns keep std 1 to avoid 0/0
    X_std = X_cleaned.std(axis=0)
    X_std[X_std == 0] = 1.0
    X_normalized = (X_cleaned - X_cleaned.mean(axis=0)) / X_std

    # Covariance matrix and its eigen-decomposition, sorted by descending eigenvalue
    cov_matrix = np.cov(X_normalized, rowvar=False)
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
    order = np.argsort(eigenvalues)[::-1]
    eigenvectors = eigenvectors[:, order]

    # Keep the first 2 principal components as UMAP input
    loadings = eigenvectors[:, :2]
    X_pca = np.dot(X_normalized, loadings)

    # UMAP; the fixed seed makes repeated clicks with the same settings give the same plot
    embedding = umap.UMAP(n_neighbors=neighbors, min_dist=0.1, random_state=42).fit_transform(X_pca)

    # Attach UMAP columns using df's own index so every planet keeps its coordinates
    umap_df = pd.concat([df, pd.DataFrame(embedding, columns=["UMAP1", "UMAP2"], index=df.index)], axis=1)
    umap_df = umap_df.dropna(subset=["pl_name"])  # Remove rows where pl_name is missing

    # Everything except the UMAP columns and pl_type goes into the hover box
    exclude_cols = {"UMAP1", "UMAP2", "pl_type"}
    hover_data_dict = {col: True for col in umap_df.columns if col not in exclude_cols}

    # Scatterplot
    fig = px.scatter(
        umap_df, x="UMAP1", y="UMAP2", color="pl_type", hover_data=hover_data_dict,
        opacity=0.7
    )
    fig.update_traces(
        hoverlabel=dict(
            font=dict(color="black"),
            bgcolor="white",
        )
    )

    print("done")
    return fig, html.Pre(nanstring)
    
if __name__ == "__main__":
    # Start the Dash development server for the UMAP app on port 8052
    app.run(port=8052, debug=True)

running graph 1


[2025-06-19 15:17:08,336] ERROR in app: Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "c:\Users\tmari\miniforge3\Lib\site-packages\flask\app.py", line 917, in full_dispatch_request
    rv = self.dispatch_request()
         ^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\tmari\miniforge3\Lib\site-packages\flask\app.py", line 902, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)  # type: ignore[no-any-return]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\tmari\miniforge3\Lib\site-packages\dash\dash.py", line 1414, in dispatch
    ctx.run(
  File "c:\Users\tmari\miniforge3\Lib\site-packages\dash\_callback.py", line 536, in add_context
    raise err
  File "c:\Users\tmari\miniforge3\Lib\site-packages\dash\_callback.py", line 525, in add_context
    output_value = _invoke_callback(func, *func_args, **func_kwargs)  # type: ignore[reportArgumentType]
               

running graph 1


[2025-06-19 15:24:54,367] ERROR in app: Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "c:\Users\tmari\miniforge3\Lib\site-packages\flask\app.py", line 917, in full_dispatch_request
    rv = self.dispatch_request()
         ^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\tmari\miniforge3\Lib\site-packages\flask\app.py", line 902, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)  # type: ignore[no-any-return]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\tmari\miniforge3\Lib\site-packages\dash\dash.py", line 1414, in dispatch
    ctx.run(
  File "c:\Users\tmari\miniforge3\Lib\site-packages\dash\_callback.py", line 536, in add_context
    raise err
  File "c:\Users\tmari\miniforge3\Lib\site-packages\dash\_callback.py", line 525, in add_context
    output_value = _invoke_callback(func, *func_args, **func_kwargs)  # type: ignore[reportArgumentType]
               

In [None]:
# histogram/dotplot (Credit to Microsoft Copilot)
# Note to self: figure out how the heck this works.
import dash
import pandas as pd
import numpy as np
from dash import Dash, dcc, html, Input, Output, ctx, ALL
import plotly.graph_objects as go
import plotly.express as px

# Imports exoplanet classification lists
%run -i assets/lists.ipynb

# Imports the Planetary Systems CSV and adds a pl_type column based on the classification lists
pl_s = pd.read_csv('assets/Planetary_Systems.csv', comment="#", low_memory=False)

# Build a planet-name -> type lookup once instead of scanning every classification
# list for every row. The lists are applied from lowest to highest precedence, so a
# name that appears in several lists ends up with the highest-precedence type:
# terrestrial > super_earth > unknown > neptune_like > gas_giant.
_pl_type_by_name = {}
for _type_name, _names in (
    ("gas_giant", gas_giants),  # type: ignore
    ("neptune_like", neptune_like),  # type: ignore
    ("unknown", unknown),  # type: ignore
    ("super_earth", super_earths),  # type: ignore
    ("terrestrial", terrestrial),  # type: ignore
):
    _pl_type_by_name.update(dict.fromkeys(_names, _type_name))

# Planets that are in none of the lists (or have no name) are labelled "none"
pl_s["pl_type"] = pl_s["pl_name"].map(_pl_type_by_name).fillna("none")

# Imports the atmospheric spectroscopy table (used by the "Has atmospheric data" filter)
at_s = pd.read_csv('assets/Atmospheric_Spectroscopy.csv', comment="#", low_memory=False)

# Dash app
app = Dash(__name__, external_stylesheets=["assets/style.css"])

# Database columns offered as selectable features, grouped by category.
# Each entry is a Dash option dict: 'label' is shown to the user, 'value' is the CSV column name.
planet_features = {
    'Planet': [
        {'label': "Orbital Period (pl_orbper)", 'value': "pl_orbper"}, # in days (Not in values dataset)
        {'label': "Orbit Semi-Major Axis (pl_orbsmax)", 'value': "pl_orbsmax"}, # in Astronomical Units (Not in values dataset)
        {'label': "Epoch of Periastron (pl_orbtper)", 'value': "pl_orbtper"}, # in days (Not in values dataset)
        {'label': "Argument of Periastron (pl_orblper)", 'value': "pl_orblper"}, # in degrees (Not in values dataset)
        {'label': "Proj. Obliquity (pl_projobliq)", 'value': "pl_projobliq"}, # in degrees (Not in values dataset)
        {'label': "True Obliquity (pl_trueobliq)", 'value': "pl_trueobliq"}, # in degrees (Not in values dataset)
        {'label': "Radius (pl_rade)", 'value': "pl_rade"}, # in Earth radii (Not in values dataset)
        {'label': "Mass (pl_bmasse)", 'value': "pl_bmasse"}, # estimation, in Earth masses (Not in values dataset)
        {'label': "Density (pl_dens)", 'value': "pl_dens"}, # in g/cm^3 (Not in values dataset)
        {'label': "Orbital Eccentricity (pl_orbeccen)", 'value': "pl_orbeccen"}, # (Not in values dataset)
        {'label': "Insol. Flux (pl_insol)", 'value': "pl_insol"}, # in Earth flux (Not in values dataset)
        {'label': "Equil. Temp. (pl_eqt)", 'value': "pl_eqt"},  # in Kelvin (Not in values dataset)
        {'label': "Transit Duration (pl_trandur)", 'value': "pl_trandur"}, # in hours (Not in values dataset)
        {'label': "Transit Midpoint (pl_tranmid)", 'value': "pl_tranmid"}, # in days (Not in values dataset)
        {'label': "Transit Depth (pl_trandep)", 'value': "pl_trandep"}, # percentage (Not in values dataset)
        {'label': "Impact Parameter (pl_imppar)", 'value': "pl_imppar"}, # (Not in values dataset)
        {'label': "Occultation Depth (pl_occdep)", 'value': "pl_occdep"}, # percentage (Not in values dataset)
        {'label': "Rad. Velocity Amplitude (pl_rvamp)", 'value': "pl_rvamp"}, # in m/s (Not in values dataset)
        {'label': "Discovery Year (disc_year)", 'value': "disc_year"},
        # {'label': "Last Update", 'value': "rowupdate"}, # last update of parameters
        # {'label': "Public Release Date", 'value': "releasedate"}, # date publicly released
    ], # note: talk to achyutan
    'Stellar': [
        {'label': "Effec. Temp. (st_teff)", 'value': "st_teff"}, # in Kelvin
        {'label': "Radius (st_rad)", 'value': "st_rad"}, # in Solar radii
        {'label': "Mass (st_mass)", 'value': "st_mass"},
        {'label': "Density (st_dens)", 'value': "st_dens"},
        {'label': "Surface Grav. (st_logg)", 'value': "st_logg"},
        {'label': "Age (st_age)", 'value': "st_age"}, # in gigayears
        {'label': "Rot. Period (st_rotp)", 'value': "st_rotp"},
        {'label': "Rot. Velocity (st_vsin)", 'value': "st_vsin"},
        {'label': "Rad. Velocity (st_radv)", 'value': "st_radv"},
        {'label': "Metallicity (st_met)", 'value': "st_met"},
        {'label': "Luminosity (st_lum)", 'value': "st_lum"},
    ],
    'System': [
        {'label': "Parallax (sy_plx)", 'value': "sy_plx"},
        {'label': "Dist from Earth (sy_dist)", 'value': "sy_dist"}, # in parsecs
        {'label': "u (Sloan) Magnitude (sy_umag, ~354 nm)", 'value': "sy_umag"},
        {'label': "B (Johnson) Magnitude (sy_bmag, ~442 nm)", 'value': "sy_bmag"},
        {'label': "g (Sloan) Magnitude (sy_gmag, ~475 nm)", 'value': "sy_gmag"},
        {'label': "V (Johnson) Magnitude (sy_vmag, ~540 nm)", 'value': "sy_vmag"},
        {'label': "Kepler Magnitude (sy_kepmag, ~600 nm)", 'value': "sy_kepmag"},
        {'label': "r (Sloan) Magnitude (sy_rmag, ~622 nm)", 'value': "sy_rmag"},
        {'label': "Gaia Magnitude (sy_gaiamag, ~673 nm)", 'value': "sy_gaiamag"},
        {'label': "i (Sloan) Magnitude (sy_imag, ~763 nm)", 'value': "sy_imag"},
        {'label': "I (Cousins) Magnitude (sy_icmag, ~786.5 nm)", 'value': "sy_icmag"},
        {'label': "TESS Magnitude (sy_tmag, ~800 nm)", 'value': "sy_tmag"},
        {'label': "z (Sloan) Magnitude (sy_zmag, ~905 nm)", 'value': "sy_zmag"},
        {'label': "J (2MASS) Magnitude (sy_jmag, ~1.25 μm)", 'value': "sy_jmag"},
        {'label': "H (2MASS) Magnitude (sy_hmag, ~1.65 μm)", 'value': "sy_hmag"},
        {'label': "Ks (2MASS) Magnitude (sy_kmag, ~2.15 μm)", 'value': "sy_kmag"},
        {'label': "W1 (WISE) Magnitude (sy_w1mag, ~3.4 μm)", 'value': "sy_w1mag"},
        {'label': "W2 (WISE) Magnitude (sy_w2mag, ~4.6 μm)", 'value': "sy_w2mag"},
        {'label': "W3 (WISE) Magnitude (sy_w3mag, ~12 μm)", 'value': "sy_w3mag"},
        {'label': "W4 (WISE) Magnitude (sy_w4mag, ~22 μm)", 'value': "sy_w4mag"},
    ]
}
# Initial selection for each category's radio group, in the same order as the
# categories above. Only one group should have a value.
initial_values = ['pl_dens', None, None]

# Page layout: data filters, one radio group per feature category, min/max range
# sliders, and two graphs (only one is visible at a time, toggled by the tabs).
app.layout = html.Div([
    html.H1("Density Histogram/Dotplot"),

    html.H2("Data filters:"),
    dcc.Checklist(
        id='filter-checklist',
        options=[
            {'label': 'Has atmospheric data', 'value': 'atmoData'},
            {'label': 'Default parameter set', 'value': 'default'},
            {'label': 'No controversial flag', 'value': 'noControv'}
        ],
        value=['default', 'noControv'],  # Default values to filter by
        className="checkbox-container"
    ),
    dcc.Checklist(
        id='simplification-checkbox',
        options=[
            {'label': html.Span(['Simplify planet types?'], style={'fontWeight': 'bold'}), 'value': 'box'},
        ],
        className="checkbox-container"
    ),

    html.H2("Select a feature:"),
    # One RadioItems group per category; the pattern-matching id lets the callbacks
    # address all groups at once and treat them as a single selection.
    *[
        html.Div([
            html.H4(category),
            dcc.RadioItems(
                id={'type': 'feature-radioitems', 'index': i},
                options=options,
                value=initial_values[i],
                className="checkbox-container"
            )
        ]) for i, (category, options) in enumerate(planet_features.items())
    ],

    html.H2("Min Value:"),
    dcc.Slider(
        id="min-value-slider",
        min=0,
        max=100,  # Will update dynamically
        step=None,
        value=0,
        tooltip={"placement": "bottom", "always_visible": True}
    ),

    html.H2("Max Value:"),
    dcc.Slider(
        id="max-value-slider",
        min=0,
        max=100,  # Will update dynamically
        step=None,
        value=100,
        tooltip={"placement": "bottom", "always_visible": True}
    ),

    dcc.Tabs(id="tabs", value="tab-1", children=[
        dcc.Tab(label="Histogram", value="tab-1", className="tab"),
        dcc.Tab(label="Dotplot", value="tab-2", className="tab"),
    ]),
    html.Div(id="tabs-content", children=[
        dcc.Graph(id="histogram", figure={}, style={"display": "block"}),
        dcc.Graph(id="dotplot", figure={}, style={"display": "none"}),
    ]),  # This will hold the selected graph
# Style keys are camelCased, as Dash/React expects for inline styles
], style={'color': 'black', 'fontFamily': 'Arial', 'backgroundColor': 'white', 'padding': '20px', 'textAlign': 'center'})

@app.callback(
    [Output('histogram', 'figure'),
     Output('dotplot', 'figure'),],
    [Input('filter-checklist', 'value'),
     Input('simplification-checkbox', 'value'),
     Input({'type': 'feature-radioitems', 'index': ALL}, 'value'),
     Input('min-value-slider', 'value'),
     Input('max-value-slider', 'value')]
)
def update_graph(filters, simple, feature_values, min_value, max_value):
    """Rebuild the histogram and the dotplot for the currently selected feature.

    Args:
        filters: values ticked in the data-filter checklist ('atmoData',
            'default', 'noControv'); None is treated as "no filters".
        simple: value of the simplification checklist; truthy merges
            super_earth into terrestrial and neptune_like into gas_giant.
        feature_values: one value per feature radio group; the first non-None
            entry is the selected column.
        min_value: lower bound for the feature, or None for no lower bound.
        max_value: upper bound for the feature, or None for no upper bound.

    Returns:
        A (histogram, dotplot) pair of figures; two empty figures when no row
        survives the filters.

    Raises:
        dash.exceptions.PreventUpdate: when no feature is selected.
    """
    # Gets the selected feature from all lists
    feature = next((f for f in feature_values if f is not None), None)
    if feature is None:
        raise dash.exceptions.PreventUpdate

    filters = filters or []

    # Keep rows whose feature value lies between the designated min and max.
    # Rows without a value never match; a missing bound is simply not applied
    # (comparing a column against None would raise a TypeError).
    values = pl_s[feature]
    in_range = values.notna()
    if min_value is not None:
        in_range &= values >= min_value
    if max_value is not None:
        in_range &= values <= max_value
    df = pl_s[in_range]

    # Apply filters
    if 'atmoData' in filters:
        df = df[df['pl_name'].isin(at_s['PL_NAME'])]
    if 'default' in filters:
        df = df[df['default_flag'] == True]
    if 'noControv' in filters:
        df = df[df['pl_controv_flag'] == False]
    if simple:
        # assign() returns a new frame, so no value is written into a slice of pl_s
        df = df.assign(
            pl_type=df['pl_type'].replace(
                {'super_earth': 'terrestrial', 'neptune_like': 'gas_giant'}
            )
        )

    # Returns empty graphs if filtered dataset is empty
    if df.empty:
        return go.Figure(), go.Figure()

    # Generate Plotly graphs
    histogram = px.histogram(df, x=feature, nbins=100, color='pl_type')
    dotplot = px.scatter(df, x=feature, color='pl_type', hover_data=['pl_name', 'discoverymethod', 'disc_facility'])
    dotplot.update_traces(
        hoverlabel=dict(
            font=dict(color="black"),
            bgcolor="white",
        ),
    )

    return histogram, dotplot

@app.callback(
    [Output('histogram', 'style'),
     Output('dotplot', 'style'),],
    [Input("tabs", "value")]
)
def update_tabs(tab):
    """Show the graph belonging to the active tab and hide the other one.

    Returns a two-item list of style dicts: [histogram style, dotplot style].
    Any tab value other than "tab-2" shows the histogram.
    """
    visible = {"display": "block"}
    hidden = {"display": "none"}
    if tab == "tab-2":
        return [hidden, visible]
    return [visible, hidden]
    
# Code to update slider min and max values
@app.callback(
    [Output('min-value-slider', 'min'),
     Output('min-value-slider', 'max'),
     Output('min-value-slider', 'value'),
     Output('min-value-slider', 'step'),
     Output('max-value-slider', 'min'),
     Output('max-value-slider', 'max'),
     Output('max-value-slider', 'value'),
     Output('max-value-slider', 'step'),],
    [Input({'type': 'feature-radioitems', 'index': ALL}, 'value')]
)
def update_sliders(feature_values):
    """Rescale both range sliders to the value range of the selected feature.

    Args:
        feature_values: one value per feature radio group; the first non-None
            entry is the selected column.

    Returns:
        (min, max, value, step) for the min slider followed by
        (min, max, value, step) for the max slider. The min slider starts at
        the lower bound and the max slider at the upper bound.

    Raises:
        dash.exceptions.PreventUpdate: when no feature is selected or the
            selected column holds no values at all.
    """
    feature = next((f for f in feature_values if f is not None), None)
    if feature is None:
        raise dash.exceptions.PreventUpdate

    # A column without any value would produce NaN bounds; keep the sliders as they are
    column = pl_s[feature].dropna()
    if column.empty:
        raise dash.exceptions.PreventUpdate

    # Get min and max values for the selected feature and round them outwards.
    # Plain floats are returned rather than numpy scalars.
    feature_min = float(np.floor(column.min()))
    feature_max = float(np.ceil(column.max()))
    # A zero-width range would give a step of 0, so widen it by one unit
    if feature_max <= feature_min:
        feature_max = feature_min + 1.0
    step = (feature_max - feature_min) / 20

    return (
        feature_min, feature_max, feature_min, step,
        feature_min, feature_max, feature_max, step
    )

# Code to ensure all radioLists are treated as the same one.
@app.callback(
    Output({'type': 'feature-radioitems', 'index': ALL}, 'value'),
    Input({'type': 'feature-radioitems', 'index': ALL}, 'value'),
    prevent_initial_call=True
)
def enforce_single_selection(values):
    """Keep only the most recently clicked radio group selected.

    Args:
        values: current value of every feature radio group, ordered by index.

    Returns:
        A list of the same length where every entry is None except the one for
        the group that triggered the callback.

    Raises:
        dash.exceptions.PreventUpdate: when the triggering group cannot be
            identified, so the current selection is left untouched instead of
            being cleared.
    """
    triggered = ctx.triggered_id
    if not triggered:
        raise dash.exceptions.PreventUpdate

    idx = triggered['index']
    new_values = [None] * len(values)
    new_values[idx] = values[idx]
    return new_values

# Start the Dash development server for the histogram/dotplot app on port 8053
if __name__ == '__main__':
    app.run(debug=True, port=8053)

[2025-06-19 14:40:01,679] ERROR in app: Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "c:\Users\tmari\miniforge3\Lib\site-packages\flask\app.py", line 917, in full_dispatch_request
    rv = self.dispatch_request()
         ^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\tmari\miniforge3\Lib\site-packages\flask\app.py", line 902, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)  # type: ignore[no-any-return]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\tmari\miniforge3\Lib\site-packages\dash\dash.py", line 1414, in dispatch
    ctx.run(
  File "c:\Users\tmari\miniforge3\Lib\site-packages\dash\_callback.py", line 536, in add_context
    raise err
  File "c:\Users\tmari\miniforge3\Lib\site-packages\dash\_callback.py", line 525, in add_context
    output_value = _invoke_callback(func, *func_args, **func_kwargs)  # type: ignore[reportArgumentType]
               

In [15]:
# Created a 3D exoplanet map. Credits to Microsoft Copilot for most of the code.
# To do: add representation of Earth, more info in hovertext
# 2d: change size of dots, overlay earth map for reference
# Poster: add the starmaps first, with some labels.

import pandas as pd
import numpy as np
from dash import Dash, dcc, html, Input, Output
import plotly.graph_objects as go
import plotly.express as px

# Imports exoplanet classification lists
%run -i assets/lists.ipynb

# Imports the Planetary Systems CSV and adds a pl_type column based on the classification lists
pl_s = pd.read_csv('assets/Planetary_Systems.csv', comment="#", low_memory=False)

# Build a planet-name -> type lookup once instead of scanning every classification
# list for every row. The lists are applied from lowest to highest precedence, so a
# name that appears in several lists ends up with the highest-precedence type:
# terrestrial > super_earth > unknown > neptune_like > gas_giant.
_pl_type_by_name = {}
for _type_name, _names in (
    ("gas_giant", gas_giants),  # type: ignore
    ("neptune_like", neptune_like),  # type: ignore
    ("unknown", unknown),  # type: ignore
    ("super_earth", super_earths),  # type: ignore
    ("terrestrial", terrestrial),  # type: ignore
):
    _pl_type_by_name.update(dict.fromkeys(_names, _type_name))

# Planets that are in none of the lists (or have no name) are labelled "none"
pl_s["pl_type"] = pl_s["pl_name"].map(_pl_type_by_name).fillna("none")

# Imports the atmospheric spectroscopy table (used by the "Has atmospheric data" filter)
at_s = pd.read_csv('assets/Atmospheric_Spectroscopy.csv', comment="#", low_memory=False)

# Dash app
app = Dash(__name__, external_stylesheets=["assets/style.css"])

# Page layout: data filters, NaN-handling choice, minimum-radius slider, and two
# graphs (only one is visible at a time, toggled by the tabs).
app.layout = html.Div([
    html.H1("3D Exoplanet \"Starmap\""),

    html.H2("Data filters:"),
    dcc.Checklist(
        id='filter-checklist',
        options=[
            {'label': 'Has atmospheric data', 'value': 'atmoData'},
            {'label': 'Default parameter set', 'value': 'default'},
            {'label': 'No controversial flag', 'value': 'noControv'}
        ],
        value=['default', 'noControv'],  # Default values to filter by
        className="checkbox-container"
    ),
    dcc.Checklist(
        id='simplification-checkbox',
        options=[
            {'label': html.Span(['Simplify planet types?'], style={'fontWeight': 'bold'}), 'value': 'box'},
        ],
        className="checkbox-container"
    ),

    html.H2("How to handle NaN values:"),
    dcc.RadioItems(
        id="nanhandle-radioitems",
        options=[
            {'label': 'Set to 0', 'value': 'zero'},
            {'label': 'Set to mean', 'value': 'mean'},
            {'label': 'Set to median', 'value': 'median'},
        ],
        value="median",
        className="checkbox-container"
    ),

    html.H2("Min Radius:"),
    dcc.Slider(
        id="min-radius-slider",
        min=0,
        max=10000,
        step=100,
        value=1000,
        marks={i: str(i) for i in range(0, 10001, 1000)},
        tooltip={"placement": "bottom", "always_visible": True}
    ),

    dcc.Tabs(id="tabs", value="tab-1", children=[
        dcc.Tab(label="3D Starmap", value="tab-1", className="tab"),
        dcc.Tab(label="2D Starmap", value="tab-2", className="tab"),
    ]),
    html.Div(id="tabs-content", children=[
        dcc.Graph(id="3d-starmap", figure={}, style={"display": "block"}),
        dcc.Graph(id="2d-starmap", figure={}, style={"display": "none"}),
    ]),  # This will hold the selected graph
# Style keys are camelCased, as Dash/React expects for inline styles
], style={'color': 'black', 'fontFamily': 'Arial', 'backgroundColor': 'white', 'padding': '20px', 'textAlign': 'center'})

@app.callback(
    [Output('3d-starmap', 'figure'),
     Output('2d-starmap', 'figure'),],
    [Input('filter-checklist', 'value'),
     Input('simplification-checkbox', 'value'),
     Input('min-radius-slider', 'value'),
     Input('nanhandle-radioitems', 'value')]
)
def update_graph(filters, simple, minRad, nanhandle):
    """Rebuild the 3D and 2D starmaps from the filtered planet table.

    Args:
        filters: values ticked in the data-filter checklist ('atmoData',
            'default', 'noControv'); None is treated as "no filters".
        simple: value of the simplification checklist; truthy merges
            super_earth into terrestrial and neptune_like into gas_giant.
        minRad: minimum plotted radius. Distances at or below it are drawn at
            radius 1, larger ones at distance / minRad. 0 (or None) plots the
            raw distances.
        nanhandle: how missing sy_dist values are filled: 'zero', 'mean' or
            'median'; any other value leaves them missing.

    Returns:
        A (3D figure, 2D figure) pair; two empty figures when no row survives
        the filters.
    """
    filters = filters or []

    # Load dataset
    df = pl_s

    # Apply filters
    if 'atmoData' in filters:
        df = df[df['pl_name'].isin(at_s['PL_NAME'])]
    if 'default' in filters:
        df = df[df['default_flag'] == True]
    if 'noControv' in filters:
        df = df[df['pl_controv_flag'] == False]

    # Returns empty graphs if filtered dataset is empty
    if df.empty:
        return go.Figure(), go.Figure()

    # Work on a private copy: everything below adds or overwrites columns, and
    # without the copy those writes would land in the shared pl_s frame (when no
    # filter is ticked) and leak into every later callback run.
    df = df.copy()

    if simple:
        df['pl_type'] = df['pl_type'].replace(
            {'super_earth': 'terrestrial', 'neptune_like': 'gas_giant'}
        )

    # Replace NaNs with the value given by nanhandle
    if nanhandle == 'zero':
        df['sy_dist'] = df['sy_dist'].fillna(0)
    elif nanhandle == 'mean':
        df['sy_dist'] = df['sy_dist'].fillna(df['sy_dist'].mean())
    elif nanhandle == 'median':
        df['sy_dist'] = df['sy_dist'].fillna(df['sy_dist'].median())

    # Defines data columns to be used in calculations
    ra_rad = np.radians(df['ra'])    # Think of RA as longitude
    dec_rad = np.radians(df['dec'])  # and Dec as latitude
    sy_dist = df['sy_dist']          # Distance from Earth in parsecs

    # **Graph 1: 3D Starmap**
    # Plotted radius: raw distance when minRad is 0, otherwise distance / minRad
    # with everything at or below minRad clamped to radius 1 (doesn't change the data)
    if not minRad:
        dist = sy_dist
    else:
        dist = np.where(sy_dist <= minRad, 1, sy_dist / minRad)
    # Convert spherical coordinates (RA, Dec, Distance) to Cartesian (X, Y, Z)
    df['x'] = dist * np.cos(dec_rad) * np.cos(ra_rad)
    df['y'] = dist * np.cos(dec_rad) * np.sin(ra_rad)
    df['z'] = dist * np.sin(dec_rad)

    # Color legend
    color_map = {
        "terrestrial": "blue",
        "super_earth": "red",
        "neptune_like": "green",
        "gas_giant": "purple",
        "unknown": "yellow",
        "none": "black"
    }
    fig3d = go.Figure()
    for category, color in color_map.items():
        filtered_df = df[df['pl_type'] == category]
        # Hovertext
        hover_text = [
            f"Planet: {name}<br>RA: {ra_val:.2f}°, Dec: {dec_val:.2f}°<br>Dist. from Earth: {dist_val:.2f} pc"
            for name, ra_val, dec_val, dist_val in zip(
                filtered_df['pl_name'],
                filtered_df['ra'],
                filtered_df['dec'],
                filtered_df['sy_dist']
            )
        ]
        # Actually adds the dots
        fig3d.add_trace(go.Scatter3d(
            x=filtered_df["x"],
            y=filtered_df["y"],
            z=filtered_df["z"],
            mode="markers",
            marker=dict(size=2, color=color),
            name=category,
            text=hover_text,
            hoverinfo='text'
        ))
    # Improve legend visibility and title
    fig3d.update_layout(
        legend_title="Planet Type",
        legend=dict(
            x=0, y=1,  # Positioning
            bgcolor="rgba(255, 255, 255, 0.5)",  # Semi-transparent background
            borderwidth=1
        ),
        scene=dict(
            xaxis=dict(showgrid=False, showticklabels=False, visible=False),
            yaxis=dict(showgrid=False, showticklabels=False, visible=False),
            zaxis=dict(showgrid=False, showticklabels=False, visible=False),
            annotations=[]
        ),
        scene_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        margin=dict(l=0, r=0, b=0, t=0)
    )
    fig3d.update_traces(
        hoverlabel=dict(
            font=dict(color="black"),
            bgcolor="white",
        ),
    )

    # **Graph 2: 2D Starmap**
    # Marker size shrinks with distance, so nearer systems are drawn larger
    fig2d = px.scatter(df, x='ra', y='dec', color='pl_type', size=1 / (df['sy_dist'] + 1), hover_data=["pl_name"])
    # Axis titles of a 2D plot live on the layout itself; `scene` only applies to 3D plots.
    # NOTE(review): the colorbar title only matters for a continuous color axis; pl_type
    # looks categorical here, so it is probably a no-op - confirm and drop if so.
    fig2d.update_layout(xaxis_title='RA', yaxis_title='Dec', coloraxis_colorbar=dict(title="Distance (pc)"))
    fig2d.update_traces(
        marker=dict(line=dict(width=0)),
        hoverlabel=dict(
            font=dict(color="black"),
            bgcolor="white",
        ),
    )

    return fig3d, fig2d

@app.callback(
    [Output('3d-starmap', 'style'),
     Output('2d-starmap', 'style'),],
    [Input("tabs", "value")]
)
def update_tabs(tab):
    """Show the starmap belonging to the active tab and hide the other one.

    Returns a two-item list of style dicts: [3D starmap style, 2D starmap style].
    Any tab value other than "tab-2" shows the 3D starmap.
    """
    visible = {"display": "block"}
    hidden = {"display": "none"}
    if tab == "tab-2":
        return [hidden, visible]
    return [visible, hidden]

# @app.callback(
#     Output('2d-starmap', 'figure'),
#     Input('2d-starmap', 'relayoutData')
# )
# def update_marker_size(relayout_data):
#     zoom_factor = relayout_data.get('xaxis.range[1]', 1) - relayout_data.get('xaxis.range[0]', 0)
#     new_size = 10 * (1 / zoom_factor)  # adjust scaling logic
#     fig = px.scatter(df, x='ra', y='dec', size=[new_size]*len(df))
#     return fig


# Start the Dash development server for the starmap app on port 8054
if __name__ == '__main__':
    app.run(debug=True, port=8054)

In [None]:
# Debug cell: prints the path of the Python interpreter running this kernel,
# to check which environment the notebook is actually using.
import sys
print(sys.executable)

# %run -i assets/lists.ipynb

# print(terrestrial)

c:\Users\tmari\miniforge3\python.exe
['KOI-1843.03', 'Barnard e', 'Barnard c', 'Barnard d', 'GJ 341 b', 'HD 101581 b', 'HD 101581 c', 'KOI-4978 b', 'Kepler-879 c', 'Kepler-1489 c', 'Kepler-963 c', 'Kepler-158 d', 'Barnard b', 'GJ 238 b', 'TOI-4527.01', 'SPECULOOS-3 b', 'LHS 1678 d', "Teegarden's Star d", 'LHS 475 b', 'Kepler-1982 b', 'Kepler-1990 c', 'Kepler-1992 b', 'Kepler-1994 b', 'Kepler-1998 b', 'Kepler-1869 c', 'Kepler-290 d', 'Kepler 865-c', 'TOI-700 e', 'Kepler-138 e', 'K2-411 b', 'K2-413 b', 'HD 23472 d', 'HD 23472 e', 'KMT-2020-BLG-0414L b', 'Kepler-1693 c', 'Kepler-1934 b', 'Kepler-1130 c', 'Kepler-1890 b', 'Kepler-1898 b', 'Kepler-1957 b', 'Kepler-1877 b', 'Kepler-1907 b', 'Kepler-1130 d', 'Kepler-1759 b', 'Kepler-1800 b', 'Kepler-1967 b', 'Kepler-1850 b', 'Kepler-352 d', 'Kepler-1864 b', 'Kepler-1963 b', 'GJ 367 b', 'LHS 1678 b', 'EPIC 220492298 b', 'EPIC 201757695.02', 'TOI-540 b', 'Kepler-1689 b', 'K@-315 b', 'TOI-700 b', 'L 98-59 b', 'EPIC 206317286 b', 'EPIC 201497682 

In [1]:
import streamlit as st
import pandas as pd
# NOTE: a virtual environment cannot be activated from inside a running kernel, and
# %run only executes Python scripts/notebooks - not a PowerShell script such as
# .venv/Scripts/Activate.ps1. To use the .venv packages, select the .venv interpreter
# as this notebook's kernel before running the cell.

st.write("Hello world")
# df = pd.DataFrame({
#   'first column': [1, 2, 3, 4],
#   'second column': [10, 20, 30, 40]
# })

# df

TypeError: Descriptors cannot be created directly.
If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0.
If you cannot immediately regenerate your protos, some other possible workarounds are:
 1. Downgrade the protobuf package to 3.20.x or lower.
 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower).

More information: https://developers.google.com/protocol-buffers/docs/news/2022-05-06#python-updates