In [112]:
import pandas as pd
import altair as alt
import requests

In [113]:
df = pd.read_csv("data/sbdb_query_results.csv")
df.head()
df = df.rename(columns={
    'per_y': 'P (yr)',
    'moid': 'MOID (AU)',
    'q': 'q (AU)',
    'e': 'e',
    'i': 'i (deg)',
    'epoch_cal': 'Epoch'
})
df = df[df["P (yr)"] < 25]
df

Unnamed: 0,full_name,ad,tp_cal,e,q (AU),i (deg),om,w,P (yr),class,...,A3,DT,name,epoch,Epoch,a,MOID (AU),moid_ld,first_obs,last_obs
1,2P/Encke,4.10,2023-10-21.7,0.8483,0.337,11.47,334.27,187.05,3.31,ETc,...,,,Encke,2459780.5,2022-07-20.0,2.220,0.168000,65.300,2018-11-05,2025-06-29
2,3D/Biela,6.19,1832-11-26.6,0.7513,0.879,13.22,250.67,221.66,6.65,JFc,...,,,Biela,2390520.5,1832-12-03.0,3.535,0.000518,0.202,,
3,4P/Faye,6.02,2021-09-05.6,0.5845,1.578,8.16,194.80,205.99,7.40,JFc,...,-7.100000e-10,-37.7,Faye,2458522.5,2019-02-08.0,3.798,0.589000,229.000,2013-05-23,2023-03-25
4,5D/Brorsen,5.61,1879-03-31.0,0.8098,0.590,29.38,102.97,14.95,5.46,JFc,...,,,Brorsen,2407440.5,1879-04-01.0,3.101,0.367000,143.000,,
5,6P/d'Arrest,5.64,2021-09-17.7,0.6127,1.355,19.51,138.94,178.11,6.54,JFc,...,,,d'Arrest,2459302.5,2021-03-29.0,3.497,0.343000,133.000,2014-03-10,2022-03-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3986,P/2024 T2 (Rankin),10.38,2024-12-08.3,0.6807,1.972,12.93,113.06,343.83,15.30,JFc,...,,,Rankin,2460691.5,2025-01-16.0,6.176,0.992000,386.000,2024-10-04,2025-05-02
3995,P/2024 X3 (PANSTARRS),11.38,2024-09-05.7,0.6263,2.614,2.97,113.36,352.94,18.50,JFc,...,,,PANSTARRS,2460703.5,2025-01-28.0,6.995,1.630000,635.000,2024-10-22,2025-05-18
3999,P/2025 A2 (PANSTARRS),6.73,2024-10-05.6,0.3224,3.446,20.73,189.32,278.03,11.50,JFc,...,,,PANSTARRS,2460697.5,2025-01-22.0,5.086,2.550000,992.000,2024-11-27,2025-04-02
4005,P/2025 C1 (ATLAS),5.64,2025-02-06.5,0.3454,2.746,7.52,9.10,186.79,8.59,JFc,...,,,ATLAS,2460731.5,2025-02-25.0,4.195,1.750000,679.000,2025-02-02,2025-06-12


In [114]:
df = df[
    df['P (yr)'].notna() &
    df['e'].notna() &
    df['i (deg)'].notna() &
    df['q (AU)'].notna() &
    df['MOID (AU)'].notna()
    ]

df['Epoch'] = pd.to_datetime(df['Epoch'].str.replace('.0', '', regex=False), format='%Y-%m-%d', errors='coerce')
df = df[df['Epoch'].notna()]
df['Decade'] = (df['Epoch'].dt.year // 10) * 10
df['Period Class'] = df['P (yr)'].apply(lambda x: 'Short (<20yr)' if x < 20 else 'Long (≥20yr)')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Epoch'] = pd.to_datetime(df['Epoch'].str.replace('.0', '', regex=False), format='%Y-%m-%d', errors='coerce')


In [115]:
alt.Chart(df).mark_circle(opacity=0.6).encode(
    x=alt.X('q (AU):Q', title='Perihelion Distance (AU)'),
    y=alt.Y('P (yr):Q', title='Orbital Period (yrs)', scale=alt.Scale(type='log')),
    color=alt.Color('e:Q', title='Eccentricity', scale=alt.Scale(scheme='redblue')),
    size=alt.Size('MOID (AU):Q', title='Distance from Earth (AU)', scale=alt.Scale(range=[10, 500], reverse=True)),
    tooltip=[
        alt.Tooltip('full_name:N', title='Comet:'),
        alt.Tooltip('Epoch:T', title='Date Discovered:'),
        alt.Tooltip('P (yr):Q', title='Period (yr):'),
        alt.Tooltip('q (AU):Q', title='Perihelion Distance (AU):'),
        alt.Tooltip('e:Q', title='Eccentricity:'),
        alt.Tooltip('MOID (AU):Q', title='Earth Distance (AU):')
    ]
).properties(
    title="Comet Orbital Period vs Perihelion Distance",
    width=600,
    height=400
)

In [116]:
#bind = alt.selection_interval(bind='scales', encodings=["x"])
selection = alt.selection_interval()

alt.Chart(df).mark_circle(opacity=0.6).add_params(
    #bind,
    selection
).encode(
    x=alt.X('q (AU):Q', title='Perihelion Distance (AU)'),
    y=alt.Y('P (yr):Q', title='Orbital Period (yrs)'), #scale=alt.Scale(type='log')),
    color=alt.Color('e:Q', title='Eccentricity', scale=alt.Scale(scheme='redblue')),
    size=alt.Size('MOID (AU):Q', title='Distance from Earth (AU)', scale=alt.Scale(range=[10, 500], reverse=True)),
    tooltip=[
        alt.Tooltip('full_name:N', title='Comet:'),
        alt.Tooltip('Epoch:T', title='Date Discovered:'),
        alt.Tooltip('P (yr):Q', title='Period (yr):'),
        alt.Tooltip('q (AU):Q', title='Perihelion Distance (AU):'),
        alt.Tooltip('e:Q', title='Eccentricity:'),
        alt.Tooltip('MOID (AU):Q', title='Earth Distance (AU):')
    ]
).properties(
    title="Comet Orbital Period vs Perihelion Distance",
    width=600,
    height=400
)

In [117]:
#bind = alt.selection_interval(bind='scales', encodings=["x"])
selection = alt.selection_interval()

alt.Chart(df).mark_circle(opacity=0.6).add_params(
    #bind,
    selection
).encode(
    x = alt.X('q (AU):Q', title='Perihelion Distance (AU)'),
    y = alt.Y('P (yr):Q', title='Orbital Period (yrs)'), #scale=alt.Scale(type='log')),
    color = alt.condition(selection, "e:Q", alt.value("lightgray"), scale = alt.Scale(scheme="viridis"), title="Eccentricity"),
    size = alt.Size('MOID (AU):Q', title='Distance from Earth (AU)', scale=alt.Scale(range=[10, 500], reverse=True)),
    tooltip = [
        alt.Tooltip('full_name:N', title='Comet:'),
        alt.Tooltip('Epoch:T', title='Date Discovered:'),
        alt.Tooltip('P (yr):Q', title='Period (yr):'),
        alt.Tooltip('q (AU):Q', title='Perihelion Distance (AU):'),
        alt.Tooltip('e:Q', title='Eccentricity:'),
        alt.Tooltip('MOID (AU):Q', title='Earth Distance (AU):')
    ]
).properties(
    title="What characteristics of Near Earth Comets affect its orbital period?",
    width=600,
    height=400
)

In [118]:
selection = alt.selection_interval()
dropdown = alt.binding_select(options=sorted(df["Decade"].unique()), name="Decade:")
decade_select = alt.selection_point(fields=['Decade'], bind=dropdown)

alt.Chart(df).mark_circle(opacity=0.6).add_params(
    selection,
    decade_select
).transform_filter(
    decade_select
).encode(
    x = alt.X('q (AU):Q', title='Perihelion Distance (AU)'),
    y = alt.Y('P (yr):Q', title='Orbital Period (yrs)'), #scale=alt.Scale(type='log')),
    color = alt.condition(selection, "e:Q", alt.value("lightgray"), scale = alt.Scale(scheme="viridis"), title="Eccentricity"),
    size = alt.Size('MOID (AU):Q', title='Distance from Earth (AU)', scale=alt.Scale(range=[10, 500], reverse=True)),
    tooltip = [
        alt.Tooltip('full_name:N', title='Comet:'),
        alt.Tooltip('Epoch:T', title='Date Discovered:'),
        alt.Tooltip('P (yr):Q', title='Period (yr):'),
        alt.Tooltip('q (AU):Q', title='Perihelion Distance (AU):'),
        alt.Tooltip('e:Q', title='Eccentricity:'),
        alt.Tooltip('MOID (AU):Q', title='Earth Distance (AU):')
    ]
).properties(
    title="What characteristics of Near Earth Comets affect its orbital period?",
    width=600,
    height=400
)