## GENERAL SETUP

In [1]:
# Reload imported modules automatically before each cell executes, so edits to
# the local package source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
# Load the local version of gallifrey before any pip-installed version (for
# debugging the package source).
import pathlib
import sys
import os

# sys.path is searched front to back, so the local "src" directory has to be
# placed at the front; appending it would let an installed gallifrey in
# site-packages shadow the local checkout.
local_src = str(pathlib.Path(os.getcwd()).parent.joinpath("src"))
# Drop a stale entry first so re-running the cell neither duplicates the path
# nor leaves it at a lower-priority position.
if local_src in sys.path:
    sys.path.remove(local_src)
sys.path.insert(0, local_src)

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from gallifrey.planets import PlanetModel
from gallifrey.utilities.dataframe import rename_labels
from gallifrey.visualization.seaborn import (
    set_plot_defaults,
    get_palette,
    SeabornFigure,
)

## PLOT SETUP

In [4]:
# Open TODO notes for the planned figures, echoed into the cell output one per line.
plot_todo_notes = (
    "RIDGE PLOT 1: ALL CATEGORIES AT ONE FIXED [FE/H] AND OTHER VARIABLES SAMPLED -> INTRINSIC VARIATIONS UNRELATED TO METALLICITY",
    "RIDGE PLOT 2: ONE CATEGORY, ALL VARIABLES SAMPLES, DIFFERENT STAR MASSES -> VARIATIONS WITH STAR MASS (VERY DIFFERENT FOR DIFFERENT CATEGORIES, NEED ANOTHER PLOT TO SHOW THAT",
    "ADD MODEL WHERE METALLICITIES BELOW -0.5 GET 0 PLANETS, SEE HOW MUCH THAT CHANGES (PHRASE IT AS TWO EXTREMES)",
)
for note in plot_todo_notes:
    print(note)

RIDGE PLOT 1: ALL CATEGORIES AT ONE FIXED [FE/H] AND OTHER VARIABLES SAMPLED -> INTRINSIC VARIATIONS UNRELATED TO METALLICITY
RIDGE PLOT 2: ONE CATEGORY, ALL VARIABLES SAMPLES, DIFFERENT STAR MASSES -> VARIATIONS WITH STAR MASS (VERY DIFFERENT FOR DIFFERENT CATEGORIES, NEED ANOTHER PLOT TO SHOW THAT
ADD MODEL WHERE METALLICITIES BELOW -0.5 GET 0 PLANETS, SEE HOW MUCH THAT CHANGES (PHRASE IT AS TWO EXTREMES)


In [5]:
# Apply the plotting defaults provided by gallifrey.visualization.seaborn
# before any figure is created.
set_plot_defaults()

## REDUCING DIMENSIONALITY

In [6]:
# Sub-directory name used when saving figures for this part of the notebook.
figure_directory = "Planet_model"

# Number of Monte Carlo samples drawn for the full-vs-reduced model comparison.
num_samples = 100_000

# Fixed settings shared by all predictions in this section.
num_embryos = 50
host_star_mass = 1  # presumably in solar masses -- TODO confirm
age = 100_000_000

# Build the model and look up the population id for this embryo number and
# host star mass.
planet_model = PlanetModel(num_embryos)
population_id = planet_model.get_population_id(num_embryos, host_star_mass)

# Keep every planet category of the model except the two excluded ones.
excluded_categories = ("Dwarf", "D-Burner")
categories = [
    name for name in planet_model.categories if name not in excluded_categories
]

In [7]:
# Create a reproducible uniform sample of the relevant Monte Carlo variables
# within the parameter bounds of the selected population.
included_variables = ("log_initial_mass", "[Fe/H]")

# Fixed seed so that "Restart Kernel -> Run All" reproduces the same sample and
# therefore the same summary statistics downstream.
rng = np.random.default_rng(42)

# Look the bounds up once instead of once per included variable.
system_bounds = planet_model.get_systems(population_id).bounds
bounds = {key: system_bounds[key] for key in included_variables}

# One uniformly sampled column per variable, drawn between its lower
# (bound[0]) and upper (bound[1]) limit.
samples = np.column_stack(
    [rng.uniform(bound[0], bound[1], num_samples) for bound in bounds.values()]
)
samples = pd.DataFrame(samples, columns=list(bounds))
# Every sample is evaluated at the same fixed age.
samples["age"] = age

In [8]:
# Keyword arguments shared by both predictions below.
prediction_parameter_dict = dict(
    categories=categories,
    host_star_mass=host_star_mass,
    variables=samples,
    return_full=False,
)

# Prediction without restricting the variable set ...
full_model = planet_model.prediction(**prediction_parameter_dict)
# ... and the same prediction restricted to `included_variables`.
small_model = planet_model.prediction(
    included_variables=included_variables,
    **prediction_parameter_dict,
)

# Per-sample difference between the two predictions (full minus reduced).
model_difference = full_model - small_model

In [9]:
# Summary statistics of the per-sample difference (full model minus reduced
# model), one column per planet category.
model_difference.describe()

Unnamed: 0,Earth,Super-Earth,Neptunian,Sub-Giant,Giant
count,100000.0,100000.0,100000.0,100000.0,100000.0
mean,0.231453,0.129727,-0.00644,0.004257,-0.009963
std,1.974873,1.451005,0.484897,0.19866,0.335862
min,-9.0,-8.333333,-3.666667,-1.666667,-2.333333
25%,-0.666667,-0.333333,0.0,0.0,0.0
50%,0.333333,0.0,0.0,0.0,0.0
75%,1.333333,0.666667,0.0,0.0,0.0
max,10.666667,8.666667,3.666667,2.0,2.333333


## INTERPOLATION BEHAVIOUR

In [10]:
# Sub-directory name used when saving figures for the interpolation study.
figure_directory = "Planet_model"

# Fixed settings for this section: a single planet category at one host star
# mass and one age, evaluated on a large Monte Carlo sample.
category = "Giant"
num_embryos = 50
host_star_mass = 1  # presumably in solar masses -- TODO confirm
age = 100_000_000
num_samples = 1_000_000

# Build the model and look up the population id for this embryo number and
# host star mass.
planet_model = PlanetModel(num_embryos)
population_id = planet_model.get_population_id(num_embryos, host_star_mass)

### Uniform Sampling Of Monte Carlo Variables

In [11]:
# Create a reproducible uniform sample of all Monte Carlo variables within the
# parameter bounds of the selected population.
# Fixed seed so that "Restart Kernel -> Run All" reproduces the same sample.
rng = np.random.default_rng(42)

bounds = planet_model.get_systems(population_id).bounds
# One uniformly sampled column per variable, drawn between its lower
# (bound[0]) and upper (bound[1]) limit.
samples = np.column_stack(
    [rng.uniform(bound[0], bound[1], num_samples) for bound in bounds.values()]
)
samples = pd.DataFrame(samples, columns=list(bounds.keys()))
# Every sample is evaluated at the same fixed age.
samples["age"] = age

In [12]:
# Evaluate the planet model for the selected category on every sampled set of
# Monte Carlo variables.
result = planet_model.prediction(
    category, host_star_mass, variables=samples, return_full=True
)

# Disabled preprocessing for the pair plot further down:
# result[category] = result[category].replace(0, 1e-5) # adding tiny amount to value=0 category, in order for plot to have a lower limit at 0
# result["Planet Bins"] = pd.cut(result[category], bins=[0, 1, 2, 3]) # binning planet number predictions for better plotting

# Replace the Monte Carlo variable names in the dataframe; the second return
# value is kept as `labels`.
result, labels = rename_labels(result)

In [13]:
# Open issues with the (currently disabled) pair plot, echoed into the cell
# output one per line.
pairplot_notes = (
    "this plot doesnt work yet, you want something else. bc right now you are calculating the number on points rather than the value of the 4d function, the way seaborn plots this is by weird overlaps",
    "see if you can actually cut down model to 2 parameters, that would make it easy. otherwise you have to find a good way to represent the 4d function here here",
)
for note in pairplot_notes:
    print(note)

# could add {'stat':'density', 'common_norm':False} to normalise diag plots
# pairplot = sns.pairplot(
#    result.drop(columns=["age", category]),
#    hue="Planet Bins",
#    kind="hist",
#    diag_kws={"element": "step"},
#    #palette=get_palette(n_colors=3),
# )
# SeabornFigure(pairplot).save("planet_model_interpolation.pdf", sub_directory=figure_directory)

this plot doesnt work yet, you want something else. bc right now you are calculating the number on points rather than the value of the 4d function, the way seaborn plots this is by weird overlaps
see if you can actually cut down model to 2 parameters, that would make it easy. otherwise you have to find a good way to represent the 4d function here here
