In [1]:
import os, sys
# Make the parent directory importable and switch into it, so that the relative
# paths used further down ("./data/...", "./model.pt") are resolved from there.
# NOTE(review): this cell is not idempotent -- '..' is resolved against the current
# working directory, so every re-run without a kernel restart moves one level further up.
sys.path.append(os.path.join(os.getcwd(), ".."))
main_dir = os.path.abspath('..')
os.chdir(main_dir)
# main_dir is the normalised form of the entry appended above, so the parent
# directory ends up on sys.path twice (harmless for imports).
sys.path.append(main_dir)

import numpy as np
from sklearn.model_selection import train_test_split
from PhysicsRegression import PhyReg

### Step1: Load SSN data

We hold out the data from 1976 to 2019 (cycles 21 to 24) as testing data and use the rest as training data. We also train on different amounts of data to check that the conclusion is consistent.

In [2]:
# Containers handed to PhyReg later on: one (n_samples, 1) column array per dataset.
x_to_fit = []
y_to_fit = []

np.random.seed(2024)

# Read the whitespace-separated table line by line. Blank lines are skipped explicitly
# rather than blindly discarding the last split element: a file without a trailing
# newline keeps its final record, and stray empty lines can no longer raise IndexError.
with open("./data/physics_data/SN_m_tot_V2.0.txt", "r") as fi:
    context = [line.split() for line in fi if line.strip()]

# Field 2 is used as the time axis and field 3 as the target value
# (presumably decimal year and monthly sunspot number -- confirm against the data file).
times = np.array([float(c[2]) for c in context])
nums = np.array([float(c[3]) for c in context])

# First dataset: rows 1280..2729 (described in this notebook as the years 1855 to 1976).
c_times = times[1280: 2730]
c_nums = nums[1280: 2730]

# Min-max rescale the time axis onto [0, 8]. The bounds are kept in min_times1 /
# max_times1 so a fitted formula can be mapped back onto the original time axis.
min_times1 = np.min(c_times)
max_times1 = np.max(c_times)
c_times = (c_times - min_times1) / (max_times1 - min_times1) * 8

# Randomly hold out 15% of this dataset; only the training part is stored for fitting.
x_train, x_test, y_train, y_test = train_test_split(
    c_times.reshape((-1, 1)),
    c_nums.reshape((-1, 1)),
    test_size=0.15,
    random_state=2024,
)
x_to_fit.append(x_train)
y_to_fit.append(y_train)

### Step2: Inference with PhyReg

We begin with the first dataset, i.e. from year 1855 to 1976.

In [3]:
# Build the regressor from the checkpoint stored next to the notebook's working directory.
phyreg = PhyReg(path="./model.pt", max_len=1000)

# Options for this run, grouped by what they appear to control.
# NOTE(review): the grouping is inferred from the keyword names only -- confirm
# against the PhyReg.fit documentation.
fit_options = dict(
    # search / refinement stages
    use_Divide=True,
    use_MCTS=False,
    use_GP=False,
    use_pysr_init=True,
    use_const_optimization=False,
    use_seperate_type=["id"],
    # oracle network settings
    oracle_name="physical1",
    oracle_file="./physical/data/oracle_model_case1/",
    oracle_bs=36,
    oracle_lr=0.002,
    oracle_epoch=1000,
    # logging
    verbose=True,
)

# Only the first dataset is fitted here.
phyreg.fit(x_to_fit[:1], y_to_fit[:1], **fit_options)

Training oracle Newral Network...
Generating formula through End-to-End...
Finished forward in 5.519245862960815 secs
Removed 1/2 skeleton duplicata
Removed 1/2 skeleton duplicata
Back aggregating formulas...
idx : 0
expr: (72.24645455022423 + (0.019186979204434913 * (47.4668916494141 * (pi * ((sin((-0.8650148973530852 * x_0)))**2 * 1/(((sin((-0.4567643998031946 * x_0)))**2 + (cos((-0.23506788897781172 * x_0)))**2)))))))
mse : 3666.172029435825



The best result is "C_0 + C_1 * sin(C_2 * x_0) ** 2 / (sin(C_3 * x_0) ** 2 + cos(C_4 * x_0) ** 2)".

In [4]:
# Candidate formulas produced by the preceding fit() call.
best_gens = phyreg.best_gens

# Express the top formula on the original (un-normalised) time axis by substituting
# x_0 -> (x_0 - min) / ((max - min) / 8), i.e. the same min-max mapping onto [0, 8]
# that was applied to the inputs before fitting.
# The substituted text carries its own outer parentheses so it stays a single operand
# wherever x_0 occurs; without them, forms such as "x_0**2" or "1/x_0" would silently
# change meaning because of operator precedence.
expr = str(best_gens[0]["predicted_tree"])
expr = expr.replace("x_0", f"((x_0 - {min_times1}) / {(max_times1 - min_times1)/8})")

# Print the formulas and then their skeleton/constants view.
phyreg.express_best_gens(best_gens, use_sp=True)
phyreg.express_skeleton(best_gens, use_sp=True)

idx : 0
expr: 72.2464545502242 + 2.86119376905125*sin(0.8650148973530852*x_0)**2/(sin(0.4567643998031946*x_0)**2 + cos(0.23506788897781172*x_0)**2)
mse : 3666.172029435825

idx          : 0
expr skeleton: C_0 + C_1*sin(C_2*x_0)**2/(sin(C_3*x_0)**2 + cos(C_4*x_0)**2)
constants    : 72.246 2.861 0.865 0.457 0.235



Let's modify a few coefficients and re-optimize the constants:

In [5]:
# Hand-edited starting point for the optimiser, written as a plain formula string.
seed_formula = "280.86119376905125*sin(0.8650148973530852*x_0)**2/(1.0 + 1.0*sin(0.4567643998031946*x_0)**2 + 1.0*cos(0.23506788897781172*x_0)**2)"

# Set the BFGS-related parameter for this optimisation run.
# NOTE(review): presumably the number of BFGS restarts/iterations -- confirm in PhyReg.
phyreg.params.num_bfgs = 100

# Replace the top candidate with the edited formula, then re-fit its constants.
best_gens[0]["predicted_tree"] = seed_formula
best_gens = phyreg.constant_optimization(best_gens, x_to_fit, y_to_fit)

# Print the optimised formula, followed by its skeleton/constants view.
for show in (phyreg.express_best_gens, phyreg.express_skeleton):
    show(best_gens, use_sp=True)

idx : 0
expr: 289.216021624957*sin(2.369710596405354*x_0)**2/(3.33509227650824*sin(2.348307982624872*x_0)**2 + 0.0869706869492789*cos(0.24026241796677425*x_0)**2 + 0.0282045931046195)
mse : 3762.535513899661

idx          : 0
expr skeleton: C_0*sin(C_1*x_0)**2/(C_2*sin(C_3*x_0)**2 + C_4*cos(C_5*x_0)**2 + C_6)
constants    : 289.216 2.37 3.335 2.348 0.087 0.24 0.028

