In [84]:
import json
import sympy as sp
import numpy as np

In [85]:
def round_expr(expr):
    """Return a copy of `expr` in which every numeric atom is re-created from its
    '%.6g' string representation (i.e. limited to six significant digits)."""
    rounded_numbers = {
        number: sp.Number('%.6g'%number)
        for number in expr.atoms(sp.Number)
    }
    return expr.xreplace(rounded_numbers)

In [86]:
# Can be used to quickly compute the mse on all regimes!
def mse_all_regimes(M):
    '''
        Computes the validation error on all regimes knowing the ones on the single regimes.

        M: Contains [mse_reg_1, mse_reg_2, ...] depending on no_regimes, i.e. the MSEs of
           the regimes other than regime 0 (regime 0 has a fixed, known MSE).
           len(M) must be 1, 2 or 3 (two, three or four regimes in total).

        Returns the sample-weighted mean of the regime-0 MSE and the entries of M.
        Raises ValueError if len(M) is not one of the supported values
        (previously the function silently returned None in that case).
    '''

    # Known parameters of regime 0
    mse_reg_0 = 0.0353
    n_0 = 32419018

    # Sample counts of the remaining regimes, keyed by len(M).
    # Each tuple sums to the same 62640812 samples.
    regime_sizes = {
        1: (62640812,),
        2: (5742663, 56898149),
        3: (5742663, 18367245, 38530904),
    }

    # Total number of samples over all regimes
    N = n_0 + regime_sizes[1][0]

    if len(M) not in regime_sizes:
        raise ValueError(
            'mse_all_regimes expects 1, 2 or 3 per-regime MSEs, got %d' % len(M))

    # Accumulate left to right so the floating-point result matches the explicit
    # expression n_0*mse_reg_0 + n_1*M[0] + n_2*M[1] + ...
    weighted_sum = n_0*mse_reg_0
    for n_k, mse_k in zip(regime_sizes[len(M)], M):
        weighted_sum = weighted_sum + n_k*mse_k

    return weighted_sum/N
    

In [87]:
# One regime
one_regime_mses = [195]
mse_all_regimes(one_regime_mses)

# Two regimes
# The single MSE passed in is a weighted mean of four values with weights 58, 35, 35, 58.
partial_mses = (69, 50, 38, 27)
partial_weights = (58, 35, 35, 58)
mse_all_regimes([sum(m*w for m, w in zip(partial_mses, partial_weights))/sum(partial_weights)])

# Three regimes
three_regime_mses = [68.821, 342.9278]
mse_all_regimes(three_regime_mses)

# Four regimes (last expression of the cell, so this is the value that gets displayed)
four_regime_mses = [111.57860, 94.6587, 400.5611]
mse_all_regimes(four_regime_mses)

187.40306513974517

**Save the optimized equations in the usual format**

In [88]:
# To save the output: one (initially empty) record per equation, filled in further below.
optimized_eqns = {
    eq_name: {}
    for eq_name in ('EQ1', 'EQ2', 'EQ3', 'EQ4', 'EQ5', 'EQC')
}

In [89]:
# Every input feature for which normalization statistics are available, in storage order.
all_possible_features = [
    'hus', 'clw', 'cli', 'ta', 'pa', 'zg', 'fr_land', 'U', 'rh', 'ps',
    'hus_z', 'hus_zz', 'clw_z', 'clw_zz', 'cli_z', 'cli_zz',
    'ta_z', 'ta_zz', 'pa_z', 'pa_zz', 'U_z', 'U_zz', 'rh_z', 'rh_zz',
]

# Feature name -> position in all_possible_features
loc = {name: position for position, name in enumerate(all_possible_features)}

# Features
features = ['rh', 'ta', 'clw', 'cli', 'rh_z']
no_features = len(features)

In [90]:
# mean and std of every feature, ordered like all_possible_features
mean_all = [4.12205844e-03,2.25493498e-05,3.38180032e-06,2.57065512e+02,6.00030443e+04,5.64080139e+03,2.35046400e-01,1.32776682e+01,6.02512234e-01,9.86270417e+04,-1.27545273e-06,-4.02484958e-10,1.65204582e-08,-4.34660202e-11,4.29441131e-10,-1.82817316e-12,-4.68742483e-03,-7.54899040e-07,-7.51544542e+00,-1.06989723e-04,1.65615172e-03,-9.27604679e-06,-4.76200071e-05,-1.32246548e-07]
std_all = [5.07648249e-03,5.69702638e-05,1.01308124e-05,3.00533874e+01,3.12514292e+04,5.66963918e+03,4.11184302e-01,1.11389888e+01,3.32494615e-01,6.24039256e+03,2.03179260e-06,1.17041141e-08,1.33311867e-07,1.42840744e-09,6.73384546e-09,5.07424672e-11,5.82875686e-03,6.34826092e-05,3.53136052e+00,1.13215264e-02,6.62892130e-03,6.08144307e-05,2.58065098e-04,2.49552692e-06]

# Keep only the statistics of the selected features, in the order given by `features`
selected_positions = [loc[sel_var] for sel_var in features]
mean = np.array([mean_all[pos] for pos in selected_positions])
std = np.array([std_all[pos] for pos in selected_positions])

In [91]:
# Introduce variables
# a..r: equation coefficients, x0..x4: normalized inputs, rh..rh_z: inputs before normalization
a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r = sp.symbols('a b c d e f g h i j k l m n o p q r')
x0, x1, x2, x3, x4 = sp.symbols('x0 x1 x2 x3 x4')
rh, ta, clw, cli, rh_z = sp.symbols('rh ta clw cli rh_z')

# Normalized inputs written in terms of the un-normalized ones: (value - mean)/std,
# with mean/std taken position by position (same order as the symbols listed here).
X0, X1, X2, X3, X4 = [
    (phys_var - mu)/sigma
    for phys_var, mu, sigma in zip((rh, ta, clw, cli, rh_z), mean, std)
]

**The optimized equations**

The optimized equations no longer always satisfy the RH-constraint!!
However, this is not caused by the new coefficients, but by the rounding function that was used in read_results.ipynb.

Note that Nelder-Mead and CG did not yield better results than BFGS. 
CG additionally took much longer.

In [92]:
# Symbolic form of each candidate equation in the normalized inputs x0..x4.
# The letters a..r are placeholders for the coefficients listed in `coefs`.
EQ1 = a*x0 - b*x1 + c*x0*(d*x0 + x1**2) + e*x4**2 + f - g/(x2 + h*x3 + i)
EQ2 = a*x0 - b*x1 + c*x0*(d*x0 + x1**2) + x4**2*(-e*x0 + f) + g - h/(x2 + i*x3 + j)
EQ3 = a*x0 - b*x1 + c*x0*(d*x0 + x1**2) + x4**2*(-e*x0 + f*x2 + g) + h - i/(x2 + j*x3 + k)
EQ4 = a*x0 - b*x1 + c*x0*(d*x0 + x1**2) + x4**2*(e*x4 + f) + g - h/(x2 + i*x3 + j)
EQ5 = a*x0 - b*x1 + c*(x0 - d)*(e*x1 + f)*(g*x1 + h*x4 + i) + j*x3 + k - l/(x2 + m*x3 + n)
EQC = a*x0 - b*x1 + c*(x0 - d)*(e*x1 + f)*(g*x1 + h*x4 + i) + j*x3 + k - l/(x2 + m*x3 + n) + o*x0**2 + x4**2*(-p*x0 + q*x2 + r)

# Coefficient values per equation, in the order a, b, c, ...
coefs = {
    'EQ1': [38.85954116,42.70818472,19.34746465,1.11321032,2.36741444,44.99763015,1.90033063,0.65718667,0.63587944],
    'EQ2': [39.43056288,42.76501721,19.3884905,1.09140881,0.3367936,2.40223919,44.89356218,1.88236593,0.65477071,0.63495087],
    'EQ3': [40.95496237,42.34790137,19.02132676,1.03023501,1.63436586,0.60497085,1.99558239,45.07426184,1.69308033,0.61643344,0.62041551],
    'EQ4': [38.6562122,43.53500518,19.78403208,1.13637902,0.35299939,4.04888686,44.21730274,2.03128527,0.66971589,0.6409019],
    'EQ5': [32.44329895,19.70440805,1.0861006,0.91481989,1.30598435,2.1434675,9.05590254,4.14792458,8.26124985,1.2544264,65.70232142,1.78147668,0.66383166,0.63743728],
    'EQC': [57.17670262,44.19363628,0.34458399,0.15621854,1.74556606,-1.25748431,34.51082029,-11.51991327,35.15804056,0.94539871,42.06607242,1.44965323,0.58103565,0.60581514,17.51519417,1.81237248,0.72810479,2.18378277],
}

train_mses_per_regime = {
    'EQ1': 163.564,
    'EQ2': 163.467,
    'EQ3': 162.671,
    'EQ4': 159.336,
    'EQ5': 170.916,
    'EQC': 159.320,
}

valid_mses_per_regime = {
    'EQ1': 162.342,
    'EQ2': 162.277,
    'EQ3': 161.634,
    'EQ4': 157.725,
    'EQ5': 169.586,
    'EQC': 158.198,
}

# Stored under 'Complexity' in the output
parameters = {
    'EQ1': 9,
    'EQ2': 10,
    'EQ3': 11,
    'EQ4': 10,
    'EQ5': 14,
    'EQC': 18,
}

# Explicit name -> expression table (replaces the fragile locals() lookup)
equations = {'EQ1': EQ1, 'EQ2': EQ2, 'EQ3': EQ3, 'EQ4': EQ4, 'EQ5': EQ5, 'EQC': EQC}

# Coefficient placeholders in the order in which the entries of coefs[...] are assigned
coef_symbols = [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r]

# Normalized input -> its expression in the un-normalized inputs
normalized_to_physical = {x0: X0, x1: X1, x2: X2, x3: X3, x4: X4}

# Output key -> name of the variable to differentiate with respect to
derivative_keys = [('$df/drh$', 'rh'), ('$df/dclw$', 'clw'), ('$df/dcli$', 'cli'), ('$df/dT$', 'ta')]

for eq_num in ['1','2','3','4','5','C']:
    eq_name = 'EQ%s'%eq_num
    EQ = equations[eq_name]

    # Taken from optimize_coefs_EQ1.ipynb
    new_coefs = coefs[eq_name]

    # zip stops at the shorter sequence, so only the first len(new_coefs) symbols are assigned
    subs_dict = dict(zip(coef_symbols, new_coefs))

    eq_sp = EQ.subs(subs_dict)

    # A too-short coefficient list would otherwise leave placeholders in the saved equation
    leftover = eq_sp.free_symbols.intersection(coef_symbols)
    if leftover:
        raise ValueError('%s: no value given for coefficient(s) %s'
                         % (eq_name, ', '.join(sorted(str(s) for s in leftover))))

    eq_sp_orig_inputs = eq_sp.subs(normalized_to_physical)

    entry = optimized_eqns[eq_name]
    entry['Equation w.r.t. normalized vars'] = str(round_expr(eq_sp))
    entry['Equation w.r.t. physical vars'] = str(round_expr(eq_sp_orig_inputs))
    for key, var_name in derivative_keys:
        entry[key] = str(round_expr(eq_sp_orig_inputs.diff(var_name)))
    # Taken from optimize_coefs_EQ1.ipynb
    entry['Train MSE in regime'] = train_mses_per_regime[eq_name]
    entry['Valid MSE in regime'] = valid_mses_per_regime[eq_name]
    # A single per-regime MSE is passed, i.e. the len(M) == 1 branch of mse_all_regimes
    entry['Valid MSE'] = mse_all_regimes([valid_mses_per_regime[eq_name]])
    entry['Complexity'] = parameters[eq_name]

In [93]:
# Dump the output: write all collected per-equation records to one JSON file
with open('./no_of_regimes_2/optimized_eqns.json', 'w') as json_out:
    json.dump(optimized_eqns, json_out)

**Sympy simplify making expressions more complicated**

In [515]:
# Example expression, kept as a string and parsed with sympify before simplifying.
# As the cell output shows, simplify() does not split the fraction.
expr = '((a + b)*(3 + 2*d) - 3)/(a + b)'
sp.simplify(sp.sympify(expr))

((a + b)*(2*d + 3) - 3)/(a + b)

In [516]:
# Operation count of the example expression: first the symbolic breakdown, then the total
for show_breakdown in (True, False):
    print(sp.count_ops(expr, visual=show_breakdown))

3*ADD + DIV + 2*MUL + SUB
7


In [517]:
# The form we would like to obtain from the example expression (fraction split off)
expr_desired = '(3 + 2*d) - 3/(a + b)'
sp.sympify(expr_desired)

2*d + 3 - 3/(a + b)

In [518]:
# Operation count of the desired form: first the symbolic breakdown, then the total
for show_breakdown in (True, False):
    print(sp.count_ops(expr_desired, visual=show_breakdown))

2*ADD + DIV + MUL + SUB
5


In [519]:
def my_measure(expr):
    """Complexity measure handed to simplify(): the operation count of `expr`."""
    n_ops = sp.count_ops(expr)
    return n_ops

# Retry simplify with the custom measure and a ratio below 1
sp.simplify(expr, ratio=0.9, measure=my_measure)

((a + b)*(2*d + 3) - 3)/(a + b)

In [520]:
# Attempt a partial-fraction decomposition, passing 2*d+3 as the second argument
# (`d` is the sympy symbol created earlier in the notebook).
# The cell output shows a single fraction with an expanded numerator, not the desired form.
sp.apart(expr, 2*d+3)

(2*a*d + 3*a + 2*b*d + 3*b - 3)/(a + b)

In [521]:
# Full expansion: per the cell output every term ends up over (a + b),
# which is longer than the desired form.
sp.expand(expr)

2*a*d/(a + b) + 3*a/(a + b) + 2*b*d/(a + b) + 3*b/(a + b) - 3/(a + b)

In [522]:
# apart() without naming a variable raises
# "NotImplementedError: multivariate partial fraction decomposition" (see cell output).
sp.apart(expr)

NotImplementedError: multivariate partial fraction decomposition

The only approach I have found that yields the desired expression:

In [None]:
# Workaround: append ' + 3/(a+b)' to the expression string before simplifying
# (presumably so that the fraction cancels -- not verified here, the cell has no recorded
# output), then append '- 3/(a+b)' to the simplified result and parse it again.
# The former stand-alone line `expr + ' + 3/(a+b)'` was dropped: its value was discarded.
sp.sympify(str(sp.simplify(expr + ' + 3/(a+b)')) + '- 3/(a+b)')