# Symbolic Regression Metrics
This file holds all our results from:
1. Benchmarking existing symbolic regression models
2. Symbolic regression on subsets of data with `Feyn`
3. Exploring new symbolic regression models with `gpg`

In [None]:
import pandas as pd
import numpy as np

# Benchmarking Existing Symbolic Regression Models

In [None]:
def print_metrics(file):
  """Print the metrics and equation of the median and the best trial in a results CSV.

  Trials are ranked by their ``test_r2`` value after the numeric columns have
  been rounded to 4 decimals. If several trials share the selected value, all of
  them are displayed and the equation of the first one is printed.

  Parameters
  ----------
  file : str or path-like
      CSV with one row per trial and the columns ``train_r2``, ``test_r2``,
      ``train_nrmse``, ``test_nrmse``, ``simplicity`` and ``equation``.

  Raises
  ------
  ValueError
      If the CSV contains no trials.
  """

  metric_columns = ['train_r2','test_r2','train_nrmse','test_nrmse','simplicity']

  metrics = pd.read_csv(file).round(decimals = 4)
  if metrics.empty:
    raise ValueError(f"no trials found in {file!r}")

  test_r2 = metrics['test_r2']

  # Report the run with the approx. median test r^2. With an even number of
  # trials there is no single middle run, so the lower of the two middle runs is
  # used: for 10 trials this is index 4 of the ascending order, i.e. the 5th
  # LOWEST test r^2. Deriving the rank from the row count keeps this valid for
  # any number of trials.
  median_rank = (len(test_r2) - 1) // 2
  median_mask = test_r2 == np.sort(test_r2.values)[median_rank]
  print("Metrics for Run with Median Test R2")

  display(metrics.loc[median_mask, metric_columns])
  print("Equation:", metrics.loc[median_mask, 'equation'].values[0])

  # Report the run with the best (highest) test r^2.
  best_mask = test_r2 == test_r2.max()
  print("\nMetrics for Run with Best Test R2")

  display(metrics.loc[best_mask, metric_columns])
  print("Equation:", metrics.loc[best_mask, 'equation'].values[0])

## GPLEARN

In [None]:
print_metrics('benchmark_metrics_gplearn.csv')

Metrics for Run with Median Test R2


Unnamed: 0,train_r2,test_r2,train_nrmse,test_nrmse,simplicity
7,0.8296,0.8311,0.4889,0.4903,-1.8


Equation: (KDP*RhoHV**3*Z*(KDP*RhoHV**2*Z**2)**0.5)**0.5

Metrics for Run with Best Test R2


Unnamed: 0,train_r2,test_r2,train_nrmse,test_nrmse,simplicity
1,0.8117,0.8721,0.5065,0.4421,-1.8


Equation: (KDP*RhoHV**4*Z*(KDP*RhoHV**2*Z**2)**0.5)**0.5


## GPG

In [None]:
# The GPG results file uses different column names than the other benchmark
# files, so print_metrics cannot be reused and the report is produced inline.
# In the equation, x_0 is Z, x_1 is ZDR, x_2 is KDP, x_3 is RhoHV.

metrics = pd.read_csv('regular_results.csv')

gpg_metric_columns = ['Train R2s','Test R2s','Train NRMSES','Test NRMSES','Simplicities']

# Report the run with the approx. median test r^2. With an even number of
# trials the lower of the two middle runs is used: for 10 trials this is index 4
# of the ascending order, i.e. the 5th LOWEST test r^2. The rank is derived from
# the row count so the cell also works for a different number of trials.
print("Metrics for Run with Median Test R2")

gpg_test_r2 = metrics['Test R2s']
gpg_median_rank = (len(gpg_test_r2) - 1) // 2
gpg_median_mask = gpg_test_r2 == np.sort(gpg_test_r2.values)[gpg_median_rank]

display(metrics.loc[gpg_median_mask, gpg_metric_columns])
print("Equation:", metrics.loc[gpg_median_mask, 'Models'].values[0])

# Report the run with the best (highest) test r^2.
print("\nMetrics for Run with Best Test R2")

gpg_best_mask = gpg_test_r2 == gpg_test_r2.max()

display(metrics.loc[gpg_best_mask, gpg_metric_columns])
print("Equation:", metrics.loc[gpg_best_mask, 'Models'].values[0])

Metrics for Run with Median Test R2


Unnamed: 0,Train R2s,Test R2s,Train NRMSES,Test NRMSES,Simplicities
8,0.8824,0.8824,0.4018,0.4246,-2.4


Equation: '(0.0508922263979912*x_0 - 0.162755370140076*x_2 + 0.0508922263979912*cos(x_0))*(1.75166165828705*x_2 + x_3 - 22.1700248718262/x_0)*(2*x_0*x_3**2 - x_0 - x_2 + cos(0.598085820674896*x_0))/(x_1*x_3 + x_3 + 2.49769043922424) - 0.0730332061648369'

Metrics for Run with Best Test R2


Unnamed: 0,Train R2s,Test R2s,Train NRMSES,Test NRMSES,Simplicities
6,0.8744,0.9049,0.4115,0.3842,-2.5


Equation: '-0.009477709373552686*x_0*x_1*x_3*(x_1 - 5.078491)/(x_3 - 4.577389) + 0.004744200993516793*x_0*x_3**2*(x_0 + x_2)*(x_2 + 0.5811773311853556)*(x_3 + cos(x_1) + cos(x_2 - x_3) + 0.8610352745595037) + (0.004745682268064854*x_2 + 0.007022129103868347)*(-cos(x_3) + x_2/x_1)/cos(x_0) - 4.549306'


## PYSR

In [None]:
print_metrics('benchmark_metrics_pysr.csv')

Metrics for Run with Median Test R2


Unnamed: 0,train_r2,test_r2,train_nrmse,test_nrmse,simplicity
2,0.8885,0.872,0.3916,0.442,-1.9


Equation: Abs(((KDP - 1*(-0.56335634))*(Z - 2.5335507*ZDR*exp(sin(ZDR))))**RhoHV - 1*12.397555)

Metrics for Run with Best Test R2


Unnamed: 0,train_r2,test_r2,train_nrmse,test_nrmse,simplicity
6,0.8533,0.8943,0.4446,0.405,-1.5


Equation: Abs(0.8496046**ZDR*Z*(KDP + 0.48193857) - 10.924554)


## PYSR (with constraints on complexity)

In [None]:
print_metrics('benchmark_metrics_pysr_simplified.csv')

Metrics for Run with Median Test R2


Unnamed: 0,train_r2,test_r2,train_nrmse,test_nrmse,simplicity
7,0.8409,0.8452,0.4723,0.4695,-1.5


Equation: (KDP + 0.11323998)*(Z**RhoHV - ZDR)

Metrics for Run with Best Test R2


Unnamed: 0,train_r2,test_r2,train_nrmse,test_nrmse,simplicity
1,0.8291,0.8864,0.4825,0.4166,-1.5


Equation: Z**RhoHV*(KDP + 0.13871016) - ZDR


## FFX

In [None]:
print_metrics('benchmark_metrics_ffx.csv')

Metrics for Run with Median Test R2


Unnamed: 0,train_r2,test_r2,train_nrmse,test_nrmse,simplicity
2,0.8971,0.8553,0.3761,0.4698,-4.2


Equation: 11.6 + 2.60e5*max(0,KDP-1.33) * max(0,0.955-RhoHV) + 5.39e4*max(0,KDP-1.01) * max(0,0.939-RhoHV) - 4.91e4*max(0,KDP-1.33) * max(0,RhoHV-0.987) - 3.33e4*max(0,KDP-1.01) * max(0,0.955-RhoHV) + 1.55e4*max(0,0.939-RhoHV) * max(0,0.924-RhoHV) + 1.52e4*max(0,KDP-0.386) * max(0,0.924-RhoHV) - 9276*max(0,KDP-1.33) * max(0,0.971-RhoHV) + 7984*max(0,KDP-1.33) * max(0,RhoHV-0.971) + 7451*max(0,KDP-1.01) * max(0,0.971-RhoHV) + 5488*max(0,0.971-RhoHV) * max(0,0.955-RhoHV) + 4821*max(0,KDP-1.01) * max(0,RhoHV-0.971) + 4625*max(0,KDP-0.386) * max(0,0.939-RhoHV) + 4514*max(0,KDP-0.386) * max(0,RhoHV-0.987) - 3470*max(0,KDP-0.386) * max(0,0.955-RhoHV) - 3399*max(0,0.939-RhoHV) * max(0,0.955-RhoHV) - 3147*max(0,0.0720-KDP) * max(0,0.924-RhoHV) - 2344*max(0,0.924-RhoHV) * max(0,0.971-RhoHV) - 2036*max(0,RhoHV-0.971) * max(0,RhoHV-0.987) - 1872*max(0,KDP-1.01) * max(0,RhoHV-0.987) - 1592*KDP * max(0,0.924-RhoHV) - 1480*max(0,KDP-0.386) * max(0,RhoHV-0.971) - 1458*max(0,0.939-RhoHV) * max(0,0.971

Unnamed: 0,train_r2,test_r2,train_nrmse,test_nrmse,simplicity
6,0.884,0.902,0.3955,0.3899,-4.0


Equation: -4.78 - 2.80e4*max(0,KDP-1.46) * max(0,RhoHV-0.987) - 1.74e4*max(0,0.940-RhoHV) * max(0,0.956-RhoHV) - 1.69e4*max(0,RhoHV-1.00) * max(0,RhoHV-0.987) + 1.49e4*max(0,0.0308-KDP) * max(0,RhoHV-1.00) + 8965*KDP * max(0,RhoHV-1.00) + 8389*max(0,0.924-RhoHV) * max(0,0.940-RhoHV) + 7471*max(0,KDP-0.317) * max(0,RhoHV-0.987) - 7464*max(0,RhoHV-0.972) * max(0,RhoHV-0.987) + 7234*max(0,KDP-0.317) * max(0,0.924-RhoHV) - 6061*max(0,KDP-1.18) * max(0,0.956-RhoHV) + 3850*max(0,0.924-RhoHV) * max(0,0.956-RhoHV) + 3081*max(0,0.924-RhoHV) * max(0,0.0308-KDP) - 2457*KDP * max(0,RhoHV-0.987) - 2417*max(0,KDP-0.317) * max(0,0.940-RhoHV) - 2161*max(0,0.0308-KDP) * max(0,0.956-RhoHV) + 1958*max(0,KDP-1.18) * max(0,RhoHV-0.972) - 1893*max(0,KDP-0.317) * max(0,RhoHV-0.972) + 1709*max(0,KDP-1.18) * max(0,0.972-RhoHV) + 1432*KDP * max(0,RhoHV-0.972) + 1217*max(0,0.0308-KDP) * max(0,0.972-RhoHV) + 992*max(0,KDP-0.317) * max(0,0.956-RhoHV) + 933*max(0,0.0308-KDP) * max(0,RhoHV-0.972) - 786*KDP * max(0,0

## FEYN

In [None]:
print_metrics('benchmark_metrics_feyn.csv')

Metrics for Run with Median Test R2


Unnamed: 0,train_r2,test_r2,train_nrmse,test_nrmse,simplicity
4,0.894,0.8792,0.3864,0.4142,-2.4


Equation: 82.2809 - 81.7592*exp(-0.469416*(Kdp + 0.160882)**2 - 405.53*(0.00348316*Reflectivity - Rhohv + 0.661661)**2*exp(-17.66*(0.0123609*Reflectivity - 1)**2 - 0.33283*(-Zdr - 0.03981)**4))

Metrics for Run with Best Test R2


Unnamed: 0,train_r2,test_r2,train_nrmse,test_nrmse,simplicity
1,0.8757,0.9046,0.4116,0.3817,-2.4


Equation: 103.701*exp(-2.38136*(0.34005*Kdp + 0.916437*exp(-34.6158*(0.806063*Rhohv - 1)**2 - 0.562185*(Zdr - 0.159972)**2) - 1)**2 - 2.0*tanh(0.0698637*Reflectivity - 3.33322)**2) - 2.54965


## RILS-ROLS

In [None]:
print_metrics('benchmark_metrics_RILS_ROLS.csv')

Metrics for Run with Median Test R2


Unnamed: 0,train_r2,test_r2,train_nrmse,test_nrmse,simplicity
9,0.8944,0.8859,0.3904,0.3894,-2.0


Equation: 1.23358*KDP*RhoHV**4*Z - 21.213749*KDP + 2.0e-6*Z**4*exp(RhoHV**4*cos(ZDR)) - 0.855046

Metrics for Run with Best Test R2


Unnamed: 0,train_r2,test_r2,train_nrmse,test_nrmse,simplicity
1,0.8824,0.9145,0.4003,0.39,-1.9


Equation: 1.207781*KDP*RhoHV**3*Z - 20.088446*KDP + 2.0e-6*RhoHV**4*Z**4*exp(cos(ZDR)) - 0.642722


## DSO

In [None]:
print_metrics('benchmark_metrics_dso.csv')

Metrics for Run with Median Test R2


Unnamed: 0,train_r2,test_r2,train_nrmse,test_nrmse,simplicity
3,0.8998,0.894,0.3743,0.392,-3.1


Equation: -3.52726e-6*KDP**2*RhoHV - 0.848864*KDP**2*Z + 8.60403*KDP**2*ZDR + 44.9859*KDP**2 + 3332.15*KDP*RhoHV**2 + 3.07109*KDP*RhoHV*Z - 271.353*KDP*RhoHV*ZDR - 6057.87*KDP*RhoHV - 0.0210111*KDP*Z**2 - 0.809575*KDP*Z*ZDR + 1.44773*KDP*ZDR**2 + 277.645*KDP*ZDR + 2678.61*KDP - 11289.0*RhoHV**3 - 69.7903*RhoHV**2*Z - 143.76*RhoHV**2*ZDR + 34513.9*RhoHV**2 + 134.925*RhoHV*Z + 3.58274*RhoHV*ZDR**2 + 265.364*RhoHV*ZDR - 35065.9*RhoHV + 0.000873994*Z**3 + 0.00408534*Z**2*ZDR - 0.0510384*Z**2 - 0.0434901*Z*ZDR**2 - 0.232257*Z*ZDR - 64.0691*Z + 0.123787*ZDR**3 - 2.67691*ZDR**2 - 119.335*ZDR + sin(exp(KDP*RhoHV)) + 11833.5

Metrics for Run with Best Test R2


Unnamed: 0,train_r2,test_r2,train_nrmse,test_nrmse,simplicity
1,0.8909,0.9188,0.3856,0.3523,-3.2


Equation: log(RhoHV + exp(RhoHV*(5.35455*KDP**3 - 56.2168*KDP**2*RhoHV - 1.57664*KDP**2*Z + 6.58712*KDP**2*ZDR + 122.85*KDP**2 + 4137.68*KDP*RhoHV**2 + 1.82008*KDP*RhoHV*Z - 263.978*KDP*RhoHV*ZDR - 7503.71*KDP*RhoHV - 0.672237*KDP*Z*ZDR + 1.41999*KDP*ZDR**2 + 268.832*KDP*ZDR + 3337.47*KDP - 18268.5*RhoHV**3 - 72.3069*RhoHV**2*Z - 256.629*RhoHV**2*ZDR + 54749.1*RhoHV**2 + 138.997*RhoHV*Z + 7.65171*RhoHV*ZDR**2 + 471.058*RhoHV*ZDR - 54599.3*RhoHV + 0.000746417*Z**3 - 0.000717661*Z**2*ZDR - 0.0383317*Z**2 - 0.0194656*Z*ZDR**2 - 66.0092*Z + 0.095745*ZDR**3 - 7.33*ZDR**2 - 214.797*ZDR + 18114.8) + cos(exp(-RhoHV + exp(ZDR/log(RhoHV + Z))))))


## PYOPERON

In [None]:
print_metrics('benchmark_metrics_operon.csv')

Metrics for Run with Median Test R2


Unnamed: 0,train_r2,test_r2,train_nrmse,test_nrmse,simplicity
8,0.9003,0.8938,0.3701,0.3782,-2.5


Equation: 0.997*(57.773*KDP + 0.908265213442325*(-73.304*KDP*(-4.577*RhoHV + 0.034*ZDR)/sqrt(0.158404*ZDR**2 + 1) - 371.0*KDP + 0.271*Z - 5.16)/(sqrt(ZDR**2 + 0.824945697949433)*sqrt(sin(9.431*RhoHV)**2 + 1)))*(0.095*Z - 1.858) + 0.075

Metrics for Run with Best Test R2


Unnamed: 0,train_r2,test_r2,train_nrmse,test_nrmse,simplicity
1,0.8882,0.9214,0.3902,0.3787,-2.6


Equation: -0.044 + 1.002*((-4.72*KDP - 0.168*Z)/sqrt(0.811801*ZDR**2 + 1) - sin(0.208*Z))*(-39.349549*KDP*RhoHV - 760.328*KDP + (797.134*KDP - 0.052*Z)/sqrt(0.003844*ZDR**2 + 1) - sin(0.149*Z))/sqrt(0.487204*ZDR**2 + 1)


# Symbolic Regression on Subsets of Data

## Metrics for Rain Type

In [None]:
def print_metrics_rain_type(file_name):
  """Print, for each rain type, the metrics and equation of the trial with the best test R2.

  The CSV is read and rounded to 4 decimals. For each of the column prefixes
  ``str`` (Stratiform), ``con`` (Convective) and ``mix`` (Mixed), the row(s)
  whose ``<prefix>_test_r2`` equals the column maximum are displayed, followed
  by the first matching ``<prefix>_equation``.

  Parameters
  ----------
  file_name : str or path-like
      CSV with one row per trial and, for every prefix above, the columns
      ``<prefix>_train_r2``, ``<prefix>_test_r2``, ``<prefix>_train_nrmse``,
      ``<prefix>_test_nrmse``, ``<prefix>_simplicity`` and ``<prefix>_equation``.
  """

  metric_suffixes = ('train_r2', 'test_r2', 'train_nrmse', 'test_nrmse', 'simplicity')
  rain_types = (("Stratiform", "str"), ("Convective", "con"), ("Mixed", "mix"))

  metrics = pd.read_csv(file_name).round(decimals = 4)

  # Runs are selected by their TEST R2 below, so the heading must say so
  # (it previously read "Best Train R2").
  print("Metrics for Run with Best Test R2")

  for heading, prefix in rain_types:
    print(f"\n{heading}")
    test_r2 = metrics[f'{prefix}_test_r2']
    best_mask = test_r2 == test_r2.max()
    display(metrics.loc[best_mask, [f'{prefix}_{suffix}' for suffix in metric_suffixes]])
    print("Equation:", metrics.loc[best_mask, f'{prefix}_equation'].values[0])

In [None]:
print_metrics_rain_type('subset_metrics_feyn_rain_type.csv')

Metrics for Run with Best Train R2

Stratiform


Unnamed: 0,str_train_r2,str_test_r2,str_train_nrmse,str_test_nrmse,str_simplicity
5,0.7436,0.7877,0.4247,0.3989,-2.4


Equation: 25.0198*exp(-2.0*exp(-0.499665*(0.731905*KDP + 1)**2 - 6.65792*(KDP + (0.62459666131 - 0.31995714316*ZDR)*(17.0616*RhoHV - 16.2494) - 0.0890478)**2) - 2.0*tanh(0.0926277*Z - 3.80896)**2) - 0.5206

Convective


Unnamed: 0,con_train_r2,con_test_r2,con_train_nrmse,con_test_nrmse,con_simplicity
8,0.8422,0.8818,0.2941,0.3033,-2.0


Equation: 32.7792*KDP + 42.7995*exp(-1.58551*(0.587239 - ZDR)**2 - 8.90729*(1 - 0.0148648*Z)**2) + 0.79742

Mixed


Unnamed: 0,mix_train_r2,mix_test_r2,mix_train_nrmse,mix_test_nrmse,mix_simplicity
9,0.533,0.7051,0.4287,0.3044,-1.7


Equation: 28.6847*KDP - 11.3726*tanh(-0.223793*Z + 1.55221*ZDR + 8.1941) + 12.2602


## Metrics for Nodes

In [None]:
def print_metrics_nodes(file_name):
  """Show, for every node, the trial that reaches the highest test R2.

  The CSV at `file_name` is read and rounded to 4 decimals. For each of the
  column prefixes n0, n1 and n2, the train/test R2, train/test NRMSE and
  simplicity of the row(s) whose `<prefix>_test_r2` equals the column maximum
  are displayed, followed by the first matching `<prefix>_equation`.
  """

  results = pd.read_csv(file_name).round(decimals = 4)
  suffixes = ('train_r2', 'test_r2', 'train_nrmse', 'test_nrmse', 'simplicity')

  print("Metrics for Run with Best Test R2")

  for node in range(3):
    prefix = f'n{node}'
    print(f"\nNode {node}")

    # rows of the trial(s) that hit this node's maximum test R2
    score = results[f'{prefix}_test_r2']
    is_best = score == score.max()

    display(results.loc[is_best, [f'{prefix}_{suffix}' for suffix in suffixes]])
    print("Equation:", results.loc[is_best, f'{prefix}_equation'].values[0])

In [None]:
print_metrics_nodes('subset_metrics_feyn_decision_tree.csv')

Metrics for Run with Best Test R2

Node 0


Unnamed: 0,n0_train_r2,n0_test_r2,n0_train_nrmse,n0_test_nrmse,n0_simplicity
0,0.5775,0.633,0.462,0.4271,-2.1


Equation: 14.0754*exp(-41.0903*(0.178971 - KDP)**2*(ZDR + 0.253934)**2 - 23.0611*(1 - 0.0237193*Z)**2) + 1.62759

Node 1


Unnamed: 0,n1_train_r2,n1_test_r2,n1_train_nrmse,n1_test_nrmse,n1_simplicity
3,0.451,0.5624,0.3923,0.3123,-2.1


Equation: 7.04311*KDP + 1.17*Z + 12.9415*exp(-7.72534*(KDP - 0.381355)**2 - 397.063*(-RhoHV + 0.0525347*ZDR + 0.967249)**2) - 41.4989

Node 2


Unnamed: 0,n2_train_r2,n2_test_r2,n2_train_nrmse,n2_test_nrmse,n2_simplicity
7,0.7773,0.7826,0.2277,0.2199,-2.0


Equation: (0.065445966672*KDP - 0.0362056*(0.239369*Z - 9.61017)*(0.354271*ZDR - 0.558725) - 0.0061401439096)*exp(6.46578*RhoHV) + 7.644


## Metrics for Radar Variable

In [None]:
def print_metrics_radar(file_name, variable):
  """Show the best-test-R2 trial for the above-mean and below-mean groups of `variable`.

  The CSV at `file_name` is read and rounded to 4 decimals. Columns are looked
  up as `a_<variable>_...` for the "Above Mean" group and `b_<variable>_...`
  for the "Below Mean" group. For each group the row(s) with the maximum
  `..._test_r2` are displayed and the first matching `..._equation` is printed.
  """

  results = pd.read_csv(file_name).round(decimals = 4)
  suffixes = ('train_r2', 'test_r2', 'train_nrmse', 'test_nrmse', 'simplicity')

  print("Metrics for Run with Best Test R2")

  for group, heading in (('a', "Above Mean"), ('b', "Below Mean")):
    stem = f'{group}_{variable}'
    print(f"\n{heading}")

    # trial(s) whose test R2 is the highest within this group
    score = results[f'{stem}_test_r2']
    is_best = score == score.max()

    display(results.loc[is_best, [f'{stem}_{suffix}' for suffix in suffixes]])
    print("Equation:", results.loc[is_best, f'{stem}_equation'].values[0])

In [None]:
print_metrics_radar('subset_metrics_feyn_ZDR_mean.csv', 'ZDR')

Metrics for Run with Best Test R2

Above Mean


Unnamed: 0,a_ZDR_train_r2,a_ZDR_test_r2,a_ZDR_train_nrmse,a_ZDR_test_nrmse,a_ZDR_simplicity
7,0.9161,0.9519,0.362,0.2976,-2.2


Equation: (6.2164892132*KDP + 14.9178308328)*(1.70403*KDP + 0.0144471*Z + exp(-1.57949*(0.4712 - ZDR)**2 - 21.2372*(1 - 0.0206015*Z)**2) - 1.70698) + 19.7032

Below Mean


Unnamed: 0,b_ZDR_train_r2,b_ZDR_test_r2,b_ZDR_train_nrmse,b_ZDR_test_nrmse,b_ZDR_simplicity
2,0.8538,0.8538,0.4096,0.4314,-2.3


Equation: 8.39839*(KDP + 0.104083)**2*(49.1884*RhoHV - 47.0173) + 57.0819*exp(-1.12267*(0.591598*KDP - 1)**2 - 13.5908*(-0.020044*Z + 0.0149188*ZDR + 1)**2) + 1.26861


In [None]:
print_metrics_radar('subset_metrics_feyn_RhoHV_mean.csv', 'RhoHV')

Metrics for Run with Best Test R2

Above Mean


Unnamed: 0,a_RhoHV_train_r2,a_RhoHV_test_r2,a_RhoHV_train_nrmse,a_RhoHV_test_nrmse,a_RhoHV_simplicity
3,0.9023,0.9132,0.3554,0.3898,-2.1


Equation: (131.47506789*KDP + 47.630194986)*exp(-4.86836*(1 - 0.0143459*Z)**2 - 0.339622*(0.396623*ZDR + 1)**2) - 3.35018

Below Mean


Unnamed: 0,b_RhoHV_train_r2,b_RhoHV_test_r2,b_RhoHV_train_nrmse,b_RhoHV_test_nrmse,b_RhoHV_simplicity
3,0.8586,0.9025,0.4268,0.3985,-2.4


Equation: 59.1259*exp(-11.4971*(0.0208702*Z - 1)**2 - 2.0*(-0.658518*KDP + exp(-6.07572*(1 - 0.461377*ZDR)**2 - 0.955173*(0.0948836*ZDR + 1)**2) + 0.938124)**2) + 1.14885


## Metrics for Clusters

In [None]:
def print_metrics_clusters(file_name):
  """Show the cluster-averaged metrics of the trial with the highest mean test R2.

  The CSV at `file_name` is read and rounded to 4 decimals; the row(s) whose
  `mean_test_r2` equals the column maximum are displayed with their mean
  train/test R2, mean train/test NRMSE and mean simplicity.
  """

  results = pd.read_csv(file_name).round(decimals = 4)
  wanted = ['mean_train_r2', 'mean_test_r2', 'mean_train_nrmse',
            'mean_test_nrmse', 'mean_simplicity']

  print("\nMetrics for Run with Best Test R2 (mean of 3 clusters)")

  # keep only the trial(s) that reach the top mean test R2
  top_score = results['mean_test_r2'].max()
  is_best = results['mean_test_r2'] == top_score

  display(results.loc[is_best, wanted])

In [None]:
def print_metrics_all_clusters(file_name):
  """Show, cluster by cluster, the trial that reaches the highest test R2.

  The CSV at `file_name` is read and rounded to 4 decimals. For each of the
  column prefixes c0, c1 and c2, the train/test R2, train/test NRMSE and
  simplicity of the row(s) whose `<prefix>_test_r2` equals the column maximum
  are displayed, followed by the first matching `<prefix>_equation`.
  """

  results = pd.read_csv(file_name).round(decimals = 4)
  suffixes = ('train_r2', 'test_r2', 'train_nrmse', 'test_nrmse', 'simplicity')

  print("Metrics for Run with Best Test R2")

  for cluster in range(3):
    prefix = f'c{cluster}'
    print(f"\nCluster {cluster}")

    # trial(s) with the top test R2 for this cluster
    score = results[f'{prefix}_test_r2']
    is_best = score == score.max()

    display(results.loc[is_best, [f'{prefix}_{suffix}' for suffix in suffixes]])
    print("Equation:", results.loc[is_best, f'{prefix}_equation'].values[0])

### K-Means

In [None]:
print_metrics_clusters('subset_metrics_feyn_cluster_kmeans_radar.csv')


Metrics for Run with Best Test R2 (mean of 3 clusters)


Unnamed: 0,mean_train_r2,mean_test_r2,mean_train_nrmse,mean_test_nrmse,mean_simplicity
7,0.7382,0.7826,0.4142,0.3784,-2.1333


In [None]:
print_metrics_clusters('subset_metrics_feyn_cluster_kmeans_rhozdr.csv')


Metrics for Run with Best Test R2 (mean of 3 clusters)


Unnamed: 0,mean_train_r2,mean_test_r2,mean_train_nrmse,mean_test_nrmse,mean_simplicity
7,0.9048,0.92,0.3605,0.325,-2.2333


In [None]:
print_metrics_clusters('subset_metrics_feyn_cluster_kmeans_rain.csv')


Metrics for Run with Best Test R2 (mean of 3 clusters)


Unnamed: 0,mean_train_r2,mean_test_r2,mean_train_nrmse,mean_test_nrmse,mean_simplicity
4,0.6318,0.6764,0.2456,0.2434,-2.1333


In [None]:
print_metrics_all_clusters('subset_metrics_feyn_cluster_kmeans_rhozdr.csv')

Metrics for Run with Best Test R2

Cluster 0


Unnamed: 0,c0_train_r2,c0_test_r2,c0_train_nrmse,c0_test_nrmse,c0_simplicity
0,0.8979,0.9125,0.3767,0.3458,-2.5


Equation: 99.8572*exp(-1.19802*(0.359975*KDP - 1)**2 - 9.12357*(0.0166261*Z - 1)**2*(-0.600963*KDP + 0.755947*exp(-5.97919*(1 - 0.570242*ZDR)**2 - 1.05766*(0.672744*KDP - 1)**2) + 1)**2) + 0.288217

Cluster 1


Unnamed: 0,c1_train_r2,c1_test_r2,c1_train_nrmse,c1_test_nrmse,c1_simplicity
9,0.9282,0.9581,0.2849,0.2134,-2.2


Equation: 58.0967 - 68.4041*exp(-0.459895*(KDP + 0.495075)**2 - 11.79*(exp(-6.83538*(1 - 0.0344704*Z)**2 - 5.78763*(KDP - 0.732112)**2) - 0.122661)**2)

Cluster 2


Unnamed: 0,c2_train_r2,c2_test_r2,c2_train_nrmse,c2_test_nrmse,c2_simplicity
3,0.8778,0.9473,0.4446,0.301,-1.7


Equation: 37.3265*tanh(0.638661*KDP + 0.0716306*Z - 0.0767293*ZDR - 3.85633) + 38.2411


### Bisecting K-Means

In [None]:
print_metrics_clusters('subset_metrics_feyn_cluster_bkmeans_radar.csv')


Metrics for Run with Best Test R2 (mean of 3 clusters)


Unnamed: 0,mean_train_r2,mean_test_r2,mean_train_nrmse,mean_test_nrmse,mean_simplicity
1,0.7129,0.7527,0.43,0.3936,-1.9667


In [None]:
print_metrics_clusters('subset_metrics_feyn_cluster_bkmeans_rhozdr.csv')


Metrics for Run with Best Test R2 (mean of 3 clusters)


Unnamed: 0,mean_train_r2,mean_test_r2,mean_train_nrmse,mean_test_nrmse,mean_simplicity
6,0.9064,0.898,0.3554,0.3722,-2.0667


In [None]:
print_metrics_clusters('subset_metrics_feyn_cluster_bkmeans_rain.csv')


Metrics for Run with Best Test R2 (mean of 3 clusters)


Unnamed: 0,mean_train_r2,mean_test_r2,mean_train_nrmse,mean_test_nrmse,mean_simplicity
8,0.6106,0.6069,0.2214,0.2265,-2.0


### Agglomerative Clustering

In [None]:
print_metrics_clusters('subset_metrics_feyn_cluster_agg_radar.csv')


Metrics for Run with Best Test R2 (mean of 3 clusters)


Unnamed: 0,mean_train_r2,mean_test_r2,mean_train_nrmse,mean_test_nrmse,mean_simplicity
3,0.7556,0.7887,0.4006,0.3727,-2.1333


In [None]:
print_metrics_clusters('subset_metrics_feyn_cluster_agg_rhozdr.csv')


Metrics for Run with Best Test R2 (mean of 3 clusters)


Unnamed: 0,mean_train_r2,mean_test_r2,mean_train_nrmse,mean_test_nrmse,mean_simplicity
3,0.8973,0.8746,0.3758,0.398,-2.3


In [None]:
print_metrics_clusters('subset_metrics_feyn_cluster_agg_rain.csv')


Metrics for Run with Best Test R2 (mean of 3 clusters)


Unnamed: 0,mean_train_r2,mean_test_r2,mean_train_nrmse,mean_test_nrmse,mean_simplicity
4,0.6466,0.6623,0.2201,0.2248,-1.9667


# Exploring New Symbolic Regression Models

In [None]:
def print_metrics_gpg(file):
  """Show the metrics and model of the GPG trial with the highest test R2.

  The CSV at `file` is read and rounded to 4 decimals; the row(s) whose
  'Test R2s' equals the column maximum are displayed, and the first matching
  entry of the 'Models' column is printed as the equation.
  In the equations, x_0 is Z, x_1 is ZDR, x_2 is KDP, x_3 is RhoHV.
  """

  results = pd.read_csv(file).round(decimals = 4)
  wanted = ['Train R2s', 'Test R2s', 'Train NRMSES', 'Test NRMSES', 'Simplicities']

  print("\nMetrics for Run with Best Test R2")

  # trial(s) that reach the top test R2
  score = results['Test R2s']
  is_best = score == score.max()

  display(results.loc[is_best, wanted])
  print("Equation:", results.loc[is_best, 'Models'].values[0])

In [None]:
# print results from gpg without adjustments (same as gpg from benchmarking)
print_metrics_gpg('regular_results.csv')


Metrics for Run with Best Test R2


Unnamed: 0,Train R2s,Test R2s,Train NRMSES,Test NRMSES,Simplicities
6,0.8744,0.9049,0.4115,0.3842,-2.5


Equation: '-0.009477709373552686*x_0*x_1*x_3*(x_1 - 5.078491)/(x_3 - 4.577389) + 0.004744200993516793*x_0*x_3**2*(x_0 + x_2)*(x_2 + 0.5811773311853556)*(x_3 + cos(x_1) + cos(x_2 - x_3) + 0.8610352745595037) + (0.004745682268064854*x_2 + 0.007022129103868347)*(-cos(x_3) + x_2/x_1)/cos(x_0) - 4.549306'


In [None]:
print_metrics_gpg('Z-R_λ=1.0_results.csv')


Metrics for Run with Best Test R2


Unnamed: 0,Train R2s,Test R2s,Train NRMSES,Test NRMSES,Simplicities
6,0.8546,0.89,0.4427,0.4132,-2.3


Equation: 0.00159029522910714*x_0*x_3**2*(x_0 - x_2)*(x_2 + 0.593977749347687)*(x_3 + cos(x_1) + cos(x_2 - x_3) + 6.94880723953247) - 0.335680693387985*sin(cos(1) + 6.58548784255981/(x_1 - 2.70159029960632)) - 4.36045265197754


In [None]:
print_metrics_gpg('clusters_cluster_kmeans_rhozdr_λ=20.0_results.csv')


Metrics for Run with Best Test R2


Unnamed: 0,Train R2s,Test R2s,Train NRMSES,Test NRMSES,Simplicities
6,0.8746,0.906,0.411,0.3819,-2.5


Equation: -0.0148754622787237*x_0*x_1*x_3*(x_1 - 4.9152398109436)/(x_3 - 4.57189130783081) + 0.00426671048626304*x_0*x_3**2*(x_0 + x_2)*(x_2 + 0.602297484874725)*(x_3 + cos(x_1) + cos(x_2 - x_3) + 1.20319759845734) - (0.0136936511844397*x_2 + 0.00167051586322486)*(cos(x_3) - x_2/x_1)/cos(x_0) - 4.75743007659912


In [None]:
print_metrics_gpg('binned-rainfall_λ=0.01_results.csv')


Metrics for Run with Best Test R2


Unnamed: 0,Train R2s,Test R2s,Train NRMSES,Test NRMSES,Simplicities
6,0.8748,0.9067,0.4108,0.3804,-2.3


Equation: 0.0051016635261476*x_0*x_3**2*(x_0 - x_2)*(x_2 + 0.485699206590652)*(x_3 + cos(x_1) + cos(x_2 - x_3) + 0.926511168479919) - 0.752737104892731*sin(cos(1) + 5.98380661010742/(x_1 - 2.69139385223389)) - 4.5027232170105
