In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import torch
import yaml

class DotDict(dict):
    """Dictionary whose keys can also be read, set and deleted as attributes.

    Reading a key that is not present yields ``None`` instead of raising.
    A value that is a plain ``dict`` is wrapped in a new ``DotDict`` on
    access, so nested lookups such as ``cfg.a.b`` can be chained.
    """

    def __getattr__(self, name):
        value = dict.get(self, name)
        if type(value) is dict:
            # Wrap on the fly; the stored value itself stays a plain dict.
            return DotDict(value)
        return value

    # Attribute assignment / deletion map straight onto item assignment / deletion.
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__


# Parse the YAML configuration and expose it through attribute-style access.
with open("./config/config.yaml", 'r') as stream:
    config = DotDict(yaml.safe_load(stream))

## Comparison of num_expressions in RL

In [None]:
# Gather the logged training curves of the runs whose output folders differ
# only in the nb_<q> suffix (q = 1 ... 5) into one long-format table.
nums = [str(i) for i in range(1, 6)]
interesting_data = ['scores', 'max_scores']
# Pre-create the columns (in the final column order) so every run can simply
# extend them instead of re-allocating the lists on each concatenation.
data = {key: [] for key in interesting_data + ['num_expressions', 'Episode']}

for num in nums:
    a = torch.load('outputs/rl_000_nb_' + num + '/model_500000.pt', map_location='cpu')
    n_points = len(a['scores'])
    for col in interesting_data:
        res = a[col]
        if num == '1' and col == 'max_scores':
            # NOTE(review): uniform noise in [0.02, 0.04) is added to the q = 1
            # max_scores curve, and it is unseeded, so the figure is not
            # reproducible -- confirm this jitter is intended.
            # The noise length follows the curve length instead of a fixed 5000.
            res = (np.array(res) + np.random.random(len(res)) * (0.04 - 0.02) + 0.02).tolist()
        data[col].extend(res)
    data['num_expressions'].extend([num] * n_points)
    # Points are labelled 100, 200, ..., 100 * n_points on the x-axis.
    data['Episode'].extend(range(100, 100 * n_points + 1, 100))

df = pd.DataFrame(data).interpolate()
# Smooth every run separately with a moving average; the first
# window_size - 1 points of each run become NaN.
window_size = 100
for col in interesting_data:
    df[col] = df.groupby('num_expressions')[col].transform(lambda s: s.rolling(window_size).mean())

In [None]:
# Display the smoothed table; the rolling mean leaves NaN in the first
# window_size - 1 rows of every num_expressions group.
df

In [None]:
# Collect the training curves of the nb_5 runs whose folder names differ in the
# three-digit id (presumably the beta setting -- the figure that plots these
# runs titles them beta = 0.00 / 0.10 / 0.20).
nums = ['000', '010', '020']
interesting_data = ['scores', 'max_scores']
data = {key: [] for key in interesting_data + ['num_expressions', 'Episode']}

for run_id in nums:
    checkpoint = torch.load(f'outputs/rl_{run_id}_nb_5/model_500000.pt', map_location='cpu')
    n_points = len(checkpoint['scores'])
    for col in interesting_data:
        data[col] = data[col] + checkpoint[col]
    # The column keeps the name 'num_expressions' although it stores the run id here.
    data['num_expressions'] = data['num_expressions'] + [run_id] * n_points
    data['Episode'] = data['Episode'] + list(range(100, 100 * n_points + 1, 100))

df = pd.DataFrame(data).interpolate()

# Moving average per run so the curves are readable.
window_size = 100
for col in interesting_data:
    df[col] = df.groupby('num_expressions')[col].transform(
        lambda series: series.rolling(window_size).mean()
    )

In [None]:
import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# 2 x 2 figure: max reward (top row) and mean reward (bottom row), with q = 1
# in the left column and q = 2..5 in the right column.
cols = plotly.colors.DEFAULT_PLOTLY_COLORS
fig = make_subplots(rows=2, cols=2,
                    subplot_titles=(r"$q = 1 \text{ max Reward}$", r"$q = 2:5 \text{ max Reward}$", r"$q=1 \text{ mean Reward}$", r"$q = 2:5 \text{ mean Reward}$"),
                    horizontal_spacing=0.08, vertical_spacing=0.15)


def _add_curve(frame, column, color, row, col, **trace_kwargs):
    """Draw ``frame[column]`` against ``frame['Episode']`` as a thin line in subplot (row, col).

    Extra keyword arguments (e.g. ``name`` or ``showlegend``) are passed on
    to ``go.Scatter``.
    """
    # add_trace(..., row=, col=) replaces the deprecated append_trace.
    fig.add_trace(
        go.Scatter(
            x=frame['Episode'],
            y=frame[column],
            mode='lines',
            marker=dict(color=color),
            line=dict(width=0.8),
            **trace_kwargs
        ), row=row, col=col)


# NOTE(review): the filters below expect df['num_expressions'] to hold '1'..'5'.
# In notebook order the cell right before this one rebuilds df with the run ids
# '000', '010', '020', so these selections come back empty unless the
# num_expressions comparison cell is re-run first.
temp_df = df[df['num_expressions'] == '1']
_add_curve(temp_df, 'max_scores', cols[0], row=1, col=1, name='1')
_add_curve(temp_df, 'scores', cols[0], row=2, col=1, showlegend=False)

# Two separate passes keep the original trace (and therefore legend) order.
for i in range(2, 6):
    temp_df = df[df['num_expressions'] == str(i)]
    _add_curve(temp_df, 'max_scores', cols[i], row=1, col=2, showlegend=False)

for i in range(2, 6):
    temp_df = df[df['num_expressions'] == str(i)]
    # Only the bottom-right traces carry a legend entry for q = 2..5.
    _add_curve(temp_df, 'scores', cols[i], row=2, col=2, name=str(i))

fig.update_yaxes(title_text=r"$R^*(\tau)$", row=1, col=1)
fig.update_yaxes(title_text=r"$\bar{R}(\tau)$", row=2, col=1)
fig.update_xaxes(title_text=r"Episodes", col=1, row=2)
fig.update_xaxes(title_text=r"Episodes", col=2, row=2)
fig.update_layout(legend_title_text=r'$q$', width=900)
fig.show()

In [None]:
import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# One subplot per run id in `nums`, each showing the smoothed max reward curve.
cols = plotly.colors.DEFAULT_PLOTLY_COLORS
fig = make_subplots(rows=1, cols=3, subplot_titles=(r"$\beta = 0.00$", r"$\beta = 0.10$", r"$\beta = 0.20$"))

# Subplot columns are 1-based, hence start=1.
for subplot_col, num in enumerate(nums, start=1):
    temp_df = df[df['num_expressions'] == num]
    # add_trace(..., row=, col=) replaces the deprecated append_trace.
    fig.add_trace(
        go.Scatter(
            x=temp_df['Episode'],
            y=temp_df['max_scores'],
            mode='lines',
            marker=dict(color=cols[0])
        ), row=1, col=subplot_col)

fig.update_xaxes(title_text=r"Episodes")
fig.update_yaxes(title_text=r"$R^*(\tau)$", row=1, col=1)
fig.update_layout(showlegend=False)

fig.show()

In [None]:
# Load a saved checkpoint onto the CPU so it can be inspected without a GPU.
a = torch.load('outputs/rl_3_000/model_200000.pt', map_location='cpu')

In [None]:
# Put the checkpoint's 'scores' into a one-column frame (this rebinds `df`).
df = pd.DataFrame({'scores': a['scores']})
# 10-point moving average, first 20 values only (the first 9 are NaN because the window is not full yet).
df['scores'].rolling(10).mean().head(20)

In [2]:
from scripts.dclasses import Dataset
from scripts.language import Language
from pytorch_lightning.utilities.seed import seed_everything

import time

# Seed before anything else in this cell is constructed.
seed_everything(5)

n_functions = 500
language = Language(config.Language)

# Time only the dataset generation and print the elapsed seconds.
start = time.time()
data = Dataset(n_functions, language)
print(time.time() - start)

Global seed set to 5


0.20682811737060547


In [4]:
# Peek at the first generated sample: a dict with entries such as 'n_obs',
# 'Target Expression', 'X_lower_bound', 'X_upper_bound' and 'X' (see the output below).
data[0]

{'n_obs': 109,
 'Target Expression': <scripts.expression.Expression at 0x7fe923f66370>,
 'X_lower_bound': array([2.94549608, 5.94538454, 4.85927601, 3.91428036, 4.92586312]),
 'X_upper_bound': [5.41395791612212,
  9.280248927216388,
  6.318485458241235,
  5.952554166902721,
  6.411073906960194],
 'X': array([[5.11256631, 7.08482112, 5.68825694, 4.99828506, 5.61354989],
        [4.23816855, 8.76047695, 5.81514196, 5.73921551, 6.24256814],
        [4.16031501, 8.04070911, 5.58325604, 4.0591408 , 5.18986107],
        [4.75209196, 6.61297268, 5.6927945 , 4.61636491, 5.12256497],
        [2.98148811, 7.2158463 , 6.28865707, 4.23405202, 5.09053578],
        [3.17595917, 7.69122682, 5.96787719, 5.34749166, 5.90320549],
        [4.9858137 , 8.19767233, 6.31381353, 5.67653872, 5.90140418],
        [5.0029411 , 6.88334492, 6.19354051, 4.99279228, 5.91011369],
        [5.14837862, 8.45798641, 6.21622756, 4.98710076, 6.37200356],
        [5.31031585, 8.28499274, 5.01066643, 4.56958722, 5.22548841]

In [None]:
import torch

# For every sample, append y as an extra last column to X, then stack all
# samples into one tensor (torch.stack needs every sample to have the same shape).
Xys = torch.stack([
    torch.cat(
        (torch.from_numpy(sample['X']), torch.from_numpy(sample['y']).unsqueeze(1)),
        dim=1,
    )
    for sample in data
])

# Write the stacked tensor into the sibling project folder.
torch.save(Xys, '../NeuralSymbolicRegressionThatScales-main/tensor2.pt')

In [None]:
# Average length of the target expressions' traversals over the dataset.
traversal_lengths = [len(row['Target Expression'].traversal) for row in data]

# Divide by the number of samples actually iterated instead of a hard-coded
# 100000, which does not match the 500-sample dataset this notebook builds.
sum(traversal_lengths) / len(traversal_lengths)

In [None]:
from scripts.expression import Expression
from scripts.model import ETIN_model
import torch
import numpy as np

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')



# The bare string below is an author's TODO note (it is never used). English translation:
#   CREATE THE EXPRESSION IN A DIFFERENT WAY:
#     Instead of sampling a value probabilistically, take the maximum with some probability
#     and, with the remaining probability, draw a random value from the vector according
#     to the probabilities.
'''

HACER QUE TE CREE LA EXPRESIÓN DE OTRA MANERA:

    . En cambio de coger un valor probabilísticamente, coger el máximo con cierta probabilidad
      y con otra probabilidad coger un valor aleatorio del vector según probabilidades.

'''

# Checkpoint to evaluate; when left as None a freshly initialised model is built
# instead (see the if/else further down in this cell).
# path = '/home/gridsan/amorenas/ETIN3/outputs/rl/model_80000.pt'
path = None

def nrmse(y_pred, y_true):
    """Return the normalised RMSE of a prediction and the reward derived from it.

    The root-mean-square error between ``y_pred`` and ``y_true`` is divided
    by the standard deviation of ``y_true``.

    Returns:
        A tuple ``(error, reward)`` where ``reward = 5 / (1 + error)``.
    """
    residuals = y_pred - y_true
    rmse = np.sqrt(np.mean(residuals ** 2))
    error = rmse / np.std(y_true)
    return error, 5 / (1 + error)

# Build the model: freshly initialised when no checkpoint path is given,
# otherwise restored from that checkpoint.
if path is None:
    etin_model = ETIN_model(config.Model, language.info_for_model)
else:
    etin_model = ETIN_model.load_from_checkpoint(path, cfg=config.Model, info_for_model=language.info_for_model)

etin_model.to(device)

# Let the model propose one expression per sample and score it against the sample's y.
errors = []
rewards = []
for i, row in enumerate(data):
    new_expr = Expression(language, model=etin_model, prev_info=row)
    if i == 5:
        # Show one target / prediction pair as a sanity check. The deliberate
        # NameError (`a = bbb`) that used to abort the loop here was leftover
        # debugging and kept the loop from ever reaching the summary below.
        print(row['Target Expression'].to_sympy())
        print(new_expr.to_sympy())
    y_pred = new_expr.evaluate(row['X'])
    # Skip predictions that contain NaN, exceed 1e5 in magnitude, or contain a
    # value whose magnitude is below 1e-2.
    if (np.isnan(y_pred).any() or np.abs(y_pred).max() > 1e5 or np.abs(y_pred).min() < 1e-2):
        continue
    error, reward = nrmse(y_pred, row['y'])
    errors.append(error)
    rewards.append(reward)

# Mean error and mean reward over the samples that were not skipped.
print(np.mean(errors), np.mean(rewards))

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Histogram of the values in the `rewards` list.
rewards_series = pd.Series(rewards)

rewards_series.plot.hist(grid=True, bins=20, rwidth=0.9,
                         color='#607c8e')
plt.title('Rewards for 500 equations')
# A histogram puts the values (rewards) on the x-axis and their frequencies on
# the y-axis; the two labels were previously swapped.
plt.xlabel('Rewards')
plt.ylabel('Counts')
plt.grid(axis='y', alpha=0.75)