In [None]:
# Mount Google Drive at /content/drive; later cells read files from paths under it.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
import sys
# Add the Drive notebook folder to the module search path
# (presumably where the local PolyDiff module imported below lives - confirm).
sys.path.append("/content/drive/MyDrive/Colab Notebooks")

In [None]:
# Switch the working directory to the Drive notebook folder (IPython `cd` automagic).
cd /content/drive/MyDrive/Colab Notebooks

/content/drive/MyDrive/Colab Notebooks


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from numpy import linalg as LA
import pandas as pd
#import pysindy as ps
from tqdm import tqdm
from termcolor import colored
from scipy.optimize import minimize
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
import yaml
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split


from PolyDiff import PolyDiffPoint
from mpl_toolkits.mplot3d import Axes3D

%matplotlib inline
plt.rcParams["font.family"] = 'Arial'
np.set_printoptions(suppress=True)

In [None]:
import random
import numpy as np
import pandas as pd
import glob
import cv2 as cv
import tensorflow as tf


class MAMLDataLoader:
    """Sample regression tasks for MAML from ``EulerX*`` CSV files.

    Every file matching ``EulerX*`` under ``data_path`` is one data source.
    A task draws ``n_way`` sources and, from each source, ``k_shot`` rows for
    the support (inner-loop) set plus ``q_query`` rows for the query
    (outer-loop) set. After the first CSV column is dropped, columns 0-16 are
    the input features and column 17 is the regression target.
    """

    # Number of feature columns; the column right after them is the target.
    N_FEATURES = 17

    def __init__(self, data_path='/content/drive/MyDrive/Colab Notebooks/', batch_size=50, n_way=2, k_shot=60, q_query=20):
        """
        :param data_path: directory prefix searched recursively for ``EulerX*``
            files. It is concatenated directly with the glob pattern, so it
            must end with a path separator.
        :param batch_size: number of tasks in one meta-batch
        :param n_way: number of data files sampled for one task
        :param k_shot: rows per file used for the support (inner-loop) set
        :param q_query: rows per file used for the query (outer-loop) set
        """
        # The caller's data_path is honoured (it used to be overwritten by a
        # hard-coded path). Sorting makes the file order, and therefore the
        # seeded sampling below, independent of filesystem listing order.
        self.file_list = sorted(glob.glob(data_path + "**/EulerX*", recursive=True))
        self.steps = len(self.file_list) // batch_size

        self.n_way = n_way
        self.k_shot = k_shot
        self.q_query = q_query
        self.meta_batch_size = batch_size

    def __len__(self):
        """Number of full meta-batches that fit into the discovered files."""
        return self.steps

    def get_one_task_data(self, seed):
        """
        Build one task: ``n_way`` files, each contributing ``k_shot`` support
        rows and ``q_query`` query rows.

        The same ``seed`` always produces the same task: it drives both the
        file selection / shuffling (a private ``random.Random``) and the row
        selection (NumPy's global generator, which is re-seeded here).

        :param seed: integer seed identifying the task
        :return: support_x (n_way*k_shot, 17), support_y (n_way*k_shot, 1),
                 query_x (n_way*q_query, 17), query_y (n_way*q_query, 1)
        :raises ValueError: if fewer than ``n_way`` files are available or a
            file has fewer than ``k_shot + q_query`` rows
        """
        np.random.seed(seed)
        # np.random.seed does not affect the `random` module, so file sampling
        # and shuffling get their own generator seeded with the same value.
        rng = random.Random(seed)
        task_files = rng.sample(self.file_list, self.n_way)

        rows_per_file = self.k_shot + self.q_query
        support_data = []
        query_data = []
        for csv_path in task_files:
            # file_list already holds concrete file paths; read them directly
            # instead of passing them through glob again.
            table = pd.read_csv(csv_path).values[:, 1:]  # drop the first column
            picked_rows = np.random.choice(table.shape[0], rows_per_file, replace=False)
            sample = table[picked_rows, :]
            support_data.append(sample[:self.k_shot])
            query_data.append(sample[self.k_shot:])

        # Shuffle the per-file blocks (not the rows inside a block).
        rng.shuffle(support_data)
        rng.shuffle(query_data)

        n_feat = self.N_FEATURES
        support_x = [block[:, 0:n_feat] for block in support_data]
        support_y = [block[:, n_feat] for block in support_data]
        query_x = [block[:, 0:n_feat] for block in query_data]
        query_y = [block[:, n_feat] for block in query_data]

        return (np.array(support_x).reshape(-1, n_feat), np.array(support_y).reshape(-1, 1),
                np.array(query_x).reshape(-1, n_feat), np.array(query_y).reshape(-1, 1))

    def get_one_batch(self, batchseed):
        """
        Generator yielding meta-batches of ``meta_batch_size`` tasks.

        Task ``i`` of the batch uses seed ``i + batchseed * meta_batch_size``.
        The seeds do not change between iterations, so every ``next()`` on the
        same generator yields a batch built from the same task seeds.

        :param batchseed: integer selecting the block of task seeds
        :return: yields (support_x, support_y, query_x, query_y), each stacked
                 along a leading task axis
        """
        while True:
            batch_support_x = []
            batch_support_y = []
            batch_query_x = []
            batch_query_y = []

            for i in range(self.meta_batch_size):
                support_x, support_y, query_x, query_y = self.get_one_task_data(i + batchseed * self.meta_batch_size)
                batch_support_x.append(support_x)
                batch_support_y.append(support_y)
                batch_query_x.append(query_x)
                batch_query_y.append(query_y)

            yield np.array(batch_support_x), np.array(batch_support_y), \
                  np.array(batch_query_x), np.array(batch_query_y)


In [None]:
from tensorflow.keras import layers, models, losses,  optimizers, initializers
import tensorflow as tf
import numpy as np
from sklearn.metrics import r2_score


class MAML:
    """Meta-learner that adapts a small Keras regression network per task.

    ``meta_model`` holds the meta-parameters. During ``train_on_batch`` the
    same model object is temporarily overwritten with each task's adapted
    weights, so no second network is kept.
    """

    def __init__(self, input_shape=(17,)):
        """
        :param input_shape: shape of one input sample
        """
        self.input_shape = input_shape
        self.meta_model = self.get_maml_model()

    def get_maml_model(self):
        """
        Build the regression network: Dense(20, relu) -> Dense(15, relu) -> Dense(1).

        Kernels use seeded truncated-normal initializers and biases start at
        zero. No regularizer is attached to any layer.

        :return: the Keras Sequential model (not compiled)
        """
        model = models.Sequential([
            layers.Dense(20, activation="relu", input_shape=self.input_shape,
                         kernel_initializer=initializers.TruncatedNormal(mean=0.0, stddev=0.05, seed=1),
                         bias_initializer='zeros'),
            # NOTE(review): stddev=1 here is 20x the value used by the other two layers - confirm it is intended.
            layers.Dense(15, activation="relu",
                         kernel_initializer=initializers.TruncatedNormal(mean=0.0, stddev=1, seed=1),
                         bias_initializer='zeros'),
            layers.Dense(1,
                         kernel_initializer=initializers.TruncatedNormal(mean=0.0, stddev=0.05, seed=1),
                         bias_initializer='zeros'),
        ])
        return model

    def train_on_batch(self, train_data, inner_optimizer=optimizers.Adam(0.01), inner_step=2, outer_optimizer=optimizers.Adam(0.01)):
        """
        Run one meta-batch: adapt to every task on its support set, then
        evaluate the adapted weights on the query set and (optionally) update
        the meta-parameters.

        The outer gradient is taken at each task's adapted weights and applied
        to the initial weights. Adapted weights travel as plain arrays through
        ``get_weights`` / ``set_weights``, so the inner-loop updates themselves
        are not differentiated through.

        NOTE: the default optimizers are created once, when the class body is
        executed, so their Adam state persists across calls and is shared by
        every ``MAML`` instance that relies on the defaults.

        :param train_data: iterator whose ``next()`` returns
            (support_x, support_y, query_x, query_y), each with a leading task axis
        :param inner_optimizer: optimizer used for the support-set (inner) steps
        :param inner_step: number of inner update steps per task
        :param outer_optimizer: optimizer used for the meta update; pass a falsy
            value (e.g. ``None``) to skip the update and only evaluate
        :return: (mean query loss, mean query R2) over the tasks of the batch
        """
        batch_r2 = []
        batch_loss = []
        task_weights = []

        # Snapshot of the meta-parameters; every task starts from this snapshot.
        meta_weights = self.meta_model.get_weights()

        meta_support_x, meta_support_y, meta_query_x, meta_query_y = next(train_data)

        for support_x, support_y in zip(meta_support_x, meta_support_y):

            # Each task adapts from the original meta-parameters.
            self.meta_model.set_weights(meta_weights)
            for _ in range(inner_step):
                with tf.GradientTape() as tape:
                    logits = self.meta_model(support_x, training=True)
                    loss = losses.mean_squared_error(support_y, logits)
                    loss = tf.reduce_mean(loss)
                    # The support-set R2 used to be computed here on every
                    # step but was never read, so it has been dropped.

                grads = tape.gradient(loss, self.meta_model.trainable_variables)
                inner_optimizer.apply_gradients(zip(grads, self.meta_model.trainable_variables))

            # Keep the adapted weights so the query pass below evaluates the same task.
            task_weights.append(self.meta_model.get_weights())

        with tf.GradientTape() as tape:
            for i, (query_x, query_y) in enumerate(zip(meta_query_x, meta_query_y)):

                # Forward pass with this task's adapted weights.
                self.meta_model.set_weights(task_weights[i])

                logits = self.meta_model(query_x, training=True)
                loss = losses.mean_squared_error(query_y, logits)
                loss = tf.reduce_mean(loss)
                batch_loss.append(loss)

                r2 = r2_score(query_y, logits)
                batch_r2.append(r2)

            mean_r2 = tf.reduce_mean(batch_r2)
            mean_loss = tf.reduce_mean(batch_loss)

        # Always restore the meta-parameters, so an evaluation-only call leaves
        # the model unchanged and a training call updates the original weights.
        self.meta_model.set_weights(meta_weights)
        if outer_optimizer:
            grads = tape.gradient(mean_loss, self.meta_model.trainable_variables)
            outer_optimizer.apply_gradients(zip(grads, self.meta_model.trainable_variables))

        return mean_loss, mean_r2

In [None]:

# Meta-learner instance shared by the training, save/load and weight-transfer cells below.
maml = MAML()

#batch_train_loss ,r2= maml.train_on_batch(train_data)
#print(batch_train_loss,r2)



In [None]:
epochs = 6
batches_per_epoch = 5

# Build the loader once: constructing it runs a recursive glob over the Drive
# folder, which used to be repeated for every single batch.
data_loader = MAMLDataLoader()

for e in range(epochs):
    print('\nEpoch {}/{}'.format(e+1, epochs))

    # The same batch seeds 0..batches_per_epoch-1 are passed in every epoch.
    for seed in range(batches_per_epoch):
        batch_train_loss, r2 = maml.train_on_batch(train_data=data_loader.get_one_batch(seed))
        print(batch_train_loss, r2)

    # End-of-epoch line: repeats the metrics of the last batch.
    print(batch_train_loss, r2)










Epoch 1/6
tf.Tensor(0.8640944, shape=(), dtype=float32) tf.Tensor(0.03776248497065589, shape=(), dtype=float64)
tf.Tensor(0.90246767, shape=(), dtype=float32) tf.Tensor(0.014672927198451343, shape=(), dtype=float64)
tf.Tensor(0.891287, shape=(), dtype=float32) tf.Tensor(0.07152203519620784, shape=(), dtype=float64)
tf.Tensor(0.85897183, shape=(), dtype=float32) tf.Tensor(0.11767579008461942, shape=(), dtype=float64)
tf.Tensor(0.8422353, shape=(), dtype=float32) tf.Tensor(0.17885067944481617, shape=(), dtype=float64)
tf.Tensor(0.8422353, shape=(), dtype=float32) tf.Tensor(0.17885067944481617, shape=(), dtype=float64)

Epoch 2/6
tf.Tensor(0.6390736, shape=(), dtype=float32) tf.Tensor(0.3099289606803547, shape=(), dtype=float64)
tf.Tensor(0.5670383, shape=(), dtype=float32) tf.Tensor(0.42187602780379513, shape=(), dtype=float64)
tf.Tensor(0.41608706, shape=(), dtype=float32) tf.Tensor(0.5306439952726401, shape=(), dtype=float64)
tf.Tensor(0.34976077, shape=(), dtype=float32) tf.Tensor(0.

In [None]:
# Save the meta-model weights to maml.h5 (a relative path, resolved against the current working directory).
maml.meta_model.save_weights("maml.h5")

In [None]:
# Load the weights stored in maml.h5 back into the meta-model.
maml.meta_model.load_weights("maml.h5")

In [None]:
# Read the three Euler tables as NumPy arrays. The first column of every CSV
# is dropped (presumably a saved row index - confirm against the files).
X_library = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Euler自变量.csv').values[:, 1:]
y_library = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Euler因变量.csv').values[:, 1:]
xx = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Euler量纲数.csv').values[:, 1:]


In [None]:
import torch
import numpy as np
import torch
class ini_BP_net(torch.nn.Module):
    """Two-hidden-layer torch MLP whose parameters start from Keras weights.

    The layers are first built with the requested sizes and then their
    weights and biases are replaced by the Keras ones. The first layer is
    zero-padded with extra input columns so that features added on top of the
    original inputs start with no influence on the output.
    """

    def __init__(self, n_feature, n_hidden1, n_hidden2, n_output, keras_weights=None):
        """
        :param n_feature: number of input features of this network
        :param n_hidden1: width of the first hidden layer
        :param n_hidden2: width of the second hidden layer
        :param n_output: number of outputs
        :param keras_weights: optional list
            ``[kernel1, bias1, kernel2, bias2, kernel3, bias3]`` in Keras layout
            (kernels shaped (in, out)). Defaults to the weights of the global
            ``maml.meta_model``.
        :raises ValueError: if ``n_feature`` is smaller than the input size of
            the supplied first kernel

        The hidden/output sizes are not checked against the loaded weights;
        they are assumed to match.
        """
        super(ini_BP_net, self).__init__()
        # Build the hidden and output layers.
        self.hidden1 = torch.nn.Linear(n_feature, n_hidden1)
        self.hidden2 = torch.nn.Linear(n_hidden1, n_hidden2)
        self.output = torch.nn.Linear(n_hidden2, n_output)

        if keras_weights is None:
            # Fetch the weights once instead of once per tensor.
            keras_weights = maml.meta_model.get_weights()
        kernel1, bias1, kernel2, bias2, kernel3, bias3 = keras_weights[:6]

        # Keras kernels are (in, out); torch.nn.Linear stores (out, in).
        weight1 = np.asarray(kernel1).T
        extra_inputs = n_feature - weight1.shape[1]
        if extra_inputs < 0:
            raise ValueError(
                'n_feature ({}) is smaller than the input size of the loaded weights ({})'.format(
                    n_feature, weight1.shape[1]))
        if extra_inputs:
            # Zero columns for the inputs the source network never saw.
            padding = np.zeros((weight1.shape[0], extra_inputs))
            weight1 = np.concatenate([weight1, padding], axis=1)

        self.hidden1.weight = self._as_parameter(weight1)
        self.hidden1.bias = self._as_parameter(bias1)
        self.hidden2.weight = self._as_parameter(np.asarray(kernel2).T)
        self.hidden2.bias = self._as_parameter(bias2)
        self.output.weight = self._as_parameter(np.asarray(kernel3).T)
        self.output.bias = self._as_parameter(bias3)

    @staticmethod
    def _as_parameter(values):
        """Copy an array into a trainable float32 torch parameter."""
        return torch.nn.Parameter(torch.tensor(np.asarray(values), dtype=torch.float32))

    def forward(self, x):
        """Forward pass: two ReLU hidden layers followed by a linear output."""
        hid1 = torch.relu(self.hidden1(x))
        hid2 = torch.relu(self.hidden2(hid1))
        out = self.output(hid2)
        return out
def train(model, epochs, learning_rate, x_train, y_train):
    """
    Full-batch training of ``model`` with Adam.

    :param model: torch module to train (updated in place)
    :param epochs: number of optimisation steps
    :param learning_rate: Adam learning rate
    :param x_train: training inputs
    :param y_train: training targets
    :return: the loss tensor from the last iteration, evaluated before that
        iteration's parameter update. It is the SUM of squared errors
        (``reduction="sum"``), not the mean. ``None`` when ``epochs`` is 0 or
        negative (this used to raise ``UnboundLocalError``).
    """
    loss_fc = torch.nn.MSELoss(reduction="sum")
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Training mode only needs to be selected once, not on every iteration.
    model.train()

    loss = None
    for _ in range(epochs):
        # Forward pass and loss.
        prediction = model(x_train)
        loss = loss_fc(prediction, y_train)
        # Clear old gradients, back-propagate, update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return loss

In [None]:
class DimensionlessLearning(object):
    '''
    Identify the power-law exponents of one dimensionless number with a pattern search.

    A pair of basis coefficients defines exponents for the dimensional
    quantities in ``xx``. Their power product is appended to the regular
    features as one extra input column and a network is fitted on the result
    (see ``_opt``).
    '''
    def __init__(self, X_library, xx, y_library, dimension_info, basis_list, X1_test, xx_test, y_test):
        '''
        Args:
            X_library [array]: training features fed to the network as they are;
            xx [array]: training dimensional quantities, one column per exponent;
            y_library [array]: training targets;
            dimension_info [list]: [D_in, D_out], only used by check_dimension;
            basis_list [list]: the two basis vectors of exponents;
            X1_test, xx_test, y_test [array]: test counterparts of the training arrays;
        '''
        super(DimensionlessLearning, self).__init__()
        self.X_library = X_library
        self.xx = xx
        self.y_library = y_library
        self.X1_test = X1_test
        self.xx_test = xx_test
        self.y_test = y_test
        self.dimension_info, self.basis_list = dimension_info, basis_list
        self.basis1_in, self.basis2_in = self.basis_list[0], self.basis_list[1]
        self.basis_num = 2

    def fetch_coef_pi(self, coef):
        '''
        combine the two basis vectors into one exponent vector:
        coef[0] * basis1 + coef[1] * basis2
        '''
        coef_pi = coef[0] * self.basis1_in + coef[1] * self.basis2_in
        return coef_pi

    def check_dimension(self, coef):
        '''
        assert that D_in . exponent_vector equals D_out for the given coefficients

        Raises:
            AssertionError: if the resulting dimension differs from D_out
        '''
        coef_pi = self.fetch_coef_pi(coef)
        target_D_out = np.dot(self.dimension_info[0], coef_pi)
        assert np.array_equal(target_D_out, self.dimension_info[1]), 'Wrong target_D_out!'

    def scale_input(self, xx, coef):
        '''
        collapse the dimensional quantities into one number per row

        Return:
            X_scaled [array, (n, 1)]: row-wise product of xx ** exponents
            power_index [array]: the exponent vector that was used
        '''
        power_index = self.fetch_coef_pi(coef)
        X_scaled = np.prod(np.power(xx, power_index.reshape(-1,)), axis=1).reshape(-1, 1)
        return X_scaled, power_index

    def fit(self, method='pattern_search', init_point_num=20, seed=0):
        '''
        run the chosen optimizer from several seeded starting points and keep the best

        Args:
            method [str]: 'pattern_search' or 'gradient_descent';
            init_point_num [int]: number of starting points;
            seed [int]: base seed;
        Return:
            r2 [float]: the best training R2 (0 if no run reached a non-negative R2)
            basis_coef: the second value returned by the winning run (for
                pattern search this is the exponent vector), or None
        Raises:
            ValueError: unknown method
            NotImplementedError: 'gradient_descent' requested but no
                fit_gradient_descent method is available
        '''
        BASE = 2 ** 20
        r2, basis_coef = 0, None

        if method == 'pattern_search':
            search = self.fit_pattern_search
        elif method == 'gradient_descent':
            # This class does not define fit_gradient_descent; a subclass may.
            search = getattr(self, 'fit_gradient_descent', None)
            if search is None:
                raise NotImplementedError('fit_gradient_descent is not implemented')
        else:
            raise ValueError('Error in the optimizaiton method')

        for idx in range(init_point_num):
            # NOTE: seed is reassigned, so each seed is derived from the previous one.
            seed = (idx + seed * init_point_num) % BASE

            # The optimizers return more than two values (fit_pattern_search
            # returns four); only the first two are needed here. Unpacking
            # exactly two used to raise ValueError.
            result = search(seed=seed)
            r2_temp, basis_coef_temp = result[0], result[1]
            if method == 'gradient_descent':
                print('r2_temp, basis_coef_temp, scaling_coef_temp', r2_temp, basis_coef_temp)

            if r2_temp < r2:
                continue
            r2, basis_coef = r2_temp, basis_coef_temp

        return r2, basis_coef

    @staticmethod
    def _as_float_tensor(values):
        '''
        convert an array to a float32 torch tensor
        '''
        return torch.tensor(values).to(torch.float32)

    def _opt(self, basis_coef):
        '''
        fit the network for one pair of basis coefficients and score it

        Return:
            r2 [float]: R2 on the training data
            power_index [array]: the exponent vector for basis_coef
            r2_test [float]: R2 on the test data
            r22 [float]: R2 on the test data when the extra column is built
                with all-zero exponents (i.e. is constant 1)
        '''
        # training features: regular inputs plus the dimensionless column
        X_scaled, power_index = self.scale_input(self.xx, basis_coef)
        X = np.concatenate((self.X_library, X_scaled), axis=1)
        y = self.y_library

        model = ini_BP_net(X.shape[1], 20, 15, y.shape[1])
        X = self._as_float_tensor(X)
        y = self._as_float_tensor(y)
        train(model, 180, 8e-3, X, y)
        r2 = r2_score(y.detach().numpy(), model(X).detach().numpy())

        # test score with the same exponents
        Xtest_scaled, _ = self.scale_input(self.xx_test, basis_coef)
        Xtest = self._as_float_tensor(np.concatenate((self.X1_test, Xtest_scaled), axis=1))
        y_test = self._as_float_tensor(self.y_test)
        r2_test = r2_score(y_test.detach().numpy(), model(Xtest).detach().numpy())

        # reference score: same model, extra column computed with zero exponents
        X2_scaled, _ = self.scale_input(self.xx_test, np.array([0, 0]))
        X2 = self._as_float_tensor(np.concatenate((self.X1_test, X2_scaled), axis=1))
        r22 = r2_score(y_test.detach().numpy(), model(X2).detach().numpy())

        return r2, power_index, r2_test, r22

    def fit_pattern_search(self, max_iter=5, grid_range=[-2, 2], grid_interval=0.5, clip_threshold=0.2, seed=0):
        '''
        pattern search-based optimization

        Starting from a random grid point, the neighbouring grid points are
        ranked by (r2_test - r22). The centre moves to the best neighbour when
        that neighbour has a training R2 above 0.9, a non-trivial exponent
        vector and a larger (r2_test - r22) than the centre. The search stops
        as soon as the centre does not move: re-evaluating an unchanged centre
        repeats the same computation, which is what the remaining iterations
        used to do.

        Args:
            max_iter [int]: the maximum iteration for pattern search;
            grid_range [list of int]: the range for each initial basis coefficient;
            grid_interval [float]: the grid interval for each basis coefficient;
            clip_threshold [float]: if the training R2 of the center point is lower than this threshold, break the loop;
            seed [int]: seed for the random starting point;
        Return:
            r2 [float]: the final training R2 score
            power_index [array]: the exponent vector of the final point
            r2_test [float]: the final test R2 score
            r22 [float]: the final test R2 score with zero exponents
        '''
        def _get_coordinates(basis_num, basis_coef, grid_interval):
            '''
            build a list of all neighbouring grid coordinates (center excluded)
            '''
            param_grid = {}
            center_coord = []
            coord_all = []
            for i in range(basis_num):
                gamma_value = basis_coef[i]
                param_grid[f'gamma{i}'] = [gamma_value-grid_interval, gamma_value, gamma_value+grid_interval]
                center_coord.append(gamma_value)

            for params in ParameterGrid(param_grid):
                coord = [params[f'gamma{i}'] for i in range(basis_num)]
                if coord != center_coord:
                    coord_all.append(coord)
            return coord_all

        def _init_basis_coef(basis_num, grid_range, grid_interval):
            '''
            draw every basis coefficient from the grid inside grid_range
            '''
            grid_num = int((grid_range[1] - grid_range[0]) / grid_interval + 1)
            basis_coef = np.zeros((basis_num))
            for i in range(basis_num):
                basis_coef[i] = np.random.choice(np.linspace(grid_range[0], grid_range[1], grid_num), 1)[0]
            return basis_coef

        # initial basis coefficients
        np.random.seed(seed)
        basis_coef = _init_basis_coef(self.basis_num, grid_range, grid_interval)

        iter_num = 0
        while iter_num < max_iter:
            candidate_coord = _get_coordinates(self.basis_num, basis_coef, grid_interval)
            # score the center point
            r2_center, power_index_center, r2_test_center, r22_center = self._opt(basis_coef)

            # break if the center point has a low R2 score
            if r2_center < clip_threshold:
                break

            # score the neighboring points
            r2_bounds_val = []
            power_index_bounds_val = []
            r2_r22_bounds_val = []
            for gamma_list in candidate_coord:
                r2_bound, power_index_bound, r2_test_bound, r22_bound = self._opt(np.array(gamma_list))
                r2_bounds_val.append(r2_bound)
                power_index_bounds_val.append(power_index_bound)
                r2_r22_bounds_val.append(r2_test_bound-r22_bound)

            # neighbor with the largest (r2_test - r22)
            highest_index = np.argsort(r2_r22_bounds_val)[::-1][0]
            iter_num += 1

            # the best neighbor must fit the training data well
            if not r2_bounds_val[highest_index] > 0.9:
                break

            nontrivial = np.max(np.abs(power_index_bounds_val[highest_index])) > 0.001
            better = r2_r22_bounds_val[highest_index] > (r2_test_center - r22_center)
            if not (nontrivial and better):
                # The center would stay where it is, so further iterations
                # would only repeat this evaluation.
                break

            # Move the center. The scores of the new center are computed at
            # the top of the next iteration (or after the loop), so the extra
            # training run that used to happen here is not needed.
            basis_coef = np.array(candidate_coord[highest_index])

        # calculate the final scores and exponents
        r2, power_index, r2_test, r22 = self._opt(basis_coef)

        return r2, power_index, r2_test, r22


In [None]:
def recover_coef1(seed):
    """
    Run one seeded pattern search for the dimensionless number on a fixed subsample.

    Reads the notebook-level arrays ``X_library``, ``xx`` and ``y_library``.
    The rows are treated as consecutive segments of 5400 rows: the same 100
    randomly chosen row offsets (fixed seed 0) are taken from segments 0 and 1
    for training and from segment 2 for testing.

    :param seed: seed of the random starting point of the pattern search
    :return: (r2, coef, r2_test, r22) as returned by ``fit_pattern_search``;
             a summary line is printed only when the training R2 exceeds 0.9
    """
    # Plain arrays replace np.mat, which was removed in NumPy 2.0. The old
    # D_out literal '0;, 0; 0' also contained a stray comma.
    D_in = np.array([[-1, -1, 1], [1, 0, 0], [1, -1, 0], [-3, 0, 1], [-1, -2, 1]]).T
    D_out = np.array([[0], [0], [0]])
    dimension_info = [D_in, D_out]

    basis1_in = np.array([-1, 1, 1, 1, 0]).reshape(-1, 1)
    basis2_in = np.array([-1, 1, -1, 0, 1]).reshape(-1, 1)
    basis_list = [basis1_in, basis2_in]

    segment_len = 5400   # rows per segment
    n_samples = 100      # rows drawn from each segment
    n_features = 17      # regular feature columns; the remaining 5 columns come from xx

    X = np.concatenate((X_library, xx), axis=1)
    np.random.seed(0)
    idx_X = np.random.choice(segment_len, n_samples, replace=False)

    def _subsample(table, segment):
        """Rows idx_X of the given 5400-row segment of table."""
        start = segment * segment_len
        return table[start:start + segment_len, :][idx_X, :]

    X_train = np.concatenate((_subsample(X, 0), _subsample(X, 1)), axis=0)
    y_train = np.concatenate((_subsample(y_library, 0), _subsample(y_library, 1)), axis=0)
    X_test = _subsample(X, 2)
    y_test = _subsample(y_library, 2)

    X1_library = X_train[:, 0:n_features]
    xx1 = X_train[:, n_features:n_features + 5]
    X1_test = X_test[:, 0:n_features]
    xx_test = X_test[:, n_features:n_features + 5]

    dimensionless_learning = DimensionlessLearning(X1_library, xx1, y_train, dimension_info, basis_list, X1_test, xx_test, y_test)

    # pattern search
    r2, coef, r2_test, r22 = dimensionless_learning.fit_pattern_search(seed=seed)

    if r2 > 0.9:
        print('final r2', r2, coef.flatten(), r2_test, r22, r2_test-r22)

    return r2, coef, r2_test, r22

# Run the pattern search for seeds 0..19.
# NOTE(review): seed 2 is skipped; the reason is not recorded in this notebook - confirm.
for i in range(20):
  if i !=2:
    recover_coef1(seed=i)



final r2 0.988510836817302 [ 2.  -2.   3.   0.5 -2.5] 0.8595937043341382 0.8595937043341382 0.0
final r2 0.929024932991756 [ 0.   0.  -1.  -0.5  0.5] 0.8910578824093627 0.38972458898460616 0.5013332934247565
final r2 0.929024932991756 [ 0.   0.  -1.  -0.5  0.5] 0.8910578824093627 0.38972458898460616 0.5013332934247565
final r2 0.988510836817302 [ 2.  -2.   3.   0.5 -2.5] 0.8595937043341382 0.8595937043341382 0.0
final r2 0.988510836817302 [ 3.5 -3.5 -0.5 -2.  -1.5] 0.8595937043341382 0.8595937043341382 0.0
final r2 0.988595356786195 [-0.5  0.5  2.5  1.5 -1. ] 0.8689425149807757 0.8679124740055976 0.0010300409751781148
final r2 0.988510836817302 [ 3. -3.  1. -1. -2.] 0.8595937043341382 0.8595937043341382 0.0
final r2 0.988510836817302 [ 2.  -2.   3.   0.5 -2.5] 0.8595937043341382 0.8595937043341382 0.0
final r2 0.988510836817302 [ 2.5 -2.5  1.5 -0.5 -2. ] 0.8595937043341382 0.8595937043341382 0.0


In [None]:
# Install/upgrade PySR and pytorch_lightning quietly. Versions are not pinned,
# so a re-run may pull newer releases than the ones that produced the saved outputs.
!pip install -Uq pysr pytorch_lightning

In [None]:
%%shell
# Install Julia and PyCall.jl on the runtime unless a `julia` binary is already on PATH.
# Abort the cell on the first failing command.
set -e

#---------------------------------------------------#
JULIA_VERSION="1.8.5"
# Exported so the julia processes started below inherit it
# (presumably turns off automatic package precompilation - confirm in the Julia docs).
export JULIA_PKG_PRECOMPILE_AUTO=0
#---------------------------------------------------#

# `which julia` prints nothing when Julia is missing, which makes the -z test true.
if [ -z `which julia` ]; then
  # Install Julia
  # JULIA_VER keeps only the first two dot-separated fields (e.g. 1.8); it is the directory part of the URL.
  JULIA_VER=`cut -d '.' -f -2 <<< "$JULIA_VERSION"`
  echo "Installing Julia $JULIA_VERSION on the current Colab Runtime..."
  BASE_URL="https://julialang-s3.julialang.org/bin/linux/x64"
  URL="$BASE_URL/$JULIA_VER/julia-$JULIA_VERSION-linux-x86_64.tar.gz"
  wget -nv $URL -O /tmp/julia.tar.gz # -nv means "not verbose"
  # Unpack into /usr/local, dropping the archive's top-level directory.
  tar -x -f /tmp/julia.tar.gz -C /usr/local --strip-components 1
  rm /tmp/julia.tar.gz

  echo "Installing PyCall.jl..."
  julia -e 'using Pkg; Pkg.add("PyCall"); Pkg.build("PyCall")'
  # Smoke test: confirms the julia binary runs.
  julia -e 'println("Success")'

fi



In [None]:
import pysr
# Run PySR's installation step with precompile=False
# (presumably sets up its Julia dependencies - confirm in the PySR docs for the installed version).
pysr.install(precompile=False)



In [None]:
from julia import Julia

# Start the embedded Julia runtime with compiled_modules=False and automatic thread count.
julia = Julia(compiled_modules=False, threads='auto')
from julia import Main
from julia.tools import redirect_output_streams

# Presumably routes Julia's stdout/stderr into the notebook output - confirm in the PyJulia docs.
redirect_output_streams()

In [None]:
# Exponents applied to the five columns of xx; these are the values printed by
# the pattern search above for the run with the largest r2_test - r22 gap.
power_index = np.array([0., 0., -1, -0.5, 0.5])

# One dimensionless number per row: the row-wise product of xx ** power_index.
X_scaled = np.prod(xx ** power_index.ravel(), axis=1)[:, np.newaxis]

# Regression inputs: the regular features followed by the dimensionless column.
X = np.hstack((X_library, X_scaled))

y = y_library

In [None]:
from pysr import PySRRegressor
# X and y are used exactly as built in the previous cell; the former
# `X=X` / `y=y` self-assignments did nothing and were removed.

In [None]:
# Search settings shared by every PySR model in this notebook.
default_pysr_params = {"populations": 10, "model_selection": "best"}

# Only binary operators are enabled. niterations, unary operators and extra
# sympy mappings are left at the library defaults.
model = PySRRegressor(
    binary_operators=["plus", "mult", "-", "/", "pow"],
    **default_pysr_params,
)

model.fit(X, y)

└ @ SymbolicRegression ~/.julia/packages/SymbolicRegression/Y57Eu/src/SymbolicRegression.jl:430


Note: you are running with more than 10,000 datapoints. You should consider turning on batching (`options.batching`), and also if you need that many datapoints. Unless you have a large amount of noise (in which case you should smooth your dataset first), generally < 10,000 datapoints is enough to find a functional form.
Started!

Expressions evaluated per second: 4.740e+03
Head worker occupation: 0.2%
Progress: 13 / 400 total iterations (3.250%)
Hall of Fame:
---------------------------------------------------------------------------------------------------
Complexity  Loss       Score     Equation
1           9.117e-01  5.603e-02  x11
3           5.463e-01  2.561e-01  (-0.7666689 * x12)
5           2.102e-01  4.776e-01  ((-0.5891778 * x12) - x7)
7           1.626e-01  1.283e-01  ((x12 * -0.79873604) - (x7 * 0.7998485))
11          1.619e-01  1.107e-03  (((-1.4786193 / x17) * x8) - ((x7 + x12) * 0.82001513))
15          1.568e-01  8.004e-03  ((((-0.2123858 - 0.12210648) * (x3 + x7)) + 

In [None]:
# Print the fitted regressor: its table of candidate equations with score, loss and complexity.
print(model)

PySRRegressor.equations_ = [
	   pick     score                                           equation  \
	0        0.000000                                                x11   
	1        0.259407                                 (-0.8437832 * x12)   
	2        0.602658                          (-0.7991712 * (x7 + x12))   
	3        0.000003           ((x12 * -0.79873604) - (x7 * 0.7998485))   
	4        0.279038  ((x16 / (-0.06583656 * 0.004153589)) - (x7 + x...   
	5  >>>>  1.241353  (((x17 * -1.0060347) * (x16 * x17)) - (x7 + x12))   
	6        0.065135  ((((x15 - (x17 * x17)) * x16) - (x7 + x12)) / ...   
	7        0.003518  ((((x15 - ((x17 - x13) * x17)) * x16) - (x7 + ...   
	8        0.005068  ((((x15 - (((x17 - x9) - x13) * x17)) * x16) -...   
	9        0.000812  (((((x7 * x17) - (((x17 - x9) - x13) * x17)) *...   
	
	       loss  complexity  
	0  0.911733           1  
	1  0.542687           3  
	2  0.162587           5  
	3  0.162587           7  
	4  0.093050           9  
	5  

In [None]:
# Print the selected equation as a SymPy expression.
print(model.sympy())

-x12 - 1.0060347*x16*x17**2 - x7
