### Predicting formation enthalpies for solid solutions of Lanthanides Orthophosphates


In [3]:
import os
import sys
import inspect

print(sys.version)

currentdir = os.getcwd()
parentdir = os.path.dirname(currentdir)
grandparentdir = os.path.dirname(parentdir)

sys.path.insert(0, grandparentdir) 

3.9.6 (default, Nov 10 2023, 13:38:27) 
[Clang 15.0.0 (clang-1500.1.0.2.5)]


In [4]:
import read_data
import featureSpan
import lasso
import Utils
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import train_test_split, cross_val_score, ShuffleSplit
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
import scipy.optimize
import pandas as pd
import seaborn as sns
import scipy.stats as ss
from sklearn.linear_model import Lasso
from itertools import combinations, product
import itertools
import math
import pandas as pd

## Monazite

#### Load data from files

In [5]:
list1 = read_data.readData("../../data/DATA_HE_xenotime.dat")
print("Shape of List1 is {}".format(list1.shape))
print(list1)

Shape of List1 is (525, 4)
[[5.70000000e+01 5.80000000e+01 7.50000000e-01 2.31598225e-01]
 [5.70000000e+01 5.80000000e+01 6.25000000e-01 2.78958493e-01]
 [5.70000000e+01 5.80000000e+01 5.00000000e-01 2.97583757e-01]
 ...
 [7.00000000e+01 7.10000000e+01 5.00000000e-01 7.77590152e-02]
 [7.00000000e+01 7.10000000e+01 3.75000000e-01 7.09266168e-02]
 [7.00000000e+01 7.10000000e+01 2.50000000e-01 5.91211365e-02]]


In [6]:
list2 = read_data.readCSVData("../../data/Data_Ln-xenotime.csv", material="xenotime", Volume=True)
print("Shape of List2 is {}".format(list2.shape))
print((list2))
list2 = list2[:,0:8]
print("Z, M, IP2, E. Charge, electronegativity, IP3, Vol")
print((list2))

Shape of List2 is (15, 9)
[[ 57.         138.90547     19.1773      91.7         11.
    1.1         49.95        83.16086967   1.16      ]
 [ 58.         140.116       20.198       99.73        12.
    1.12        36.758       81.11579575   1.143     ]
 [ 59.         140.90765     21.624      112.27        12.3
    1.13        38.98        79.31176899   1.126     ]
 [ 60.         144.242       22.1        120.19        12.95
    1.14        40.41        77.89975607   1.109     ]
 [ 61.         145.          22.3        127.26        13.6
    1.13        41.1         76.63526873   1.093     ]
 [ 62.         150.36        23.4        137.16        14.25
    1.17        41.4         75.42961529   1.079     ]
 [ 63.         151.964       24.92       143.83        14.9
    1.2         42.7         74.36126731   1.066     ]
 [ 64.         157.25        20.63       149.42        15.9
    1.2         44.          73.33463097   1.053     ]
 [ 65.         158.92535     21.91       156.33       

#### Generated Elemental Training/Testing sets from loaded data

In [7]:
def generateFeatures_full2(HEList, featureList):
    X = []
    Y = []
    
    for i in range(len(HEList)):
        Y.append(HEList[i][3])
        
        x = HEList[i][2]
        #Endmember 1
        X1 = featureList[int(HEList[i][0]-57)]
        #Endmember 2
        X2 = featureList[int(HEList[i][1]-57)]

        #X.append(np.concatenate((np.array([x, 1/x, x * x, 1 / (x * x), 1-x, 1/(1-x), (1-x) * (1-x), 1/( (1-x) * (1-x))]), np.array([abs(X1-X2)/2.0, 2.0 / (abs(X1-X2)), (X1 + X2) / 2.0, 2.0 / (X1 + X2)]).flatten())))
        # define the features related to m
        mfeatures = np.array([x, 1/x, x * x, 1 / (x * x), 1-x, 1/(1-x), (1-x) * (1-x), 1/( (1-x) * (1-x))])
        
        f1 = []
        f2 = []
        f3 = []
        f4 = []

        #for Z, M, IP2, E, Charge, generating features including AD, 1/AD, AM, 1 / AM
        for j in range(len(X1)-3):
            f1.append([abs(X1[j]-X2[j])/2.0])
            f2.append([2.0/abs(X1[j]-X2[j])])
            f3.append([(X1[j] + X2[j]) / 2.0])
            f4.append([2.0 / (X1[j] + X2[j])])
            
        #rint(f3)
        #print(np.asarray(f3).flatten())
    
        #print(np.concatenate((mfeatures, np.asarray(f1).flatten(), np.asarray(f3).flatten())))
              
        f5 = []
        f6 = []
        f7 = []

        #for electronegativity, only generating AD, AM, 1 / AM
        for j in range(5,6):
            f5.append([abs(X1[j]-X2[j])/2.0])
            f6.append([(X1[j] + X2[j]) / 2.0])
            f7.append([2.0 / (X1[j] + X2[j])])            


        f8 = []
        f10 = []
        f11 = []
        
        #for IP3, generating features including AD, 1/AD, AM, 1 / AM    
        for j in range(6,7):
            f8.append([abs(X1[j]-X2[j])/2.0])
            f10.append([(X1[j] + X2[j]) / 2.0])
            f11.append([2.0 / (X1[j] + X2[j])])
            
        f12 = []
        f13 = []
        f14 = []
        f15 = []
        f16 = []
        f17 = []
        f18 = []
        f19 = []
        f20 = []
        f21 = []
        f22 = []
        f23 = []
        
        #for Vol
        for j in range(7,len(X1)):
            f12.append([abs(X1[j]-X2[j])/2.0])
            f13.append([2.0/abs(X1[j]-X2[j])])
            f14.append([(X1[j] + X2[j]) / 2.0])
            f15.append([2.0 / (X1[j] + X2[j])])
            f16.append([pow(abs(X1[j]-X2[j])/2.0,2)])
            f17.append([pow(2.0/abs(X1[j]-X2[j]),2)])
            f18.append([pow((X1[j] + X2[j]) / 2.0,2)])
            f19.append([pow(2.0 / (X1[j] + X2[j]),2)])
            f20.append([pow(abs(X1[j]-X2[j])/2.0,3)])
            f21.append([pow(2.0/abs(X1[j]-X2[j]),3)])
            f22.append([pow((X1[j] + X2[j]) / 2.0,3)])
            f23.append([pow(2.0 / (X1[j] + X2[j]),3)])
            
        
        X.append(np.concatenate((mfeatures, np.asarray(f1).flatten(),  np.asarray(f2).flatten(),  np.asarray(f3).flatten(), np.asarray(f4).flatten(), np.asarray(f5).flatten(), np.asarray(f6).flatten(),np.asarray(f7).flatten(),np.asarray(f8).flatten(),np.asarray(f10).flatten(),np.asarray(f11).flatten(),np.asarray(f12).flatten(),np.asarray(f13).flatten(),np.asarray(f14).flatten(),np.asarray(f15).flatten(),np.asarray(f16).flatten(),np.asarray(f17).flatten(),np.asarray(f18).flatten(),np.asarray(f19).flatten(),np.asarray(f20).flatten(),np.asarray(f21).flatten(),np.asarray(f22).flatten(),np.asarray(f23).flatten()     )))
            
    return X, Y

In [8]:
listX, listY = generateFeatures_full2(list1, list2)
X = np.asarray(listX)
Y = np.asarray(listY)

print(X.shape)
print(Y.shape)

elemental_list = ["m", "1/m", "m^2", "(1/m)^2", "(1-m)", "1/(1-m)", "(1-m)^2", "(1/(1-m))^2", "[AD:Z]", "[AD:M]", "[AD:IP2]", "[AD:Young]", "[AD:Charge]",  "(1/[AD:Z])", "(1/[AD:M])", "(1/[AD:IP2])", "(1/[AD:Young])", "(1/[AD:Charge])",  "[AM:Z]", "[AM:M]", "[AM:IP2]", "[AM:Young]", "[AM:Charge]",  "(1/[AM:Z])", "(1/[AM:M])", "(1/[AM:IP2])", "(1/[AM:Young])", "(1/[AM:Charge])", "[AD:electronegativity]", "[AM:electronegativity]","(1/[AM:electronegativity])", "[AD:IP3]", "[AM:IP3]", "(1/[AM:IP3])" ,  "[AD:Vol]", "(1/[AD:Vol])","[AM:Vol]","(1/[AM:Vol])",   "([AD:Vol])^2", "(1/[AD:Vol])^2" , "([AM:Vol])^2","(1/[AM:Vol])^2",    "([AD:Vol])^3", "(1/[AD:Vol])^3", "([AM:Vol])^3", "(1/[AM:Vol])^3"         ]

print(len(elemental_list))
print(elemental_list)
m, n = X.shape

elemental_features = []

for i in elemental_list:
    elemental_features.append("("+i+")")
        
print("The elemental features are: \n {}".format(elemental_features))

(525, 46)
(525,)
46
['m', '1/m', 'm^2', '(1/m)^2', '(1-m)', '1/(1-m)', '(1-m)^2', '(1/(1-m))^2', '[AD:Z]', '[AD:M]', '[AD:IP2]', '[AD:Young]', '[AD:Charge]', '(1/[AD:Z])', '(1/[AD:M])', '(1/[AD:IP2])', '(1/[AD:Young])', '(1/[AD:Charge])', '[AM:Z]', '[AM:M]', '[AM:IP2]', '[AM:Young]', '[AM:Charge]', '(1/[AM:Z])', '(1/[AM:M])', '(1/[AM:IP2])', '(1/[AM:Young])', '(1/[AM:Charge])', '[AD:electronegativity]', '[AM:electronegativity]', '(1/[AM:electronegativity])', '[AD:IP3]', '[AM:IP3]', '(1/[AM:IP3])', '[AD:Vol]', '(1/[AD:Vol])', '[AM:Vol]', '(1/[AM:Vol])', '([AD:Vol])^2', '(1/[AD:Vol])^2', '([AM:Vol])^2', '(1/[AM:Vol])^2', '([AD:Vol])^3', '(1/[AD:Vol])^3', '([AM:Vol])^3', '(1/[AM:Vol])^3']
The elemental features are: 
 ['(m)', '(1/m)', '(m^2)', '((1/m)^2)', '((1-m))', '(1/(1-m))', '((1-m)^2)', '((1/(1-m))^2)', '([AD:Z])', '([AD:M])', '([AD:IP2])', '([AD:Young])', '([AD:Charge])', '((1/[AD:Z]))', '((1/[AD:M]))', '((1/[AD:IP2]))', '((1/[AD:Young]))', '((1/[AD:Charge]))', '([AM:Z])', '([AM:M]

In [9]:
dfX = pd.DataFrame(data=X, columns=elemental_features)

new_features=[]
new_columns = []

for i in range(n):
    for j in range(i):
        new_features.append(elemental_features[i]+"*"+elemental_features[j])
        new_columns.append(dfX[elemental_features[i]].values * dfX[elemental_features[j]].values)


for i in range(n):
    for j in range(i):
        for k in range(j):
            new_features.append(elemental_features[i]+"*"+elemental_features[j]+"*"+elemental_features[k])
            new_columns.append(dfX[elemental_features[i]] * dfX[elemental_features[j]] * dfX[elemental_features[k]])
            
            
new_columns = np.asarray(new_columns)
dfX = pd.concat(
    [
        dfX,
        pd.DataFrame(
            new_columns.T, 
            index=dfX.index, 
            columns=new_features
        )
    ], axis=1
)

dfX.head()

dfX.std() == 0
dfX=dfX.loc[:, dfX.std() > 0]
dfX.head()

Unnamed: 0,(m),(1/m),(m^2),((1/m)^2),((1-m)),(1/(1-m)),((1-m)^2),((1/(1-m))^2),([AD:Z]),([AD:M]),...,((1/[AM:Vol])^3)*(([AM:Vol])^3)*([AD:Vol]),((1/[AM:Vol])^3)*(([AM:Vol])^3)*((1/[AD:Vol])),((1/[AM:Vol])^3)*(([AM:Vol])^3)*([AM:Vol]),((1/[AM:Vol])^3)*(([AM:Vol])^3)*((1/[AM:Vol])),((1/[AM:Vol])^3)*(([AM:Vol])^3)*(([AD:Vol])^2),((1/[AM:Vol])^3)*(([AM:Vol])^3)*((1/[AD:Vol])^2),((1/[AM:Vol])^3)*(([AM:Vol])^3)*(([AM:Vol])^2),((1/[AM:Vol])^3)*(([AM:Vol])^3)*((1/[AM:Vol])^2),((1/[AM:Vol])^3)*(([AM:Vol])^3)*(([AD:Vol])^3),((1/[AM:Vol])^3)*(([AM:Vol])^3)*((1/[AD:Vol])^3)
0,0.75,1.333333,0.5625,1.777778,0.25,4.0,0.0625,16.0,0.5,0.605265,...,1.022537,0.97796,82.138333,0.012175,1.045582,0.956405,6746.705701,0.000148,1.069146,0.935326
1,0.625,1.6,0.390625,2.56,0.375,2.666667,0.140625,7.111111,0.5,0.605265,...,1.022537,0.97796,82.138333,0.012175,1.045582,0.956405,6746.705701,0.000148,1.069146,0.935326
2,0.5,2.0,0.25,4.0,0.5,2.0,0.25,4.0,0.5,0.605265,...,1.022537,0.97796,82.138333,0.012175,1.045582,0.956405,6746.705701,0.000148,1.069146,0.935326
3,0.375,2.666667,0.140625,7.111111,0.625,1.6,0.390625,2.56,0.5,0.605265,...,1.022537,0.97796,82.138333,0.012175,1.045582,0.956405,6746.705701,0.000148,1.069146,0.935326
4,0.25,4.0,0.0625,16.0,0.75,1.333333,0.5625,1.777778,0.5,0.605265,...,1.022537,0.97796,82.138333,0.012175,1.045582,0.956405,6746.705701,0.000148,1.069146,0.935326


In [10]:
print(len(dfX.columns.values))
np.array(dfX.columns.values)

16255


array(['(m)', '(1/m)', '(m^2)', ...,
       '((1/[AM:Vol])^3)*(([AM:Vol])^3)*((1/[AM:Vol])^2)',
       '((1/[AM:Vol])^3)*(([AM:Vol])^3)*(([AD:Vol])^3)',
       '((1/[AM:Vol])^3)*(([AM:Vol])^3)*((1/[AD:Vol])^3)'], dtype=object)

In [11]:
def LassoFit(lmb, X, Y, max_iter=100000, standardization = True):
    
    scaler = StandardScaler()
    scaler.fit(X)
    X_standardized = scaler.transform(X)
    lasso =  Lasso(alpha=lmb, max_iter=max_iter)
    lasso.fit(X_standardized, Y.copy())
    coef =  lasso.coef_
    selected_indices = coef.nonzero()[0]
    selected_features = np.array(dfX.columns.values)[selected_indices]
    Y_predict = lasso.predict(X_standardized)
    MAE, MSE, ME = Utils.compute_error(Y.copy(), Y_predict)
        
    return coef, selected_indices, selected_features, MAE, MSE, ME

LassoFit(0.01, dfX, Y)

(array([ 0., -0.,  0., ..., -0.,  0.,  0.]),
 array([  105,   116,   119,   609,   613,   630,   763,   772,   776,
          904,  1068,  1148,  1502,  2292,  3173,  4074,  4792,  4803,
         4806,  6608,  7083,  7129,  7261,  7347,  7422,  7473,  7474,
         7477,  7599,  9517,  9522,  9524,  9526,  9528,  9535,  9551,
         9581,  9738, 10068, 11812, 12304, 12308, 12458, 12471, 12568,
        12883, 12909, 13023, 13054, 15988, 16001]),
 array(['([AD:Young])*([AD:Z])', '([AD:Charge])*([AD:Z])',
        '([AD:Charge])*([AD:Young])', '([AD:Vol])*([AD:Z])',
        '([AD:Vol])*([AD:Charge])', '([AD:Vol])*([AM:electronegativity])',
        '(([AD:Vol])^2)*([AM:IP2])',
        '(([AD:Vol])^2)*([AM:electronegativity])',
        '(([AD:Vol])^2)*((1/[AM:IP3]))', '(([AD:Vol])^3)*((1/m)^2)',
        '((1/[AM:Vol])^3)*(([AD:Vol])^2)', '([AD:Z])*((1-m)^2)*(m^2)',
        '((1/[AD:M]))*([AD:Young])*([AD:Z])',
        '([AM:IP2])*([AD:Charge])*([AD:Young])',
        '((1/[AM:M]))*([AD:Cha

In [12]:
#### Define a function which fits Lasso to have no more nonzero coefficients than a given threshold 
def LassoSelect(X, Y, min, max, step, threshold, standardization = True):
    
    scaler = StandardScaler()
    scaler.fit(X)
    X_standardized = scaler.transform(X)
        
    found = False
    for lmbda in np.arange (min, max, step):
        coef, selected_indices, selected_features, MAE, MSE, ME = LassoFit(lmbda, X.copy(), Y.copy())
        if len(selected_indices) <= threshold:
            found = True
            break
    
    if found:
        print("FOUND with threshold: {}".format(threshold))
        print("Lambda: {}, nnz: {}, MAE: {}, MSE: {}, MAPE: {}".format(lmbda, len(selected_indices), MAE, MSE, ME))
            
    else:
        print("NOT FOUND with threshold: {}".format(threshold))
        print("Closest are: ")
        print("Lambda: {}, nnz: {}, MAE: {}, MSE: {}, MAPE: {}".format(lmbda, len(selected_indices), MAE, MSE, ME))
     
    X_reduced = X[selected_features]
    
    return X_reduced


X_reduced = LassoSelect(dfX, Y, 0.001, 0.101, 0.005, 30)
X_reduced.head()

FOUND with threshold: 30
Lambda: 0.036000000000000004, nnz: 20, MAE: 0.0482960014896978, MSE: 0.005373846435157111, MAPE: 0.08193007493548482


Unnamed: 0,([AD:Vol])*([AD:Charge]),(([AD:Vol])^2)*((1-m)),(([AD:Vol])^2)*((1/[AM:IP3])),((1/[AM:Vol])^2)*(([AD:Vol])^2),((1/[AM:Vol])^3)*(([AD:Vol])^2),([AD:Young])*((1-m))*(m),((1/[AM:Charge]))*([AD:Young])*([AD:Z]),((1/[AM:Charge]))*([AD:Charge])*([AD:Young]),([AD:Vol])*([AM:IP2])*([AD:Charge]),([AD:Vol])*([AM:electronegativity])*([AD:Z]),([AD:Vol])*((1/[AM:IP3]))*([AD:Charge]),(([AD:Vol])^2)*((1-m))*(m),(([AD:Vol])^2)*((1-m)^2)*(m),(([AD:Vol])^2)*((1-m)^2)*(m^2),(([AD:Vol])^2)*([AD:Z])*((1-m)),(([AD:Vol])^2)*([AD:M])*((1-m)),(([AD:Vol])^2)*([AM:Young])*((1-m)),(([AD:Vol])^2)*([AM:Charge])*((1-m)),((1/[AM:Vol])^3)*(([AD:Vol])^2)*([AM:IP2]),((1/[AM:Vol])^3)*(([AD:Vol])^2)*((1/[AM:IP3]))
0,0.511268,0.261395,0.024117,0.000155,2e-06,0.752813,0.174565,0.174565,10.065675,0.567508,0.011793,0.196047,0.049012,0.036759,0.130698,0.158214,25.019466,3.006048,3.7e-05,4.352023e-08
1,0.511268,0.392093,0.024117,0.000155,2e-06,0.941016,0.174565,0.174565,10.065675,0.567508,0.011793,0.245058,0.091897,0.057436,0.196047,0.23732,37.529199,4.509072,3.7e-05,4.352023e-08
2,0.511268,0.522791,0.024117,0.000155,2e-06,1.00375,0.174565,0.174565,10.065675,0.567508,0.011793,0.261395,0.130698,0.065349,0.261395,0.316427,50.038932,6.012096,3.7e-05,4.352023e-08
3,0.511268,0.653489,0.024117,0.000155,2e-06,0.941016,0.174565,0.174565,10.065675,0.567508,0.011793,0.245058,0.153161,0.057436,0.326744,0.395534,62.548666,7.515119,3.7e-05,4.352023e-08
4,0.511268,0.784186,0.024117,0.000155,2e-06,0.752813,0.174565,0.174565,10.065675,0.567508,0.011793,0.196047,0.147035,0.036759,0.392093,0.474641,75.058399,9.018143,3.7e-05,4.352023e-08


In [13]:
def LassoL0(X, Y, nnz):    
    nr, nc = X.shape
    X = np.column_stack((X, np.ones(nr)))
    se_min = np.inner(Y, Y)
    coef_min, permu_min = None, None
    for permu in combinations(range(nc), nnz):
        X_ls = X[:, permu + (-1,)]
        coef, se, __1, __2 = np.linalg.lstsq(X_ls, Y, rcond=-1)
        try:
            if se[0] < se_min: 
                se_min = se[0]
                coef_min, permu_min = coef, permu
        except:
            pass
        
    return coef_min, permu_min

In [14]:
def LassoL0Fit(X_reduced, Y, nnz, log=True):
    
    scaler = StandardScaler()
    scaler.fit(X_reduced)
    X_std = scaler.transform(X_reduced)
    
    nr, nc = X_reduced.shape
   
    coefficients, selected_indices = LassoL0(X_std, Y, nnz)
   
    coefficients = np.array(coefficients)
    selected_indices = np.array(selected_indices)
    feature_reduced = np.array(X_reduced.columns.values)
    feature_list_selected = feature_reduced[selected_indices]
    
    X_selected = X_reduced[feature_list_selected]

    mean_selected = X_selected.mean()
    std_selected = X_selected.std()

    
    if log:
        print("Lasso: selected coefficients are: {}".format(coefficients))
        print("Lasso: selected features are: {}".format(feature_list_selected))
        
    #-mean/std
    mean_std = []
    for i in range(len(selected_indices)):
        mean_std.append(coefficients[i] * mean_selected[i]/std_selected[i])
 
    sum_mean_std = sum(mean_std)

    for i in range(len(selected_indices)):
        coefficients[i] = coefficients[i] / std_selected[i]

    
    coefficients[len(selected_indices)] -= sum_mean_std
    
    function = str(coefficients[0])+" * "+feature_list_selected[0]
    
    for i in range(1, len(selected_indices)):
        if coefficients[i] >= 0:
            function += " + " + str(coefficients[i])+" * "+feature_list_selected[i]
        else:
            function += " - " + str(abs(coefficients[i]))+" * "+feature_list_selected[i]

    
    if coefficients[len(selected_indices)] >= 0:
        function += " + " + str(coefficients[len(selected_indices)])
    else:
        function += " - " + str(abs(coefficients[len(selected_indices)]))
    
    if log:
        print("Constructed function is: {}".format(function))

    X_selected = np.column_stack((X_selected, np.ones(X_selected.shape[0])))
    Y_predict = X_selected[:,0] * coefficients[0]

    for i in range(1,len(selected_indices)+1):
        Y_predict = Y_predict + X_selected[:,i] * coefficients[i]
    
    if log:
        Utils.print_error(Y.copy(),Y_predict,"Lasso L0: {} coef".format(nnz))
    
    return Y_predict, coefficients, selected_indices

In [15]:
LassoL0Fit(X_reduced, Y.copy(), 1);

Lasso: selected coefficients are: [3.6874823 3.1736172]
Lasso: selected features are: ['(([AD:Vol])^2)*((1-m))*(m)']
Constructed function is: 1.2264765333867818 * (([AD:Vol])^2)*((1-m))*(m) + 0.06308556014158206
Lasso L0: 1 coef
Mean absolute error: 0.16999152916334842
Mean squared error: 0.06885010753784614
Mean absolute percentage error: 0.2021633881048294


  mean_std.append(coefficients[i] * mean_selected[i]/std_selected[i])
  coefficients[i] = coefficients[i] / std_selected[i]


In [16]:
LassoL0Fit(X_reduced, Y.copy(), 2);

Lasso: selected coefficients are: [1.98536388 1.7522968  3.1736172 ]
Lasso: selected features are: ['((1/[AM:Vol])^3)*(([AD:Vol])^2)' '(([AD:Vol])^2)*((1-m)^2)*(m^2)']
Constructed function is: 62658.08649882582 * ((1/[AM:Vol])^3)*(([AD:Vol])^2) + 2.5431214573690673 * (([AD:Vol])^2)*((1-m)^2)*(m^2) + 0.009410240446634965
Lasso L0: 2 coef
Mean absolute error: 0.08178305315102993
Mean squared error: 0.0243344679349078
Mean absolute percentage error: 0.05549948551940692


  mean_std.append(coefficients[i] * mean_selected[i]/std_selected[i])
  coefficients[i] = coefficients[i] / std_selected[i]


In [17]:
LassoL0Fit(X_reduced, Y.copy(), 3);

Lasso: selected coefficients are: [1.69209297 1.75521259 0.32068741 3.1736172 ]
Lasso: selected features are: ['((1/[AM:Vol])^3)*(([AD:Vol])^2)' '(([AD:Vol])^2)*((1-m)^2)*(m^2)'
 '(([AD:Vol])^2)*([AM:Young])*((1-m))']
Constructed function is: 53402.45597571192 * ((1/[AM:Vol])^3)*(([AD:Vol])^2) + 2.5473531615686715 * (([AD:Vol])^2)*((1-m)^2)*(m^2) + 0.00030404527208673706 * (([AD:Vol])^2)*([AM:Young])*((1-m)) + 0.012323747723713474
Lasso L0: 3 coef
Mean absolute error: 0.04923051659791516
Mean squared error: 0.005880810060592321
Mean absolute percentage error: 0.05924008575884566


  mean_std.append(coefficients[i] * mean_selected[i]/std_selected[i])
  coefficients[i] = coefficients[i] / std_selected[i]


In [18]:
LassoL0Fit(X_reduced, Y.copy(), 4);

Lasso: selected coefficients are: [ 0.32365844  3.63775771 -4.13554286  3.98467349  3.1736172 ]
Lasso: selected features are: ['(([AD:Vol])^2)*((1-m))' '((1/[AM:Vol])^2)*(([AD:Vol])^2)'
 '(([AD:Vol])^2)*((1-m))*(m)' '(([AD:Vol])^2)*((1-m)^2)*(m^2)']
Constructed function is: 0.04322886821167512 * (([AD:Vol])^2)*((1-m)) + 1524.3348249028963 * ((1/[AM:Vol])^2)*(([AD:Vol])^2) - 1.3755038965384665 * (([AD:Vol])^2)*((1-m))*(m) + 5.782986456486036 * (([AD:Vol])^2)*((1-m)^2)*(m^2) + 0.004700895753725121
Lasso L0: 4 coef
Mean absolute error: 0.03963678263488842
Mean squared error: 0.003348722729325236
Mean absolute percentage error: 0.04452193248868577


  mean_std.append(coefficients[i] * mean_selected[i]/std_selected[i])
  coefficients[i] = coefficients[i] / std_selected[i]


In [19]:
LassoL0Fit(X_reduced, Y.copy(), 5);

Lasso: selected coefficients are: [  0.32235988  13.12465102  -6.34915391 -10.6876808    7.46469376
   3.1736172 ]
Lasso: selected features are: ['(([AD:Vol])^2)*((1-m))' '((1/[AM:Vol])^2)*(([AD:Vol])^2)'
 '((1/[AM:Vol])^3)*(([AD:Vol])^2)' '(([AD:Vol])^2)*((1-m))*(m)'
 '(([AD:Vol])^2)*((1-m)^2)*(m^2)']
Constructed function is: 0.04305542798274422 * (([AD:Vol])^2)*((1-m)) + 5499.641322902264 * ((1/[AM:Vol])^2)*(([AD:Vol])^2) - 200379.30470940837 * ((1/[AM:Vol])^3)*(([AD:Vol])^2) - 3.5547803730570413 * (([AD:Vol])^2)*((1-m))*(m) + 10.83356592295805 * (([AD:Vol])^2)*((1-m)^2)*(m^2) + 0.011480591872340185
Lasso L0: 5 coef
Mean absolute error: 0.034846890702588396
Mean squared error: 0.0024547264898192897
Mean absolute percentage error: 0.05585166097954957


  mean_std.append(coefficients[i] * mean_selected[i]/std_selected[i])
  coefficients[i] = coefficients[i] / std_selected[i]
