# Multi-XRF Using MVR

In [71]:
# If you have installation questions, please reach out
import pandas as pd # data storage
import numpy as np  # math and stuff

import sklearn  
import datetime

from sklearn import linear_model

from sklearn.utils.class_weight import compute_sample_weight
from sklearn.model_selection import train_test_split
from sklearn.metrics import median_absolute_error, max_error, mean_squared_error
from sklearn.metrics import explained_variance_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [72]:
import defaults
from defaults import framecleaner, splitterz

# Dataframes

In [73]:
# Read the three merged, imputed core/well-log CSV exports (file names are
# prefixed OS0, OS1 and OS2) in that order and bind them to df0, df1 and df2.
df0, df1, df2 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../../core_to_wl_merge/OS0_Merged_dataset_imputed_08_23_2021.csv',
        '../../core_to_wl_merge/OS1_Merged_dataset_imputed_08_23_2021.csv',
        '../../core_to_wl_merge/OS2_Merged_dataset_imputed_08_23_2021.csv',
    )
)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [74]:
# Column selections handed to framecleaner for every offset.
# "dataset" lists the "inputs" columns followed by the "target" columns.
param_dict = {
    # every column kept in the working frame
    "dataset": [
        'CAL', 'GR', 'DT', 'SP', 'DENS', 'PE', 'RESD', 'PHIN', 'PHID',
        'GR_smooth', 'PE_smooth',
        'Ti', 'Mg', 'Si', 'Al', 'Ca',
    ],
    # predictor columns
    "inputs": [
        'CAL', 'GR', 'DT', 'SP', 'DENS', 'PE', 'RESD', 'PHIN', 'PHID',
        'GR_smooth', 'PE_smooth',
    ],
    # columns predicted jointly by the regression
    "target": ['Ti', 'Mg', 'Si', 'Al', 'Ca'],
}

In [75]:
def mvr_multi_xrf(X_train, X_test, y_train, y_test, OS='os'):
  """Fit a linear regression on the training split and score it on the test split.

  The fitted intercept(s) and coefficient matrix are printed, then the test
  predictions are summarised in a one-row DataFrame.

  Parameters
  ----------
  X_train, y_train
      Features and targets passed to ``LinearRegression.fit``.
  X_test, y_test
      Held-out features passed to ``predict`` and the targets the
      predictions are scored against.
  OS : str, default 'os'
      Label stored verbatim in the 'Offset' column of the result.

  Returns
  -------
  pandas.DataFrame
      One row with columns 'target', 'Offset', 'RMSE', 'MAE', 'day',
      'month', 'year', 'model' and 'version'. 'RMSE' is the mean of the
      per-target root-mean-squared errors. 'MAE' holds the value returned by
      ``median_absolute_error`` (a *median*, not a mean, absolute error).
      The date fields come from ``datetime.datetime.now()`` and 'version'
      is ``sklearn.__version__``.
  """
  regr = linear_model.LinearRegression()
  regr.fit(X_train, y_train)

  print('Intercept: \n', regr.intercept_)
  print('Coefficients: \n', regr.coef_)

  preds = regr.predict(X_test)

  # The `squared=False` keyword of mean_squared_error was deprecated in
  # scikit-learn 1.4 and removed in 1.6. For multi-output targets it took the
  # square root of each target's MSE and then averaged them, so the same
  # number is rebuilt here from the per-target MSEs, which works on both old
  # and new releases.
  per_target_mse = mean_squared_error(y_test, preds, multioutput='raw_values')
  rmse = np.sqrt(per_target_mse).mean()

  # Stored under the 'MAE' key below; note this is the median absolute error.
  median_abs_err = median_absolute_error(y_test, preds)

  # Timestamp the row so results from different runs can be told apart.
  now = datetime.datetime.now()

  d = {'target': ['Multi-XRF'],
       'Offset': [OS],
       'RMSE': [rmse],
       'MAE': [median_abs_err],
       'day': [now.day],
       'month': [now.month],
       'year': [now.year],
       'model': ['MVR'],
       'version': [sklearn.__version__]}

  results = pd.DataFrame(data=d)

  return results

# Offset 0

In [76]:
# Keep only the rows whose Al value is >= 0.
# NOTE(review): this filter is applied to the offset-0 frame only; df1 and df2
# are handed to framecleaner unfiltered in their cells — confirm that is intended.
df0 = df0.loc[df0['Al'] >= 0]

# framecleaner (imported from defaults) receives the frame plus the three
# column lists and returns the pair unpacked here as X and Y_array.
X, Y_array = framecleaner(
    df0,
    param_dict['dataset'],
    param_dict['inputs'],
    param_dict['target'],
)

# splitterz (imported from defaults) returns the train/test pieces.
X_train, X_test, y_train, y_test = splitterz(X.values, Y_array)

# Fit, score, and display the one-row summary for this offset.
df_OS0 = mvr_multi_xrf(X_train, X_test, y_train, y_test, OS='OS0')
df_OS0

Intercept: 
 [ 0.18091073  1.31833241 26.37960385  1.44817828  7.78519667]
Coefficients: 
 [[ 3.18423340e-03 -3.11003177e-01 -4.56775726e-02 -2.86708014e-02
  -2.62161260e-02 -3.14768719e-03 -5.85943017e-03 -1.51705795e-02
  -1.51705795e-02  5.16010850e-01  3.20701340e-02]
 [-2.40923187e-01 -1.90711997e+00 -1.16853660e-01 -3.38384655e-01
  -4.44381975e-01  1.06334649e-02 -4.05102883e-03 -5.82420422e-01
  -5.82420422e-01  3.10454568e+00 -9.97330980e-02]
 [ 1.86927494e+00  9.83488545e+00  1.07882025e+00 -1.92971298e+00
   9.88280184e+00 -1.62269208e+00  3.49422147e-01  5.70861922e+00
   5.70861922e+00 -1.77878246e+01  4.63641825e+00]
 [ 8.85839627e-01 -5.40003196e+00 -3.31694135e-01 -3.57972471e-01
   1.34230571e+00  1.73103646e-01 -1.11309926e-01  5.34642266e-01
   5.34642266e-01  8.58954525e+00  1.08384207e-01]
 [-2.24466566e+00  1.22959481e+00 -7.44237055e-01  3.72219273e-01
  -4.17774896e+00  7.86316843e-01  3.43865496e-03 -3.05275008e+00
  -3.05275008e+00  2.52834996e-01 -2.98377975

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,target,Offset,RMSE,MAE,day,month,year,model,version
0,Multi-XRF,OS0,1.346814,0.704717,12,1,2022,MVR,1.0.1


# Offset 1

In [77]:
# framecleaner (imported from defaults) receives the offset-1 frame plus the
# three column lists and returns the pair unpacked here as X1 and Y1_array.
X1, Y1_array = framecleaner(
    df1,
    param_dict['dataset'],
    param_dict['inputs'],
    param_dict['target'],
)

# splitterz (imported from defaults) returns the train/test pieces.
X1_train, X1_test, y1_train, y1_test = splitterz(X1.values, Y1_array)

# Fit, score, and display the one-row summary for this offset.
df_OS1 = mvr_multi_xrf(X1_train, X1_test, y1_train, y1_test, OS='OS1')
df_OS1

Intercept: 
 [ 0.09294564  0.44911532 31.33186348 -0.43121368  5.98007413]
Coefficients: 
 [[ 3.57081773e-04  1.64392632e-01  4.45395567e-03 -2.53452421e-02
   4.07584210e-02  3.28683121e-03 -3.87434982e-03  1.65582597e-04
   1.65582597e-04  5.07489556e-02  6.28472165e-02]
 [-7.18216714e-02 -1.64830339e-01  2.23519614e-01 -2.11450240e-01
   1.12741833e-01  7.01645446e-02  4.63768671e-03 -4.02224592e-01
  -4.02224592e-01  1.28542473e+00  2.63773146e-01]
 [ 1.55121252e+00 -2.28999702e+00 -3.99240489e-01 -2.87226839e+00
   6.85462561e+00 -3.52747715e+00  2.05788794e-01  4.89472931e+00
   4.89472931e+00 -5.23511074e+00  2.99738603e+00]
 [ 9.03247790e-01  2.87888952e+00  5.00831326e-01 -5.45214841e-02
   2.61171247e+00  2.55991705e-01 -8.90618264e-02  8.99202683e-01
   8.99202683e-01  6.30486670e-02  1.26080961e+00]
 [-1.40707193e+00 -3.40732944e+00 -4.56084124e-01  5.27602910e-01
  -3.09698146e+00  1.33809845e+00  5.17293108e-02 -2.69830603e+00
  -2.69830603e+00  4.43413839e+00 -2.29221739

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,target,Offset,RMSE,MAE,day,month,year,model,version
0,Multi-XRF,OS1,1.162143,0.598453,12,1,2022,MVR,1.0.1


# Offset 2

In [78]:
# framecleaner (imported from defaults) receives the offset-2 frame plus the
# three column lists and returns the pair unpacked here as X2 and Y2_array.
X2, Y2_array = framecleaner(
    df2,
    param_dict['dataset'],
    param_dict['inputs'],
    param_dict['target'],
)

# splitterz (imported from defaults) returns the train/test pieces.
X2_train, X2_test, y2_train, y2_test = splitterz(X2.values, Y2_array)

# Fit, score, and display the one-row summary for this offset.
df_OS2 = mvr_multi_xrf(X2_train, X2_test, y2_train, y2_test, OS='OS2')
df_OS2

Intercept: 
 [ 0.07448925  0.36507583 31.30832871 -0.57563208  6.4176292 ]
Coefficients: 
 [[ 8.24679483e-03  4.10520905e-01  5.15637802e-03 -1.84296425e-02
   5.10090815e-02 -2.39055390e-02 -6.14041165e-03  4.82353343e-03
   4.82353343e-03 -2.02244898e-01  1.06536224e-01]
 [-2.54541103e-02  1.32682596e+00  2.33693288e-01 -1.82837715e-01
   1.43654799e-01 -2.13078635e-01  1.35880466e-03 -3.90666496e-01
  -3.90666496e-01 -1.71005284e-01  5.52694472e-01]
 [ 2.14873693e+00 -8.22304659e+00 -6.78649625e-01 -3.07604956e+00
   6.89822622e+00 -1.06250508e+00  3.04011123e-01  4.92862655e+00
   4.92862655e+00  7.01316458e-01  3.00069738e-01]
 [ 1.00697307e+00  7.60740209e+00  5.56302820e-01 -6.02464941e-02
   2.74144536e+00 -4.53436796e-01 -1.22082503e-01  9.38805248e-01
   9.38805248e-01 -4.68744429e+00  2.04669473e+00]
 [-1.91366622e+00 -5.33455594e+00 -4.20998030e-01  4.40627597e-01
  -3.25361102e+00  1.80008047e+00  5.30479227e-02 -2.87678676e+00
  -2.87678676e+00  6.52111948e+00 -2.91472533

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,target,Offset,RMSE,MAE,day,month,year,model,version
0,Multi-XRF,OS2,1.284564,0.570352,12,1,2022,MVR,1.0.1


# Combine Results

In [79]:
# Stack the three one-row result frames into a single table.
# Each input frame carries index label 0, so a plain concat would yield three
# rows all labelled 0 (ambiguous for .loc lookups and duplicated in the saved
# CSV). ignore_index=True renumbers the rows 0, 1, 2 instead.
frames = [df_OS0, df_OS1, df_OS2]
results = pd.concat(frames, ignore_index=True)
results

Unnamed: 0,target,Offset,RMSE,MAE,day,month,year,model,version
0,Multi-XRF,OS0,1.346814,0.704717,12,1,2022,MVR,1.0.1
0,Multi-XRF,OS1,1.162143,0.598453,12,1,2022,MVR,1.0.1
0,Multi-XRF,OS2,1.284564,0.570352,12,1,2022,MVR,1.0.1


In [80]:
# Write the combined results table to disk; the row index is written as the
# first column (pandas' default, spelled out here).
results.to_csv(
    path_or_buf='mvr_results/MVR_XRF_1_12_v7.csv',
    index=True,
)