In [1]:
from joblib import dump, load
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error,mean_squared_error
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import glob
from tqdm import tqdm
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score

In [2]:
import xgboost as xgb

In [3]:
import sys
sys.path.append('temp/temp/')
import make_tissue as mt

In [4]:
def mean_OD(spectra,wvs,wvl,window):
    left_wvl = wvl-(window/2.)
    right_wvl = wvl+(window/2.)
    wvl_mask = (left_wvl<=wvs) & (wvs<=right_wvl)
    return spectra[wvl_mask].mean()
def mean_R(spectra,wvl,window):
    left_wvl = wvl-(window/2.)
    right_wvl = wvl+(window/2.)
    wvl_mask = (left_wvl<=all_wvs) & (all_wvs<=right_wvl)
    return spectra[wvl_mask].mean()
def line(spectra,wvl_1,wvl_2):
    y_1 = mean_OD(spectra,wvl_1,2.)
    y_2 = mean_OD(spectra,wvl_2,2.) 
    a = (y_2 - y_1)/(wvl_2-wvl_1)
    b = (y_1*wvl_2-y_2*wvl_1)/(wvl_2-wvl_1)
    return a*wvs+b
def line_correction(spectra,left_wvl,right_wvl):
    return spectra - line(spectra,left_wvl,right_wvl)
def norm(y):
    return (y-y.min())/(y.max()-y.min())

## Train test split

In [5]:
csv_file='2_layered_model_mcml.csv'

data = pd.read_csv(csv_file)

distances = np.arange(0.0025,1.5,0.005).round(4)

data.columns = np.append(distances,['mua1', 'mus1', 'd1','mua2', 'mus2','d2'])
data.drop((np.arange( 0.3225,1.5,0.005).round(4).astype(str)), axis=1, inplace=True)
data.drop('d2', axis=1, inplace=True)
optical = ['mua1','mus1','d1','mua2','mus2']
Rs = [r for r in data.columns if r not in optical]
dr=0.005
for col in Rs:
    distance = float(col)
    data[col] = 2*np.pi*distance*dr*data[col]        
optical_prop = data[optical]
data_R = data[Rs]
X_train, X_test, ys_train, ys_test = train_test_split(data_R, optical_prop, test_size=0.10, random_state=42)

In [6]:
X_train_part = X_train.copy()
ys_train_part = ys_train.copy()

X_test_part = X_test.copy()
ys_test_part = ys_test.copy()

In [7]:
optical_prop['mua1/mua2']=optical_prop['mua1']/optical_prop['mua2']
optical_prop['mus1/mus2']=optical_prop['mus1']/optical_prop['mus2']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [8]:
phys_mask = (optical_prop['mua1/mua2']<1)&(optical_prop['mus1/mus2']<1)&(optical_prop['d1']<0.21)
data_R_phys = data_R[phys_mask]
optical_prop_phys = optical_prop[phys_mask]

In [9]:
X_train_phys, X_test_phys, ys_train_phys, ys_test_phys = train_test_split(data_R_phys, optical_prop_phys, test_size=0.10, random_state=42)

## XGboost

In [10]:
data_dmatrix = xgb.DMatrix(data=X_train_phys,label=ys_train_phys['d1'])

In [None]:
params = {"objective":"reg:squarederror",'colsample_bytree': 0.3,'learning_rate': 0.005,
                'max_depth': 5, 'alpha': 10,'n_jobs':-1}

cv_results = xgb.cv(dtrain=data_dmatrix, params=params, nfold=3,
                    num_boost_round=5000,early_stopping_rounds=2000,metrics="rmse", as_pandas=True, seed=123)

In [None]:
plt.plot(cv_results['train-rmse-mean'])
plt.xlabel('n estimators')
plt.ylabel('RMSE train, $cm^{-1}$')

In [None]:
xg_reg = xgb.XGBRegressor(objective ='reg:squarederror', colsample_bytree = 0.3, learning_rate = 0.005,
                max_depth = 5, alpha = 10, n_estimators = 5000)

In [None]:
xg_reg.fit(X_train_phys, ys_train_phys['d1'])

In [None]:
y_pred = xg_reg.predict(X_test_phys)

In [None]:
plt.plot(ys_test_phys['d1'],y_pred,'o')
plt.plot([0.075,0.2],[0.075,0.2],'-',label='y=y')
print('root_mse: ', np.sqrt(mean_squared_error(ys_test_phys['d1'],y_pred)))
print('r2: ', r2_score(ys_test_phys['d1'], y_pred))
plt.xlabel('$d_1$ test')
plt.ylabel('$d_1$ predicted')
plt.legend()