# Predicting Perm using MVR

In [3]:
# If you have installation questions, please reach out
import pandas as pd # data storage
import numpy as np  # math and stuff

import sklearn  
import datetime

from sklearn import linear_model

from sklearn.utils.class_weight import compute_sample_weight
from sklearn.model_selection import train_test_split
from sklearn.metrics import median_absolute_error, max_error, mean_squared_error
from sklearn.metrics import explained_variance_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt # plotting utility

In [4]:
import defaults
from defaults import framecleaner, splitterz

# Dataframes

In [5]:
# Load one merged/imputed CSV per offset (OS0, OS1, OS2 — per the file names).
# Each frame is kept under its own name so the offset sections below can
# work on them independently.
df0 = pd.read_csv(
    '../../core_to_wl_merge/OS0_Merged_dataset_imputed_08_23_2021.csv'
)
df1 = pd.read_csv(
    '../../core_to_wl_merge/OS1_Merged_dataset_imputed_08_23_2021.csv'
)
df2 = pd.read_csv(
    '../../core_to_wl_merge/OS2_Merged_dataset_imputed_08_23_2021.csv'
)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [6]:
# Column selections passed to `framecleaner` in the offset sections below.
# The feature list is written once; "dataset" is simply features + target,
# so the two lists cannot drift apart.
_feature_cols = [
    'CAL', 'GR', 'DT', 'SP', 'DENS', 'PE',
    'RESD', 'PHIN', 'PHID', 'GR_smooth', 'PE_smooth',
]
_target_cols = ['perm_klink_md']

param_dict = dict(
    dataset=_feature_cols + _target_cols,
    inputs=list(_feature_cols),
    target=list(_target_cols),
)

In [11]:
def mvr_perm(X_train, X_test, y_train, y_test, OS='os'):
    """Fit a linear regression on the training split and score it on the test split.

    The fitted intercept and coefficients are printed, predictions are made
    for ``X_test``, and three error metrics plus run metadata are returned
    as a one-row DataFrame.

    Parameters
    ----------
    X_train, X_test : array-like
        Feature matrices used for fitting and for prediction, respectively.
    y_train, y_test : array-like
        Target values matching ``X_train`` and ``X_test``.
    OS : str, default 'os'
        Label written to the ``Offset`` column of the result.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns ``target``, ``Offset``, ``MSE``,
        ``MAE``, ``MaxError``, ``day``, ``month``, ``year``, ``model`` and
        ``version``.

    Notes
    -----
    Column names are kept unchanged so existing result tables stay
    compatible, but two of them are easy to misread:

    * ``MSE`` holds the *root* mean squared error, not the MSE.
    * ``MAE`` holds the *median* absolute error, not the mean absolute error.
    """
    regr = linear_model.LinearRegression()
    regr.fit(X_train, y_train)

    print('Intercept: \n', regr.intercept_)
    print('Coefficients: \n', regr.coef_)

    preds = regr.predict(X_test)

    # RMSE is taken as sqrt(MSE) rather than via `squared=False`: that keyword
    # was deprecated in scikit-learn 1.4 and later removed, so relying on it
    # breaks on current releases. For a single target column the two forms
    # give the same value.
    rmse = np.sqrt(mean_squared_error(y_test, preds))
    worst_error = max_error(y_test, preds)
    median_abs_error = median_absolute_error(y_test, preds)

    # Timestamp the run so rows from different days can be told apart.
    now = datetime.datetime.now()

    row = {
        'target': ['perm'],
        'Offset': [OS],
        'MSE': [rmse],              # NOTE: root mean squared error
        'MAE': [median_abs_error],  # NOTE: median absolute error
        'MaxError': [worst_error],
        'day': [now.day],
        'month': [now.month],
        'year': [now.year],
        'model': ['MVR'],
        'version': [sklearn.__version__],
    }

    results = pd.DataFrame(data=row)

    return results

# Offset 0

In [17]:
# Build the feature frame and target array for offset 0 from the columns
# named in param_dict.
X, Y_array = framecleaner(
    df0,
    param_dict['dataset'],
    param_dict['inputs'],
    param_dict['target'],
)

# Partition into training and test subsets (splitting rules live in `defaults.splitterz`).
X_train, X_test, y_train, y_test = splitterz(X.values, Y_array)

# Fit and score the regression; the one-row summary is shown as the cell output.
df_OS0 = mvr_perm(X_train, X_test, y_train, y_test, OS='OS0')
df_OS0

Intercept: 
 [17.52825433]
Coefficients: 
 [[  5.08484904   3.1858771    4.79592558  14.42565764 -21.19481088
    2.38280219  -0.06959268  -7.42104901  -7.42104901  -6.20509017
   -2.44659259]]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,target,Offset,MSE,MAE,MaxError,day,month,year,model,version
0,perm,OS0,3.334961,1.739469,14.841648,9,9,2021,MVR,0.24.2


# Offset 1

In [18]:
# Build the feature frame and target array for offset 1 from the columns
# named in param_dict.
X1, Y1_array = framecleaner(
    df1,
    param_dict['dataset'],
    param_dict['inputs'],
    param_dict['target'],
)

# Partition into training and test subsets (splitting rules live in `defaults.splitterz`).
X1_train, X1_test, y1_train, y1_test = splitterz(X1.values, Y1_array)

# Fit and score the regression; the one-row summary is shown as the cell output.
df_OS1 = mvr_perm(X1_train, X1_test, y1_train, y1_test, OS='OS1')
df_OS1

Intercept: 
 [21.90394457]
Coefficients: 
 [[ 2.36195481e+00 -7.43580324e-01  5.28441425e+00  1.34520357e+01
  -2.36427384e+01  5.55483328e-01  1.31334844e-01 -8.11982509e+00
  -8.11982509e+00 -2.32210330e-03 -6.32166807e+00]]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,target,Offset,MSE,MAE,MaxError,day,month,year,model,version
0,perm,OS1,2.910056,1.59612,14.447998,9,9,2021,MVR,0.24.2


# Offset 2

In [19]:
# Build the feature frame and target array for offset 2 from the columns
# named in param_dict.
X2, Y2_array = framecleaner(
    df2,
    param_dict['dataset'],
    param_dict['inputs'],
    param_dict['target'],
)

# Partition into training and test subsets (splitting rules live in `defaults.splitterz`).
X2_train, X2_test, y2_train, y2_test = splitterz(X2.values, Y2_array)

# Fit and score the regression; the one-row summary is shown as the cell output.
df_OS2 = mvr_perm(X2_train, X2_test, y2_train, y2_test, OS='OS2')
df_OS2

Intercept: 
 [22.82800174]
Coefficients: 
 [[  1.8026098   -1.27676063   4.88889694  13.71251676 -24.15064814
   -1.08120748   0.07656598  -8.03991996  -8.03991996  -0.68414262
   -5.79424326]]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,target,Offset,MSE,MAE,MaxError,day,month,year,model,version
0,perm,OS2,2.492163,1.438295,11.846552,9,9,2021,MVR,0.24.2


# Combine Datasets

In [20]:
# Stack the per-offset score rows into a single summary table.
# Every per-offset frame has the single index label 0, so a plain concat
# would yield three rows all labelled 0 and make label-based lookups
# (e.g. results.loc[0]) ambiguous. ignore_index=True renumbers them 0..n-1.
frames = [df_OS0, df_OS1, df_OS2]
results = pd.concat(frames, ignore_index=True)
results

Unnamed: 0,target,Offset,MSE,MAE,MaxError,day,month,year,model,version
0,perm,OS0,3.334961,1.739469,14.841648,9,9,2021,MVR,0.24.2
0,perm,OS1,2.910056,1.59612,14.447998,9,9,2021,MVR,0.24.2
0,perm,OS2,2.492163,1.438295,11.846552,9,9,2021,MVR,0.24.2
