# Predicting GZ using multivariate linear regression (MVR)

In [30]:
# If you have installation questions, please reach out
import pandas as pd # data storage
import numpy as np  # math and stuff

import sklearn  
import datetime

from sklearn import linear_model
# import statsmodels.api as sm

from sklearn.utils.class_weight import compute_sample_weight
from sklearn.model_selection import train_test_split
from sklearn.metrics import median_absolute_error, max_error, mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import explained_variance_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [31]:
import defaults
from defaults import framecleaner, splitterz

# Dataframes

In [32]:
# Load the merged, imputed datasets for offsets 0, 1 and 2 (one CSV each),
# reading them in order and binding them to df0, df1 and df2.
df0, df1, df2 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../../core_to_wl_merge/OS0_Merged_dataset_imputed_08_23_2021.csv',
        '../../core_to_wl_merge/OS1_Merged_dataset_imputed_08_23_2021.csv',
        '../../core_to_wl_merge/OS2_Merged_dataset_imputed_08_23_2021.csv',
    )
)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [33]:
# Column selections handed to framecleaner:
#   dataset - every column kept (the inputs followed by the target)
#   inputs  - predictor columns
#   target  - the column being predicted
param_dict = dict(
    dataset=[
        'CAL', 'GR', 'DT', 'SP', 'DENS', 'PE', 'RESD', 'PHIN', 'PHID',
        'GR_smooth', 'PE_smooth', 'gz_pchip_interp',
    ],
    inputs=[
        'CAL', 'GR', 'DT', 'SP', 'DENS', 'PE', 'RESD', 'PHIN', 'PHID',
        'GR_smooth', 'PE_smooth',
    ],
    target=['gz_pchip_interp'],
)

In [34]:
def mvr_gz(X_train, X_test, y_train, y_test, OS='os'):
  """Fit a linear regression on the training split and score it on the test split.

  Prints the fitted intercept and coefficients as a side effect.

  Parameters
  ----------
  X_train, y_train
      Features and target passed to ``LinearRegression.fit``.
  X_test, y_test
      Features passed to ``predict`` and the target the predictions are
      scored against.
  OS : str, default 'os'
      Label stored in the ``Offset`` column of the result.

  Returns
  -------
  pandas.DataFrame
      A single row with the columns ``target``, ``Offset``, ``RMSE``, ``MAE``,
      ``MaxError``, ``day``, ``month``, ``year``, ``model`` and ``version``.
      Note that ``MAE`` holds the *median* absolute error
      (``median_absolute_error``), not the mean absolute error.
  """
  regr = linear_model.LinearRegression()
  regr.fit(X_train, y_train)

  print('Intercept: \n', regr.intercept_)
  print('Coefficients: \n', regr.coef_)

  preds = regr.predict(X_test)

  # RMSE without the `squared=False` keyword, which was deprecated in
  # scikit-learn 1.4 and removed in 1.6. Taking the root per output and then
  # averaging reproduces what `squared=False` returned.
  per_output_mse = mean_squared_error(y_test, preds, multioutput='raw_values')
  rmse = np.mean(np.sqrt(per_output_mse))
  worst_error = max_error(y_test, preds)
  median_abs_error = median_absolute_error(y_test, preds)

  # Stamp the row with the run date and library version for provenance.
  now = datetime.datetime.now()

  d = {'target': ['GZ'],
       'Offset': [OS],
       'RMSE': [rmse],
       'MAE': [median_abs_error],
       'MaxError': [worst_error],
       'day': [now.day],
       'month': [now.month],
       'year': [now.year],
       'model': ['MVR'],
       'version': [sklearn.__version__]}

  results = pd.DataFrame(data=d)

  return results

# Offset 0

In [35]:
# Build X and Y_array for offset 0 from the configured column lists
X, Y_array = framecleaner(
    df0,
    param_dict['dataset'],
    param_dict['inputs'],
    param_dict['target'],
)

# Split into the train and test pieces used by the model
X_train, X_test, y_train, y_test = splitterz(X.values, Y_array)

# Fit the regression and display its one-row metrics summary
df_OS0 = mvr_gz(X_train, X_test, y_train, y_test, OS='OS0')
df_OS0

Intercept: 
 [2.69381281]
Coefficients: 
 [[ 0.91332059 -7.3138976  -0.13601436 -0.24113276  0.20688471 -0.49670809
   0.03694261  0.2832871   0.2832871  11.78779079  1.47057242]]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,target,Offset,RMSE,MAE,MaxError,day,month,year,model,version
0,GZ,OS0,1.377366,0.896899,4.372962,9,9,2021,MVR,0.24.2


# Offset 1

In [39]:
# Build X1 and Y1_array for offset 1 from the configured column lists
X1, Y1_array = framecleaner(
    df1,
    param_dict['dataset'],
    param_dict['inputs'],
    param_dict['target'],
)

# Split into the train and test pieces used by the model
X1_train, X1_test, y1_train, y1_test = splitterz(X1.values, Y1_array)

# Fit the regression and display its one-row metrics summary
df_OS1 = mvr_gz(X1_train, X1_test, y1_train, y1_test, OS='OS1')
df_OS1

Intercept: 
 [-0.76899316]
Coefficients: 
 [[ 1.66718531 -1.30181156  1.14793144  0.33616924  2.46237449 -0.19464839
   0.01966483  0.96229088  0.96229088  5.50290088  2.75743111]]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,target,Offset,RMSE,MAE,MaxError,day,month,year,model,version
0,GZ,OS1,1.180301,0.745006,4.566401,9,9,2021,MVR,0.24.2


# Offset 2

In [37]:
# Build X2 and Y2_array for offset 2 from the configured column lists
X2, Y2_array = framecleaner(
    df2,
    param_dict['dataset'],
    param_dict['inputs'],
    param_dict['target'],
)

# Split into the train and test pieces used by the model
X2_train, X2_test, y2_train, y2_test = splitterz(X2.values, Y2_array)

# Fit the regression and display its one-row metrics summary
df_OS2 = mvr_gz(X2_train, X2_test, y2_train, y2_test, OS='OS2')
df_OS2

Intercept: 
 [-0.43587226]
Coefficients: 
 [[ 1.58604621  3.64858147  0.84944547  0.3364138   2.25154905 -1.01173371
   0.03094817  0.7894088   0.7894088   0.94328592  3.23043352]]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,target,Offset,RMSE,MAE,MaxError,day,month,year,model,version
0,GZ,OS2,1.105071,0.676237,5.2393,9,9,2021,MVR,0.24.2


# Combine Results

In [38]:
# Stack the per-offset summaries into one table. ignore_index=True renumbers
# the rows 0..2; without it every row keeps index label 0 from its one-row
# source frame, so label-based lookups such as results.loc[0] are ambiguous.
# NOTE(review): the saved output of this cell shows a different OS1 RMSE than
# the OS1 cell's own output, so it was produced before OS1 was last re-run.
# Restart the kernel and run all cells before relying on this table; if the
# numbers still drift between runs, check whether splitterz uses a fixed seed.
frames = [df_OS0, df_OS1, df_OS2]
results = pd.concat(frames, ignore_index=True)
results

Unnamed: 0,target,Offset,RMSE,MAE,MaxError,day,month,year,model,version
0,GZ,OS0,1.377366,0.896899,4.372962,9,9,2021,MVR,0.24.2
0,GZ,OS1,1.151469,0.727164,4.525242,9,9,2021,MVR,0.24.2
0,GZ,OS2,1.105071,0.676237,5.2393,9,9,2021,MVR,0.24.2
