# Predicting GZ using MVR

In [51]:
# If you have installation questions, please reach out
import datetime
import os

import numpy as np  # math and stuff
import pandas as pd # data storage
import sklearn  

from sklearn import linear_model
# import statsmodels.api as sm

from sklearn.utils.class_weight import compute_sample_weight
from sklearn.model_selection import train_test_split
from sklearn.metrics import median_absolute_error, max_error, mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import explained_variance_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [52]:
import defaults
from defaults import framecleaner, splitterz

# Dataframes

In [53]:
# Load the three merged, imputed datasets (OS0, OS1, OS2). Each one is modelled
# separately in its own "Offset N" section further down.
# The paths are relative, so they resolve against the notebook's working directory.
df0  = pd.read_csv('../../core_to_wl_merge/OS0_Merged_dataset_imputed_08_23_2021.csv')

df1 = pd.read_csv('../../core_to_wl_merge/OS1_Merged_dataset_imputed_08_23_2021.csv')

df2 = pd.read_csv('../../core_to_wl_merge/OS2_Merged_dataset_imputed_08_23_2021.csv')

  exec(code_obj, self.user_global_ns, self.user_ns)


In [54]:
# Column selections handed to `framecleaner` for every offset dataset.
param_dict = {
    # every column kept from the raw frame: the predictors followed by the target
    "dataset": [
        'CAL', 'GR', 'DT', 'SP', 'DENS', 'PE', 'RESD', 'PHID',
        'gz_pchip_interp',
    ],
    # predictor columns used to build X
    "inputs": [
        'CAL', 'GR', 'DT', 'SP', 'DENS', 'PE', 'RESD', 'PHID',
    ],
    # column the regression is trained to predict
    "target": [
        'gz_pchip_interp',
    ],
}

In [55]:
def mvr_gz(X_train, X_test, y_train, y_test, OS='os'):
    """Fit an ordinary least-squares model and score it on the held-out split.

    A ``LinearRegression`` is fitted on ``X_train``/``y_train``, its intercept
    and coefficients are printed, and predictions on ``X_test`` are compared
    against ``y_test``.

    Parameters
    ----------
    X_train, X_test : array-like
        Training and test feature matrices.
    y_train, y_test : array-like
        Training and test targets.
    OS : str, default 'os'
        Label written to the ``Offset`` column of the result row.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns ``target``, ``Offset``, ``RMSE``,
        ``MAE``, ``MaxError``, ``day``, ``month``, ``year``, ``model`` and
        ``version``. Note that ``MAE`` holds the *median* absolute error,
        not the mean absolute error.
    """
    regr = linear_model.LinearRegression()
    regr.fit(X_train, y_train)

    print('Intercept: \n', regr.intercept_)
    print('Coefficients: \n', regr.coef_)

    preds = regr.predict(X_test)

    # Root-mean-squared error. The `squared=False` keyword was deprecated in
    # scikit-learn 1.4 and removed in 1.6, so take the root explicitly. The
    # per-output roots are averaged, which is what `squared=False` used to do.
    rmse = np.mean(
        np.sqrt(mean_squared_error(y_test, preds, multioutput='raw_values'))
    )
    # Largest single residual on the test split.
    worst_error = max_error(y_test, preds)
    # Median (not mean) absolute error; stored under the 'MAE' key below.
    median_abs_err = median_absolute_error(y_test, preds)

    # Timestamp the run so result rows from different days can be told apart.
    now = datetime.datetime.now()

    row = {
        'target': ['GZ'],
        'Offset': [OS],
        'RMSE': [rmse],
        'MAE': [median_abs_err],
        'MaxError': [worst_error],
        'day': [now.day],
        'month': [now.month],
        'year': [now.year],
        'model': ['MVR'],
        # Record the library version the metrics were produced with.
        'version': [sklearn.__version__],
    }

    return pd.DataFrame(data=row)

# Offset 0

In [56]:
# Build the model inputs and target for the OS0 frame with the project helper
# `framecleaner` (imported from `defaults`), using the column lists in param_dict.
# NOTE(review): the saved output of this cell shows a pandas SettingWithCopyWarning;
# it presumably originates inside `framecleaner` — confirm and fix it there.
X, Y_array = framecleaner(df0, param_dict['dataset'], param_dict['inputs'], param_dict['target'] )

# Split into train/test sets with the project helper `splitterz`.
# NOTE(review): no seed is passed here — confirm `splitterz` fixes one, otherwise
# the metrics below will change between runs.
X_train, X_test, y_train, y_test = splitterz(X.values, Y_array)

# Fit and score the linear model; the one-row metrics frame is displayed as the cell output.
df_OS0 = mvr_gz(X_train, X_test, y_train, y_test, OS='OS0')
df_OS0

Intercept: 
 [2.74573555]
Coefficients: 
 [[ 1.11547502  3.83741142 -0.29762057 -0.10092386  0.12323535  1.10855552
   0.0061076   0.715931  ]]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,target,Offset,RMSE,MAE,MaxError,day,month,year,model,version
0,GZ,OS0,1.42438,0.880693,4.104948,12,1,2022,MVR,1.0.1


# Offset 1

In [57]:
# Build the model inputs and target for the OS1 frame with the project helper
# `framecleaner` (imported from `defaults`), using the column lists in param_dict.
# NOTE(review): the saved output of this cell shows a pandas SettingWithCopyWarning;
# it presumably originates inside `framecleaner` — confirm and fix it there.
X1, Y1_array = framecleaner(df1, param_dict['dataset'], param_dict['inputs'], param_dict['target'] )

# Split into train/test sets with the project helper `splitterz`.
# NOTE(review): no seed is passed here — confirm `splitterz` fixes one, otherwise
# the metrics below will change between runs.
X1_train, X1_test, y1_train, y1_test = splitterz(X1.values, Y1_array)

# Fit and score the linear model; the one-row metrics frame is displayed as the cell output.
df_OS1 = mvr_gz(X1_train, X1_test, y1_train, y1_test, OS='OS1')
df_OS1

Intercept: 
 [-0.55917572]
Coefficients: 
 [[1.69674402e+00 3.93149092e+00 1.11732484e+00 3.24170114e-01
  2.35549219e+00 2.45864785e+00 3.78968966e-03 1.92804005e+00]]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,target,Offset,RMSE,MAE,MaxError,day,month,year,model,version
0,GZ,OS1,1.194178,0.732902,5.277963,12,1,2022,MVR,1.0.1


# Offset 2

In [58]:
# Build the model inputs and target for the OS2 frame with the project helper
# `framecleaner` (imported from `defaults`), using the column lists in param_dict.
# NOTE(review): the saved output of this cell shows a pandas SettingWithCopyWarning;
# it presumably originates inside `framecleaner` — confirm and fix it there.
X2, Y2_array = framecleaner(df2, param_dict['dataset'], param_dict['inputs'], param_dict['target'] )

# Split into train/test sets with the project helper `splitterz`.
# NOTE(review): no seed is passed here — confirm `splitterz` fixes one, otherwise
# the metrics below will change between runs.
X2_train, X2_test, y2_train, y2_test = splitterz(X2.values, Y2_array)

# Fit and score the linear model; the one-row metrics frame is displayed as the cell output.
df_OS2 = mvr_gz(X2_train, X2_test, y2_train, y2_test, OS='OS2')
df_OS2

Intercept: 
 [-0.30751624]
Coefficients: 
 [[1.54178627 4.60829127 0.98147422 0.28476942 2.21297031 1.92896048
  0.03115422 1.56004387]]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,target,Offset,RMSE,MAE,MaxError,day,month,year,model,version
0,GZ,OS2,1.101342,0.693137,4.965731,12,1,2022,MVR,1.0.1


# Combine Results

In [59]:
# Stack the per-offset metric rows into one summary table.
# Each input frame has a single row labelled 0, so a plain concat would leave
# three rows that all share index 0; renumber them so every row has a unique label.
frames = [df_OS0, df_OS1, df_OS2]
results = pd.concat(frames, ignore_index=True)
results

Unnamed: 0,target,Offset,RMSE,MAE,MaxError,day,month,year,model,version
0,GZ,OS0,1.42438,0.880693,4.104948,12,1,2022,MVR,1.0.1
0,GZ,OS1,1.194178,0.732902,5.277963,12,1,2022,MVR,1.0.1
0,GZ,OS2,1.101342,0.693137,4.965731,12,1,2022,MVR,1.0.1


In [60]:
# Persist the combined metrics table. Create the output directory first so a
# fresh checkout, where mvr_results/ may not exist yet, does not fail on the write.
os.makedirs('mvr_results', exist_ok=True)
results.to_csv('mvr_results/MVR_GZ_1_12_v5.csv')