In [2]:
# If you have installation questions, please reach out
import pandas as pd # data storage
import numpy as np  # math and stuff

import sklearn  
import datetime

from sklearn import linear_model

from sklearn.utils.class_weight import compute_sample_weight
from sklearn.model_selection import train_test_split
from sklearn.metrics import median_absolute_error, max_error, mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import explained_variance_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler


In [3]:
import defaults
from defaults import framecleaner, splitterz

# Dataframes

In [4]:
# Load the merged, imputed CSV for each offset (OS0, OS1, OS2) in one pass.
# Paths are relative, so they resolve against the notebook's working directory.
df0, df1, df2 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../../core_to_wl_merge/OS0_Merged_dataset_imputed_08_23_2021.csv',
        '../../core_to_wl_merge/OS1_Merged_dataset_imputed_08_23_2021.csv',
        '../../core_to_wl_merge/OS2_Merged_dataset_imputed_08_23_2021.csv',
    )
)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [5]:
# Column selections passed to framecleaner in the per-offset cells:
#   dataset - every column kept in the working frame (inputs plus target)
#   inputs  - predictor columns
#   target  - the single column being regressed
param_dict = dict(
    dataset=[
        'CAL', 'GR', 'DT', 'SP', 'DENS', 'PE', 'RESD', 'PHIN', 'PHID',
        'GR_smooth', 'PE_smooth', 'He_por',
    ],
    inputs=[
        'CAL', 'GR', 'DT', 'SP', 'DENS', 'PE', 'RESD', 'PHIN', 'PHID',
        'GR_smooth', 'PE_smooth',
    ],
    target=['He_por'],
)

In [6]:
def mvr_por(X_train, X_test, y_train, y_test, OS='os'):
    """Fit an ordinary least-squares model and summarise its hold-out error.

    A ``LinearRegression`` is fitted on the training split, its intercept and
    coefficients are printed, and predictions on the test split are scored.

    Parameters
    ----------
    X_train, X_test : array-like
        Feature matrices for the training and test splits.
    y_train, y_test : array-like
        Target values matching ``X_train`` and ``X_test``.
    OS : str, default 'os'
        Label stored in the ``Offset`` column of the result.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns ``target``, ``Offset``, ``RMSE``,
        ``MAE``, ``MaxError``, ``day``, ``month``, ``year``, ``model`` and
        ``version``. Note that ``MAE`` holds the *median* absolute error
        (``median_absolute_error``), not the mean absolute error.
    """
    regr = linear_model.LinearRegression()
    regr.fit(X_train, y_train)

    print('Intercept: \n', regr.intercept_)
    print('Coefficients: \n', regr.coef_)

    preds = regr.predict(X_test)

    # RMSE is taken as the square root of the per-output MSE rather than via
    # ``squared=False``: that keyword was deprecated in scikit-learn 1.4 and
    # removed in 1.6. Rooting each output before averaging reproduces what
    # ``squared=False`` returned, so results are unchanged on older releases.
    per_output_mse = mean_squared_error(y_test, preds, multioutput='raw_values')
    rmse = np.sqrt(per_output_mse).mean()
    max_err = max_error(y_test, preds)
    median_abs_err = median_absolute_error(y_test, preds)

    # Stamp the row with the run date and library version for provenance.
    now = datetime.datetime.now()

    return pd.DataFrame(data={
        'target': ['He_Por'],
        'Offset': [OS],
        'RMSE': [rmse],
        'MAE': [median_abs_err],
        'MaxError': [max_err],
        'day': [now.day],
        'month': [now.month],
        'year': [now.year],
        'model': ['MVR'],
        'version': [sklearn.__version__],
    })

# Offset 0

In [7]:
# Build the feature frame and target array for offset 0 from the configured
# column selections.
X, Y_array = framecleaner(
    df0,
    param_dict['dataset'],
    param_dict['inputs'],
    param_dict['target'],
)

# Partition into train/test sets (the split policy lives in defaults.splitterz).
X_train, X_test, y_train, y_test = splitterz(X.values, Y_array)

# Fit and score the linear model; the bare name below renders the metrics row.
df_OS0 = mvr_por(X_train, X_test, y_train, y_test, OS='OS0')
df_OS0

Intercept: 
 [22.11747159]
Coefficients: 
 [[  3.85357558   0.10999963   3.22853591  13.01912994 -22.74062864
    2.54936247  -0.24357286  -8.70053274  -8.70053274  -3.97528707
   -3.4727575 ]]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,target,Offset,RMSE,MAE,MaxError,day,month,year,model,version
0,He_Por,OS0,2.859696,1.434416,18.266337,9,9,2021,MVR,0.24.2


# Offset 1

In [12]:
# Build the feature frame and target array for offset 1 from the configured
# column selections.
X1, Y1_array = framecleaner(
    df1,
    param_dict['dataset'],
    param_dict['inputs'],
    param_dict['target'],
)

# Partition into train/test sets (the split policy lives in defaults.splitterz).
X1_train, X1_test, y1_train, y1_test = splitterz(X1.values, Y1_array)

# Fit and score the linear model; the bare name below renders the metrics row.
df_OS1 = mvr_por(X1_train, X1_test, y1_train, y1_test, OS='OS1')
df_OS1

Intercept: 
 [24.43154593]
Coefficients: 
 [[  2.10017932   0.60986483   4.07783615  13.1794162  -25.38673891
    1.86557381   0.02984438  -8.25545323  -8.25545323  -1.8636234
   -8.99354123]]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,target,Offset,RMSE,MAE,MaxError,day,month,year,model,version
0,He_Por,OS1,2.851013,1.319778,13.663413,9,9,2021,MVR,0.24.2


# Offset 2

In [9]:
# Build the feature frame and target array for offset 2 from the configured
# column selections.
X2, Y2_array = framecleaner(
    df2,
    param_dict['dataset'],
    param_dict['inputs'],
    param_dict['target'],
)

# Partition into train/test sets (the split policy lives in defaults.splitterz).
X2_train, X2_test, y2_train, y2_test = splitterz(X2.values, Y2_array)

# Fit and score the linear model; the bare name below renders the metrics row.
df_OS2 = mvr_por(X2_train, X2_test, y2_train, y2_test, OS='OS2')
df_OS2

Intercept: 
 [25.49361702]
Coefficients: 
 [[ 1.13422680e+00  8.74680516e-01  3.39071883e+00  1.38255484e+01
  -2.60442301e+01  3.64820733e+00 -1.22130428e-02 -8.71916529e+00
  -8.71916529e+00 -2.31044294e+00 -1.12712528e+01]]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


Unnamed: 0,target,Offset,RMSE,MAE,MaxError,day,month,year,model,version
0,He_Por,OS2,3.349101,1.413274,20.031325,9,9,2021,MVR,0.24.2


# Combine Results Across Offsets

In [10]:
# Stack the per-offset metric rows into a single comparison table.
# ignore_index=True assigns a fresh 0..n-1 index; without it each row keeps
# label 0 from its single-row source frame, so a label lookup such as
# results.loc[0] would return every row instead of one.
frames = [df_OS0, df_OS1, df_OS2]
results = pd.concat(frames, ignore_index=True)
results

Unnamed: 0,target,Offset,RMSE,MAE,MaxError,day,month,year,model,version
0,He_Por,OS0,2.859696,1.434416,18.266337,9,9,2021,MVR,0.24.2
0,He_Por,OS1,3.11805,1.652063,15.870112,9,9,2021,MVR,0.24.2
0,He_Por,OS2,3.349101,1.413274,20.031325,9,9,2021,MVR,0.24.2
