In [91]:
import numpy as np
import pandas as pd
import sklearn

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

# # suppress NumPy arrays scientific notation and round decimals to three places
np.set_printoptions(suppress=True)
np.printoptions(precision=3, suppress=True)

<contextlib._GeneratorContextManager at 0x12c53d8ea90>

In [92]:
df = pd.read_csv('data.txt', delim_whitespace=True, encoding='ISO-8859-1')
x = df[['MRR(mm3/min)', 'Ra(µm)']]
y = df.drop(['Sr._No.', 'MRR(mm3/min)', 'Ra(µm)'], axis=1)

In [93]:
dtr_steps = [('standard_scaler', StandardScaler()), ('dtr', DecisionTreeRegressor())]
dtr_pipeline = Pipeline(dtr_steps)

In [94]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0, stratify=None)

In [95]:
x_train

Unnamed: 0,MRR(mm3/min),Ra(µm)
26,7518.86,1.82
20,2582.46,0.38
13,3092.37,0.82
24,2549.2,1.75
5,1727.36,1.1
17,6508.93,2.03
8,2797.84,2.17
30,5247.47,1.49
25,5055.49,1.89
23,5149.9,1.11


In [96]:
dtr_parameteres = {'dtr__criterion': ['mse','friedman_mse'],
               'dtr__splitter': ['best', 'random'],
               'dtr__min_weight_fraction_leaf': [0.0, 0.1, 0.5, 1.0, 2.0],
               'dtr__max_features': ['auto', 'sqrt', 'log2']}

In [97]:
grid = GridSearchCV(dtr_pipeline, param_grid=dtr_parameteres, cv=None)

In [98]:
grid.fit(x_train, y_train)
y_pred = grid.predict(x_test)
print("score = %3.2f" % (grid.score(x_test, y_test)))
print(grid.best_params_)

score = -0.38
{'dtr__criterion': 'friedman_mse', 'dtr__max_features': 'log2', 'dtr__min_weight_fraction_leaf': 0.0, 'dtr__splitter': 'random'}


In [99]:
print(x_test)
print(y_test)
print(y_pred)

MRR(mm3/min)  Ra(µm)
11       2351.92    0.54
22       3461.88    0.79
10       1577.02    0.48
2         909.28    0.52
16       4366.50    1.91
14       4611.35    0.97
28       2130.54    0.31
    Spindle_speed(rpm)  Feed_rate(mm/rev)  Depth_of_cut(mm)
11                 710             0.0508              1.20
22                1120             0.1016              0.80
10                 710             0.0508              0.80
2                  280             0.0508              1.20
16                 710             0.1524              0.80
14                 710             0.1016              1.20
28                1100             0.0500              0.69
[[1120.        0.0508    1.2   ]
 [ 710.        0.1016    0.8   ]
 [ 710.        0.1016    0.4   ]
 [ 280.        0.0508    0.8   ]
 [ 280.        0.1524    1.2   ]
 [ 710.        0.1016    0.8   ]
 [1120.        0.0508    1.2   ]]


google "decisiontreeregressor pipeline example"

In [100]:
from sklearn.metrics import explained_variance_score
explained_variance_score(y_test, y_pred)

-0.33598138814937756

In [101]:
from sklearn.metrics import mean_absolute_error
mean_absolute_error(y_test, y_pred)

60.5791238095238

In [102]:
from sklearn.metrics import mean_squared_error
mean_squared_error(y_test, y_pred) 

24833.37631815619

In [103]:
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

-0.37805337090279284