In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn import datasets, ensemble
from scipy.stats import loguniform

In [2]:
# Read the draft dataset spreadsheet into a DataFrame.
# NOTE(review): this is an absolute path (presumably a hosted-session
# directory) — confirm it exists before running elsewhere.
DRAFT_DATA_XLSX = '/content/122 draft data.xlsx'
df = pd.read_excel(DRAFT_DATA_XLSX)

In [3]:
# Preview the first five rows to sanity-check column names and values.
df.head(n=5)

Unnamed: 0,PTO Power,Tire Sectional width (m),Tire overall diameter (m),Drawbar height (m),Wheel base (m),Total mass (kN),Velocity (km/h),Maximum drawbar pull at 15% wheel slip (kN)
0,31.3,0.345,1.31,0.65,1.94,18.737,3.07,15.37
1,22.2,0.315,1.26,0.65,1.95,18.737,3.08,14.46
2,34.0,0.345,1.31,0.65,1.98,20.748,3.03,18.01
3,34.1,0.241,1.05,0.5,1.48,11.134,2.65,8.37
4,12.9,0.429,1.435,0.6,1.88,18.344,3.05,13.63


In [4]:
# Split the frame into the feature matrix X and the regression target y
# (both as plain NumPy arrays).
TARGET_COLUMN = 'Maximum drawbar pull at 15% wheel slip (kN)'

# The df.head() preview shows an 'Unnamed: 0' column holding 0, 1, 2, ... —
# it appears to be a row index saved with the spreadsheet. It carries no
# physical information, so it is dropped instead of being fed to the model.
# errors='ignore' keeps this cell working if that column is absent.
# Any data passed to predict() later must provide the same feature columns
# in the same order.
X = df.drop(columns=[TARGET_COLUMN, 'Unnamed: 0'], errors='ignore').values
y = df[TARGET_COLUMN].values

In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Later cells pickle `sc` and call `sc.transform(...)` on new inputs, so the
# scaler has to exist and the model has to be trained on scaled features.
# Fit it on the training rows only (no test-set statistics leak into it) and
# apply the same transform to both splits.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [7]:
# Randomized hyper-parameter search for a gradient-boosting regressor.
# Candidate values: discrete lists for the number of boosting stages and the
# leaf limit, and a log-uniform draw between 0.01 and 1 for the learning rate.
# NOTE(review): GradientBoostingRegressor keeps its default max_depth here
# (3 per the scikit-learn docs), which may cap tree size below the larger
# max_leaf_nodes candidates — confirm whether max_depth should be tuned too.
param_distributions = {
    "n_estimators": [1, 2, 5, 10, 20, 50, 100, 200, 500],
    "max_leaf_nodes": [2, 5, 10, 20, 50, 100],
    "learning_rate": loguniform(0.01, 1),
}
gbr = RandomizedSearchCV(
    # Seed the estimator as well as the search: without it the fitted trees
    # can differ between runs even though the sampled candidates are fixed.
    GradientBoostingRegressor(random_state=0),
    param_distributions=param_distributions,
    scoring="r2",      # candidates are ranked by cross-validated R^2
    n_iter=20,         # number of parameter settings sampled
    random_state=0,    # fixes which candidates are sampled
    n_jobs=2,
)
# Last expression of the cell: fits the search and displays the fitted object.
gbr.fit(X_train, y_train)

In [8]:
# Evaluate the tuned model on the held-out test split.
gbr_pred = gbr.predict(X_test)

# Compute MSE once and reuse it for RMSE.
test_mse = mean_squared_error(y_test, gbr_pred)

print('MAE:', mean_absolute_error(y_test, gbr_pred))
# This line reports the mean absolute *percentage* error (a fraction, not a
# percent); it was previously printed under the label 'MAE:'.
print('MAPE:', mean_absolute_percentage_error(y_test, gbr_pred))
print('MSE:', test_mse)
print('RMSE:', np.sqrt(test_mse))
print('R2:', r2_score(y_test, gbr_pred))

MAE: 0.8858681541228034
MAE: 0.061547464091881665
MSE: 1.2549387372420606
RMSE: 1.1202404818796992
R2: 0.8984659600666285


In [9]:
# Score the same fitted search object on the training rows, so the numbers
# can be compared against the test-split metrics to gauge overfitting.
grid_gbr_pred = gbr.predict(X_train)

train_mae = mean_absolute_error(y_train, grid_gbr_pred)
train_mape = mean_absolute_percentage_error(y_train, grid_gbr_pred)
train_mse = mean_squared_error(y_train, grid_gbr_pred)
train_r2 = r2_score(y_train, grid_gbr_pred)

print('MAE:', train_mae)
print('MAPE:', train_mape)
print('MSE:', train_mse)
print('RMSE:', np.sqrt(train_mse))
print('R2:', train_r2)

MAE: 0.0442252552863788
MAPE: 0.0030388855382107176
MSE: 0.0036795387699792696
RMSE: 0.06065920185742036
R2: 0.9996827724658555


In [10]:
import pickle

# Wrap the fitted search object in a dict under the "model" key and
# serialise that dict to a pickle file in the working directory.
data = {"model": gbr}
with open('Draft_gbr_saved_steps.pkl', 'wb') as pkl_file:
    pickle.dump(data, pkl_file)

In [None]:
# Persist the feature scaler so new inputs can be transformed the same way.
# `sc` is expected to be the fitted scaler from an earlier cell; this raises
# NameError if it has not been created.
# The context manager guarantees the file is flushed and closed (the bare
# open(...) handle was never closed).
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(sc, scaler_file)

# pickle.dump returns None, so assigning its result left `scaler` as None.
# Keep the name, but point it at the object that was actually saved.
scaler = sc

In [None]:
# Reload the model bundle from disk and pull the fitted search object back out.
# The file name must match the one written by the save cell
# ('Draft_gbr_saved_steps.pkl'); 'gbr_saved_steps.pkl' is never written by
# this notebook.
# Unpickling can execute arbitrary code — only load files this notebook
# (or another trusted source) produced.
with open('Draft_gbr_saved_steps.pkl', 'rb') as file:
    data = pickle.load(file)
gbr_loaded = data["model"]

In [None]:
# Load the external validation sheet and separate the measured values
# (column 'Actual') from the model inputs.
nh = pd.read_excel('New Holland input data.xlsx')
nh_actual = nh['Actual'].values
nh_input = nh.drop(columns=['Actual']).values

# Apply the scaler to the inputs before prediction.
# `sc` is expected to be the fitted scaler from an earlier cell.
# NOTE(review): the remaining columns are assumed to match the training
# features in number and order — confirm against the training sheet.
nh_scaler = sc.transform(nh_input)

In [None]:
# Predict with the in-memory search object on the transformed external inputs.
nh_gbr_pred = gbr.predict(X=nh_scaler)

In [None]:
# Show predictions and measured values together for a visual comparison.
(nh_gbr_pred, nh_actual)

(array([10.14221542,  9.32061331,  7.92615741,  6.11374493,  4.87694034,
         3.63949815,  9.97645166,  8.59282397,  7.56871548,  5.96327813,
         4.96013739,  3.51124139,  9.26683548,  8.16750317,  6.90825378,
         5.64888931,  4.50943294,  3.31771527,  9.16369077,  8.01861369,
         6.70868457,  5.74076822,  4.17068452,  3.01867244,  8.8793055 ,
         7.46074798,  6.2307104 ,  5.42910773,  3.94317021,  2.60798289,
         8.29129359,  7.54950161,  5.76138327,  5.08453156,  3.73581767,
         2.51445575]),
 array([10.18155   ,  9.47316523,  8.08564258,  6.3113567 ,  5.1126279 ,
         3.8516388 , 10.01615436,  8.76771013,  7.74053108,  5.91606398,
         5.01109371,  3.44530648,  9.78793153,  8.29815729,  7.04589656,
         5.59187492,  4.52856548,  3.12465595,  9.28420029,  8.14887459,
         6.62151732,  5.17766167,  4.01587904,  3.01218262,  8.95870589,
         7.89719289,  6.14110306,  5.16535852,  3.84112457,  2.57339778,
         8.01564852,  7.0929

In [None]:
# Disabled evaluation of the external predictions against the 'Actual' values.
# scikit-learn metrics take (y_true, y_pred); MAPE and R2 are not symmetric in
# their arguments, so the measured values go first if these are re-enabled.
# print('MAE:', mean_absolute_error(nh_actual, nh_gbr_pred))
# print('MAPE:', mean_absolute_percentage_error(nh_actual, nh_gbr_pred))
# print('MSE:', mean_squared_error(nh_actual, nh_gbr_pred))
# print('R2:', r2_score(nh_actual, nh_gbr_pred))