In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [2]:
df = pd.read_csv("synthetic_data.csv")

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,Payload (Ton),Empty Travel Distance (km),Load Travel Distance (km),Empty Speed (km/hr),Load Speed (km/hr),Empty Travel Time (hr),Empty Stop Time (hr),Load Time (hr),Load Stop Time (hr),Load Travel Time (hr),TKPH (Ton.Km/Hr)
0,0,153.689871,3.458257,1.731073,13.853573,17.72295,0.094863,0.001,0.059778,0.016736,0.137024,71.884526
1,1,142.843169,3.058542,2.183726,17.734274,20.612661,0.093944,0.020299,0.026,0.012971,0.147496,111.959138
2,2,156.268816,2.330209,1.631066,19.389591,20.783555,0.053,0.003187,0.058721,0.015621,0.127955,80.599379
3,3,171.221396,1.735279,2.044309,27.915883,22.361973,0.097767,0.02486,0.044494,0.022718,0.052,156.645315
4,4,141.205192,2.867904,0.8,22.984029,15.975268,0.130718,0.043222,0.058612,0.027223,0.105716,273.406861


In [4]:
df = df.drop(columns=["Unnamed: 0"])

In [5]:
df.head()

Unnamed: 0,Payload (Ton),Empty Travel Distance (km),Load Travel Distance (km),Empty Speed (km/hr),Load Speed (km/hr),Empty Travel Time (hr),Empty Stop Time (hr),Load Time (hr),Load Stop Time (hr),Load Travel Time (hr),TKPH (Ton.Km/Hr)
0,153.689871,3.458257,1.731073,13.853573,17.72295,0.094863,0.001,0.059778,0.016736,0.137024,71.884526
1,142.843169,3.058542,2.183726,17.734274,20.612661,0.093944,0.020299,0.026,0.012971,0.147496,111.959138
2,156.268816,2.330209,1.631066,19.389591,20.783555,0.053,0.003187,0.058721,0.015621,0.127955,80.599379
3,171.221396,1.735279,2.044309,27.915883,22.361973,0.097767,0.02486,0.044494,0.022718,0.052,156.645315
4,141.205192,2.867904,0.8,22.984029,15.975268,0.130718,0.043222,0.058612,0.027223,0.105716,273.406861


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 11 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Payload (Ton)               1000 non-null   float64
 1   Empty Travel Distance (km)  1000 non-null   float64
 2   Load Travel Distance (km)   1000 non-null   float64
 3   Empty Speed (km/hr)         1000 non-null   float64
 4   Load Speed (km/hr)          1000 non-null   float64
 5   Empty Travel Time (hr)      1000 non-null   float64
 6   Empty Stop Time (hr)        1000 non-null   float64
 7   Load Time (hr)              1000 non-null   float64
 8   Load Stop Time (hr)         1000 non-null   float64
 9   Load Travel Time (hr)       1000 non-null   float64
 10  TKPH (Ton.Km/Hr)            1000 non-null   float64
dtypes: float64(11)
memory usage: 86.1 KB


In [7]:
X = df.drop(columns=["TKPH (Ton.Km/Hr)"])
y = df["TKPH (Ton.Km/Hr)"]

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
lr = LinearRegression()
lr.fit(X_train.values, y_train.values)

In [10]:
lr_pred = lr.predict(X_test.values)

In [11]:
def evaluation(y_test, predictions):
    mae = mean_absolute_error(y_test, predictions)
    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    return mae, mse, r2

In [12]:
lr_mae, lr_mse, lr_r2 = evaluation(y_test, lr_pred)

In [13]:
print(f"MAE: {lr_mae} | MSE: {lr_mse} | R2 SCORE: {lr_r2}")

MAE: 75.22860074546836 | MSE: 8299.85699855414 | R2 SCORE: -0.0035388425410312063


In [14]:
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)

In [15]:
rf_pred = rf.predict(X_test)

In [16]:
rf_mae, rf_mse, rf_r2 = evaluation(y_test, rf_pred)

In [17]:
print(f"MAE: {rf_mae} | MSE: {rf_mse} | R2 SCORE: {rf_r2}")

MAE: 76.6002916801525 | MSE: 8790.896729885559 | R2 SCORE: -0.06291064180309491


In [18]:
xgb_regressor = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
xgb_regressor.fit(X_train, y_train)

In [19]:
xgb_pred = xgb_regressor.predict(X_test)

In [20]:
xgb_mae, xgb_mse, xgb_r2 = evaluation(y_test, xgb_pred)

In [21]:
print(f"MAE: {xgb_mae} | MSE: {xgb_mse} | R2 SCORE: {xgb_r2}")

MAE: 87.30550126609907 | MSE: 10872.011159011885 | R2 SCORE: -0.314538972961655


In [22]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.values)
X_test_scaled = scaler.fit_transform(X_test.values)

In [23]:
svr = SVR(kernel='poly')
svr.fit(X_train_scaled, y_train)

In [24]:
svr_pred = svr.predict(X_test_scaled)

In [25]:
svr_mae, svr_mse, svr_r2 = evaluation(y_test, svr_pred)

In [26]:
print(f"MAE: {svr_mae} | MSE: {svr_mse} | R2 SCORE: {svr_r2}")

MAE: 75.55367191426193 | MSE: 8364.882676396892 | R2 SCORE: -0.011401121793448432


In [27]:
dt = DecisionTreeRegressor()

In [28]:
dt.fit(X_train, y_train)

In [29]:
dt_pred = dt.predict(X_test)

In [30]:
dt_mae, dt_mse, dt_r2 = evaluation(y_test, dt_pred)

In [31]:
print(f"MAE: {dt_mae} | MSE: {dt_mse} | R2 SCORE: {dt_r2}")

MAE: 105.26126887420872 | MSE: 17131.703742926144 | R2 SCORE: -1.071400766052585


In [32]:
ridge = Ridge()
ridge.fit(X_train.values, y_train.values)

In [33]:
ridge_pred = ridge.predict(X_test.values)

In [34]:
ridge_mae, ridge_mse, ridge_r2 = evaluation(y_test, ridge_pred)

In [35]:
print(f"MAE: {ridge_mae} | MSE: {ridge_mse} | R2 SCORE: {ridge_r2}")

MAE: 75.69690694100436 | MSE: 8378.536207350764 | R2 SCORE: -0.01305197537471292


In [36]:
gbr = GradientBoostingRegressor()
gbr.fit(X_train_scaled, y_train)

In [37]:
gbr_pred = gbr.predict(X_test_scaled)

In [38]:
gbr_mae, gbr_mse, gbr_r2 = evaluation(y_test, gbr_pred)

In [39]:
print(f"MAE: {gbr_mae} | MSE: {gbr_mse} | R2 SCORE: {gbr_r2}")

MAE: 77.52031441910034 | MSE: 8993.554083721689 | R2 SCORE: -0.08741401895002254


In [40]:
def prediction(model, scaler, new_data):
    new_data_scaled = scaler.transform(new_data)
    predictions = model.predict(new_data_scaled)
    return predictions

In [41]:
new = [[142.8, 3.0, 2.1, 17.7, 20.6, 0.09, 0.02, 0.02, 0.012, 0.14]]

new_predictions = prediction(ridge, scaler, new)

In [42]:
print("Predictions on new data:", new_predictions)

Predictions on new data: [130.19579701]
