#### Import necessary modules

In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb

from sklearn.metrics import mean_squared_error

#### Load processed dataset

In [2]:
# Read the processed training table from disk
data = pd.read_csv('Processed_Con_Train.csv')

# Descriptive features: every column except the first and the last,
# restricted to numeric dtypes
feature_cols = data.iloc[:, 1:-1]
X = feature_cols.select_dtypes(include=np.number)

# Target label: the last column, extracted as a NumPy array
y = data.iloc[:, -1].values

print("Descriptive Features (X) Dimensions: ", X.shape)
print("Target Label (y) dimensions: ", y.shape)

Descriptive Features (X) Dimensions:  (3870, 19)
Target Label (y) dimensions:  (3870,)


#### Split the training data 75:25 into training and validation sets

In [3]:
# Hold out 25% of the rows for validation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=1234,
)

In [4]:
# Standardize features (optional but recommended).
# The scaler is fitted on the training split only, and the same fitted
# scaler is then applied to both the training and the validation split.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

#### 1. Linear Regression

In [5]:
# Initialize and train the Linear Regression model on the training split
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

# Make predictions on the validation data
y_pred_lr = lin_reg.predict(X_test)

# Compute & display RMSE.
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, so this form runs on
# both older and newer releases.
rmse = np.sqrt(mean_squared_error(y_test, y_pred_lr))
print("RMSE of Linear Regression: ", rmse)

RMSE of Linear Regression:  412.6906384864953


#### 2. Support Vector Regressor (SVR) 

In [6]:
# Initialize and train the Support Vector Regressor (SVR)
svr_reg = SVR(kernel="linear")  # choose a different kernel (linear, rbf, sigmoid) if needed
svr_reg.fit(X_train, y_train)

# Make predictions on the validation data
y_pred_svr = svr_reg.predict(X_test)

# Compute & display RMSE.
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, so this form runs on
# both older and newer releases.
rmse = np.sqrt(mean_squared_error(y_test, y_pred_svr))
print("RMSE of SVR: ", rmse)

RMSE of SVR:  416.55830127038485


#### 3. Random Forest Regressor

In [7]:
# Initialize and train the Random Forest Regressor
# (fixed random_state so repeated runs build the same forest)
rf_reg = RandomForestRegressor(random_state=1234)
rf_reg.fit(X_train, y_train)

# Make predictions on the validation data
y_pred_rf = rf_reg.predict(X_test)

# Compute & display RMSE.
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, so this form runs on
# both older and newer releases.
rmse = np.sqrt(mean_squared_error(y_test, y_pred_rf))
print("RMSE of Random Forest Regressor: ", rmse)

RMSE of Random Forest Regressor:  370.89987544864886


#### 4. XGBoost Regressor

In [8]:
# Initialize and train the XGBoost Regressor
# (fixed random_state for reproducible training)
xgb_reg = xgb.XGBRegressor(random_state=1234)
xgb_reg.fit(X_train, y_train)

# Make predictions on the validation data
y_pred_xgb = xgb_reg.predict(X_test)

# Compute & display RMSE.
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, so this form runs on
# both older and newer releases.
rmse = np.sqrt(mean_squared_error(y_test, y_pred_xgb))
print("RMSE of XGBoost Regressor: ", rmse)

RMSE of XGBoost Regressor:  403.5319638667749


In [9]:
# Display all models' predictions, one validation row per line.
# Column order: actual :: Linear Regression :: SVR :: Random Forest :: XGBoost
rows = zip(y_test, y_pred_lr, y_pred_svr, y_pred_rf, y_pred_xgb)
for actual, pred_lr, pred_svr, pred_rf, pred_xgb in rows:
    print(actual, pred_lr, pred_svr, pred_rf, pred_xgb, sep=" :: ")

200.0 :: 139.10118891294258 :: 198.4680907612033 :: 188.1 :: 202.24039
910.0 :: 974.7971541910122 :: 1012.3262831729 :: 953.08 :: 820.1601
400.0 :: 493.28616927649824 :: 376.73102601994253 :: 470.85 :: 347.39136
800.0 :: 998.1531494690834 :: 972.9330927085914 :: 1949.34 :: 936.65295
400.0 :: 616.2385992787933 :: 596.2441899260643 :: 394.9333333333333 :: 315.8685
480.0 :: 498.36695554560697 :: 450.68223172469834 :: 499.54 :: 481.31393
300.0 :: 380.49030730446043 :: 354.8427936107496 :: 281.86 :: 271.4833
1500.0 :: 1176.6704652861436 :: 1046.0227445882256 :: 1449.75 :: 1468.193
540.0 :: 392.67297090653983 :: 529.8069485984222 :: 593.1 :: 582.9793
250.0 :: 179.01671036425978 :: 258.3900377263346 :: 305.4 :: 376.44247
200.0 :: 328.3064082989776 :: 323.6294254824418 :: 200.0 :: 204.60301
300.0 :: 395.26420252504704 :: 375.7142592540237 :: 368.2421428571429 :: 347.69376
450.0 :: 582.876527785071 :: 400.67565381711415 :: 417.38 :: 436.62344
700.0 :: 686.6520942787698 :: 650.7933343733007 :: 6