In [3]:
pip install catboost

Collecting catboost
  Downloading catboost-1.2.7-cp312-cp312-win_amd64.whl.metadata (1.2 kB)
Collecting graphviz (from catboost)
  Downloading graphviz-0.20.3-py3-none-any.whl.metadata (12 kB)
Downloading catboost-1.2.7-cp312-cp312-win_amd64.whl (101.7 MB)
   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/101.7 MB 640.0 kB/s eta 0:02:39
   ---------------------------------------- 0.1/101.7 MB 812.7 kB/s eta 0:02:06
   ---------------------------------------- 0.3/101.7 MB 2.5 MB/s eta 0:00:40
   ---------------------------------------- 0.4/101.7 MB 2.6 MB/s eta 0:00:40
   ---------------------------------------- 0.7/101.7 MB 3.5 MB/s eta 0:00:30
   ---------------------------------------- 1.1/101.7 MB 4.3 MB/s eta 0:00:24
    --------------------------------------- 1.5/101.7 MB 5.1 MB/s eta 0:00:20
    --------------------------------------- 2.0/101.7 MB 5.7 MB/s eta 0:00:18
    --------------------------------------- 

In [4]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

In [7]:
# Load the dataset
df = pd.read_csv("dataset_1.csv")

# Separate feature and target
X = df.drop(['ROP_AVG'], axis=1)
y = df['ROP_AVG']

# Train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 
                                                    random_state=10)

# Define models
models = {
    'Gradient Boosting': GradientBoostingRegressor(
        min_samples_leaf=6, max_depth=20, n_estimators=100, learning_rate=0.1, random_state=10
    ),
    'Random Forest': RandomForestRegressor(
        n_estimators=100, max_depth=20, min_samples_split=4, random_state=10
    ),
    'Support Vector Machine': SVR(
        kernel='rbf', C=1.0, epsilon=0.1
    ),
    'XGBoost': XGBRegressor(
        n_estimators=100, max_depth=10, learning_rate=0.1, subsample=0.8, colsample_bytree=0.8, random_state=10, use_label_encoder=False, eval_metric='rmse'
    ),
    'CatBoost': CatBoostRegressor(
        iterations=500, depth=10, learning_rate=0.1, l2_leaf_reg=3, verbose=0, random_state=10
    )
}

# Evaluate each model
for model_name, model in models.items():
    # Make pipeline
    steps = [('scaler', StandardScaler()), 
             ('model', model)]
    pipe = Pipeline(steps)

    # Fit pipeline to training data
    pipe.fit(X_train, y_train)

    # Evaluate model with R2, RMSE, and MAE metrics
    train_score = pipe.score(X_train, y_train)
    test_score = pipe.score(X_test, y_test)

    # Predictions
    y_pred_train = pipe.predict(X_train)
    y_pred_test = pipe.predict(X_test)

    # Calculate RMSE and MAE
    train_rmse = np.sqrt(mean_squared_error(y_train, y_pred_train))
    test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))

    train_mae = mean_absolute_error(y_train, y_pred_train)
    test_mae = mean_absolute_error(y_test, y_pred_test)

    # Print metrics
    print(f"\nModel: {model_name}")
    print(f'R2 on train set: {train_score:.2f}')
    print(f'R2 on test set: {test_score:.2f}')
    print(f'RMSE on train set: {train_rmse:.4f}')
    print(f'RMSE on test set: {test_rmse:.4f}')
    print(f'MAE on train set: {train_mae:.4f}')
    print(f'MAE on test set: {test_mae:.4f}')



Model: Gradient Boosting
R2 on train set: 0.98
R2 on test set: 0.77
RMSE on train set: 0.0002
RMSE on test set: 0.0007
MAE on train set: 0.0001
MAE on test set: 0.0005

Model: Random Forest
R2 on train set: 0.90
R2 on test set: 0.71
RMSE on train set: 0.0005
RMSE on test set: 0.0007
MAE on train set: 0.0003
MAE on test set: 0.0005

Model: Support Vector Machine
R2 on train set: -0.66
R2 on test set: -0.98
RMSE on train set: 0.0019
RMSE on test set: 0.0019
MAE on train set: 0.0017
MAE on test set: 0.0017

Model: XGBoost
R2 on train set: 0.90
R2 on test set: 0.73
RMSE on train set: 0.0005
RMSE on test set: 0.0007
MAE on train set: 0.0003
MAE on test set: 0.0005


Parameters: { "use_label_encoder" } are not used.




Model: CatBoost
R2 on train set: 1.00
R2 on test set: 0.64
RMSE on train set: 0.0000
RMSE on test set: 0.0008
MAE on train set: 0.0000
MAE on test set: 0.0006
