In [3]:
!pip install xgboost


Collecting xgboost
  Using cached xgboost-3.0.2-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.2-py3-none-win_amd64.whl (150.0 MB)
   ---------------------------------------- 0.0/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.3/150.0 MB ? eta -:--:--
   ---------------------------------------- 1.6/150.0 MB 7.6 MB/s eta 0:00:20
    --------------------------------------- 2.1/150.0 MB 5.3 MB/s eta 0:00:28
    --------------------------------------- 3.4/150.0 MB 5.0 MB/s eta 0:00:30
   - -------------------------------------- 4.2/150.0 MB 4.7 MB/s eta 0:00:32
   - -------------------------------------- 5.0/150.0 MB 4.4 MB/s eta 0:00:33
   - -------------------------------------- 5.8/150.0 MB 4.3 MB/s eta 0:00:34
   - -------------------------------------- 6.3/150.0 MB 4.2 MB/s eta 0:00:34
   - -------------------------------------- 7.3/150.0 MB 4.1 MB/s eta 0:00:35
   -- ------------------------------------- 8.1/150.0 MB 4.1 MB/s eta 0:00:35
   -- -


[notice] A new release of pip is available: 24.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [12]:
import time

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

In [5]:
# Fetch the California housing dataset and separate features from the target.
data = fetch_california_housing()
x = data.data
y = data.target

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)


In [10]:
# Report the dimensions of the feature matrix: rows are observations,
# columns are features.
N_observations, N_features = x.shape
shape_report = [
    'Number of Observations: ' + str(N_observations),
    'Number of Features: ' + str(N_features),
]
print('\n'.join(shape_report))


Number of Observations: 20640
Number of Features: 8


In [11]:
# Build both regressors from one shared set of keyword arguments so they use
# the same number of estimators and the same random seed.
n_estimators = 100
shared_params = dict(n_estimators=n_estimators, random_state=42)

rf = RandomForestRegressor(**shared_params)
xgb = XGBRegressor(**shared_params)

In [13]:
# Measure training time for Random Forest.
# time.perf_counter() is used rather than time.time(): it is monotonic and
# backed by the highest-resolution clock available, so the measured interval
# is not distorted by system clock adjustments or a coarse wall-clock tick.
start_time_rf = time.perf_counter()
rf.fit(x_train, y_train)
end_time_rf = time.perf_counter()
rf_train_time = end_time_rf - start_time_rf  # elapsed seconds

# Measure training time for XGBoost (same clock, same training split).
start_time_xgb = time.perf_counter()
xgb.fit(x_train, y_train)
end_time_xgb = time.perf_counter()
xgb_train_time = end_time_xgb - start_time_xgb  # elapsed seconds

In [14]:
# Measure prediction time for Random Forest
# time.perf_counter() is used rather than time.time(): predictions finish in
# a few milliseconds, so a monotonic, high-resolution clock is needed for the
# interval to be meaningful.
start_time_rf = time.perf_counter()
y_pred_rf = rf.predict(x_test)
end_time_rf = time.perf_counter()
rf_pred_time = end_time_rf - start_time_rf  # elapsed seconds

# Measure prediction time for XGBoost
start_time_xgb = time.perf_counter()
y_pred_xgb = xgb.predict(x_test)
end_time_xgb = time.perf_counter()
xgb_pred_time = end_time_xgb - start_time_xgb  # elapsed seconds

In [17]:
# Score each model's predictions against the held-out targets using mean
# squared error and the R^2 coefficient of determination.
# (r2_score is imported in the notebook's top import cell with the other
# sklearn.metrics names.)
mse_rf = mean_squared_error(y_test, y_pred_rf)
mse_xgb = mean_squared_error(y_test, y_pred_xgb)
r2_rf = r2_score(y_test, y_pred_rf)
r2_xgb = r2_score(y_test, y_pred_xgb)

In [18]:
# Format one accuracy summary line per model (4 decimal places), then print
# them together so the labels stay right-aligned on the colon.
rf_metrics_line = f'Random Forest:  MSE = {mse_rf:.4f}, R^2 = {r2_rf:.4f}'
xgb_metrics_line = f'      XGBoost:  MSE = {mse_xgb:.4f}, R^2 = {r2_xgb:.4f}'
print(rf_metrics_line, xgb_metrics_line, sep='\n')

Random Forest:  MSE = 0.2554, R^2 = 0.8051
      XGBoost:  MSE = 0.2226, R^2 = 0.8301


In [19]:
# Format one timing summary line per model (seconds, 3 decimal places), then
# print them together.
rf_timing_line = f'Random Forest:  Training Time = {rf_train_time:.3f} seconds, Testing time = {rf_pred_time:.3f} seconds'
xgb_timing_line = f'      XGBoost:  Training Time = {xgb_train_time:.3f} seconds, Testing time = {xgb_pred_time:.3f} seconds'
print(rf_timing_line, xgb_timing_line, sep='\n')

Random Forest:  Training Time = 11.814 seconds, Testing time = 0.258 seconds
      XGBoost:  Training Time = 0.198 seconds, Testing time = 0.015 seconds
