In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification, make_regression
from sklearn.model_selection import train_test_split

# Generate a random classification dataset
X_class, y_class = make_classification(n_samples=1000, n_features=20, n_informative=15, n_redundant=5, random_state=42)

# Generate a random regression dataset
X_reg, y_reg = make_regression(n_samples=1000, n_features=20, n_informative=15, noise=0.1, random_state=42)

# Split the datasets into training and testing sets
X_class_train, X_class_test, y_class_train, y_class_test = train_test_split(X_class, y_class, test_size=0.2, random_state=42)
X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)


In [2]:
#!pip install xgboost


Defaulting to user installation because normal site-packages is not writeable
Collecting xgboost
  Downloading xgboost-2.1.0-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-2.1.0-py3-none-win_amd64.whl (124.9 MB)
   ---------------------------------------- 0.0/124.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/124.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/124.9 MB 330.3 kB/s eta 0:06:19
   ---------------------------------------- 0.0/124.9 MB 326.8 kB/s eta 0:06:23
   ---------------------------------------- 0.1/124.9 MB 581.0 kB/s eta 0:03:35
   ---------------------------------------- 0.1/124.9 MB 602.4 kB/s eta 0:03:28
   ---------------------------------------- 0.2/124.9 MB 942.1 kB/s eta 0:02:13
   ---------------------------------------- 0.3/124.9 MB 874.6 kB/s eta 0:02:23
   ---------------------------------------- 0.4/124.9 MB 969.8 kB/s eta 0:02:09
   ---------------------------------------- 0.4/124.9 MB 998.3 kB/s eta 


[notice] A new release of pip is available: 24.0 -> 24.1.1
[notice] To update, run: C:\Users\princ\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [3]:
import xgboost as xgb
from sklearn.metrics import accuracy_score, mean_squared_error

# Define the XGBoost models
xgb_class_model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb_reg_model = xgb.XGBRegressor()


In [4]:
from sklearn.model_selection import GridSearchCV

# Hyperparameter grid for classification
param_grid_class = {
    'n_estimators': [100, 200],
    'max_depth': [3, 6, 9],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.8, 1.0]
}

# Hyperparameter grid for regression
param_grid_reg = {
    'n_estimators': [100, 200],
    'max_depth': [3, 6, 9],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.8, 1.0]
}

# Set up GridSearchCV for classification
grid_search_class = GridSearchCV(estimator=xgb_class_model, param_grid=param_grid_class, scoring='accuracy', cv=3, n_jobs=-1)
grid_search_class.fit(X_class_train, y_class_train)

# Set up GridSearchCV for regression
grid_search_reg = GridSearchCV(estimator=xgb_reg_model, param_grid=param_grid_reg, scoring='neg_mean_squared_error', cv=3, n_jobs=-1)
grid_search_reg.fit(X_reg_train, y_reg_train)


Parameters: { "use_label_encoder" } are not used.



In [5]:
# Best parameters for classification
best_params_class = grid_search_class.best_params_
print("Best parameters for classification:", best_params_class)

# Best parameters for regression
best_params_reg = grid_search_reg.best_params_
print("Best parameters for regression:", best_params_reg)

# Train the best models
best_class_model = grid_search_class.best_estimator_
best_reg_model = grid_search_reg.best_estimator_

# Make predictions
y_class_pred = best_class_model.predict(X_class_test)
y_reg_pred = best_reg_model.predict(X_reg_test)

# Evaluate classification model
accuracy = accuracy_score(y_class_test, y_class_pred)
print("Classification accuracy:", accuracy)

# Evaluate regression model
mse = mean_squared_error(y_reg_test, y_reg_pred)
print("Regression mean squared error:", mse)


Best parameters for classification: {'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 200, 'subsample': 0.8}
Best parameters for regression: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200, 'subsample': 0.8}
Classification accuracy: 0.945
Regression mean squared error: 5098.173568439944
