In [1]:
!pip install pandas
!pip install scikit-learn
!pip install xgboost




In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostRegressor, RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
# Load the diamond dataset
diamonds_df = sns.load_dataset('diamonds')

# Preprocess the data if necessary (e.g., encoding categorical variables)
# Encode categorical variables
label_encoder = LabelEncoder()
diamonds_df['cut'] = label_encoder.fit_transform(diamonds_df['cut'])
diamonds_df['color'] = label_encoder.fit_transform(diamonds_df['color'])
diamonds_df['clarity'] = label_encoder.fit_transform(diamonds_df['clarity'])

# Split data into features (X) and target (y)
X = diamonds_df.drop('price', axis=1)
y = diamonds_df['price']
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train AdaBoost Regressor
adaBoost_regressor = AdaBoostRegressor()
adaBoost_regressor.fit(X_train, y_train)

# Train XGBoost Regressor
xgBoost_regressor = XGBRegressor()
xgBoost_regressor.fit(X_train, y_train)

# Train Random Forest Regressor
randomForest_regressor = RandomForestRegressor()
randomForest_regressor.fit(X_train, y_train)

# Evaluate the performance of each regressor
adaBoost_predictions = adaBoost_regressor.predict(X_test)
xgBoost_predictions = xgBoost_regressor.predict(X_test)
randomForest_predictions = randomForest_regressor.predict(X_test)

# Calculate Mean Squared Error for each regressor
adaBoost_mse = mean_squared_error(y_test, adaBoost_predictions)
xgBoost_mse = mean_squared_error(y_test, xgBoost_predictions)
randomForest_mse = mean_squared_error(y_test, randomForest_predictions)

print("Mean Squared Error (AdaBoost):", adaBoost_mse)
print("Mean Squared Error (XGBoost):", xgBoost_mse)
print("Mean Squared Error (Random Forest):", randomForest_mse)


Mean Squared Error (AdaBoost): 1670634.8562896068
Mean Squared Error (XGBoost): 297651.2769023406
Mean Squared Error (Random Forest): 294801.2253448156


In [7]:
# Accessing XGBoost regressor parameters
xgBoost_params = xgBoost_regressor.get_params()

# Print XGBoost regressor parameters
print("XGBoost Regressor Parameters:", xgBoost_params)

XGBoost Regressor Parameters: {'objective': 'reg:squarederror', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': None, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': None, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': None, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': None, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': None}


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostRegressor, RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV

diamonds_df = sns.load_dataset('diamonds')

# Preprocess the data if necessary (e.g., encoding categorical variables)
# Encode categorical variables
label_encoder = LabelEncoder()
diamonds_df['cut'] = label_encoder.fit_transform(diamonds_df['cut'])
diamonds_df['color'] = label_encoder.fit_transform(diamonds_df['color'])
diamonds_df['clarity'] = label_encoder.fit_transform(diamonds_df['clarity'])

# Split data into features (X) and target (y)
X = diamonds_df.drop('price', axis=1)
y = diamonds_df['price']
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define parameter grid for Random Forest Regressor
rf_param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10]
}

# Initialize Random Forest Regressor
rf_regressor = RandomForestRegressor()

# Perform Grid Search for Random Forest
rf_grid_search = GridSearchCV(rf_regressor, rf_param_grid, cv=5)
rf_grid_search.fit(X_train, y_train)

# Get the best parameters and best score for Random Forest
best_rf_params = rf_grid_search.best_params_
best_rf_score = rf_grid_search.best_score_

print("Best parameters for Random Forest:", best_rf_params)
print("Best score for Random Forest:", best_rf_score)

# Define parameter grid for XGBoost Regressor
xgb_param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2]
}

# Initialize XGBoost Regressor
xgb_regressor = XGBRegressor()

# Perform Grid Search for XGBoost
xgb_grid_search = GridSearchCV(xgb_regressor, xgb_param_grid, cv=5)
xgb_grid_search.fit(X_train, y_train)

# Get the best parameters and best score for XGBoost
best_xgb_params = xgb_grid_search.best_params_
best_xgb_score = xgb_grid_search.best_score_

print("Best parameters for XGBoost:", best_xgb_params)
print("Best score for XGBoost:", best_xgb_score)


Best parameters for Random Forest: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 150}
Best score for Random Forest: 0.9806695474421693
Best parameters for XGBoost: {'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 150}
Best score for XGBoost: 0.9815949553680431


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import  RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
# Load the diamond dataset
diamonds_df = sns.load_dataset('diamonds')

# Preprocess the data if necessary (e.g., encoding categorical variables)
# Encode categorical variables
label_encoder = LabelEncoder()
diamonds_df['cut'] = label_encoder.fit_transform(diamonds_df['cut'])
diamonds_df['color'] = label_encoder.fit_transform(diamonds_df['color'])
diamonds_df['clarity'] = label_encoder.fit_transform(diamonds_df['clarity'])

# Split data into features (X) and target (y)
X = diamonds_df.drop('price', axis=1)
y = diamonds_df['price']
#define the model
rf_regressor = RandomForestRegressor()
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train AdaBoost Regressor
adaBoost_regressor = AdaBoostRegressor()
adaBoost_regressor.fit(X_train, y_train)

# Train XGBoost Regressor
xgBoost_regressor = XGBRegressor()
xgBoost_regressor.fit(X_train, y_train)

# Train Random Forest Regressor
randomForest_regressor = RandomForestRegressor()
randomForest_regressor.fit(X_train, y_train)

# Evaluate the performance of each regressor
adaBoost_predictions = adaBoost_regressor.predict(X_test)
xgBoost_predictions = xgBoost_regressor.predict(X_test)
randomForest_predictions = randomForest_regressor.predict(X_test)

# Calculate Mean Squared Error for each regressor
adaBoost_mse = mean_squared_error(y_test, adaBoost_predictions)
xgBoost_mse = mean_squared_error(y_test, xgBoost_predictions)
randomForest_mse = mean_squared_error(y_test, randomForest_predictions)

print("Mean Squared Error (AdaBoost):", adaBoost_mse)
print("Mean Squared Error (XGBoost):", xgBoost_mse)
print("Mean Squared Error (Random Forest):", randomForest_mse)
