In [None]:
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_squared_log_error
from sklearn.model_selection import train_test_split

# Load the training and test datasets
train_data = pd.read_csv('/content/drive/MyDrive/SE355/Thực hành/Dataset/train.csv')
test_data = pd.read_csv('/content/drive/MyDrive/SE355/Thực hành/Dataset/test.csv')

# Column the model is trained to predict
target_column = "cost"

# 'id' is the row identifier written to the submission file further down; it
# is a key, not a predictor, so it is excluded from the model inputs together
# with the target. The filter is a no-op if train.csv has no 'id' column.
non_feature_columns = {target_column, "id"}
feature_columns = [col for col in train_data.columns if col not in non_feature_columns]

# Split features and target
X = train_data[feature_columns]
y = train_data[target_column]

# Hold out 20% of the training rows for validation (fixed seed for reproducibility)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# The test set must expose exactly the same feature columns, in the same order
X_test = test_data[feature_columns]


# Wrap each split in XGBoost's DMatrix container; the test matrix carries no labels.
dtrain = xgb.DMatrix(data=X_train, label=y_train)
dval = xgb.DMatrix(data=X_val, label=y_val)
dtest = xgb.DMatrix(data=X_test)

# Booster hyperparameters: squared-error regression with depth-6 trees
params = dict(
    objective="reg:squarederror",
    learning_rate=0.1,
    max_depth=6,
    random_state=42,
)

# Train for at most 100 boosting rounds, logging the validation metric every
# round and stopping once it has failed to improve for 10 consecutive rounds.
model = xgb.train(
    params=params,
    dtrain=dtrain,
    num_boost_round=100,
    evals=[(dval, "validation")],
    early_stopping_rounds=10,
    verbose_eval=True,
)

# Training used early stopping, so the booster may contain rounds grown after
# the best validation score. The native Booster.predict does not reliably drop
# those rounds on its own, so restrict prediction explicitly to the trees up to
# and including the best iteration.
best_rounds = (0, model.best_iteration + 1)

# Make predictions on test set
test_predictions = model.predict(dtest, iteration_range=best_rounds)

# Create predictions DataFrame keyed by the test row identifier
predictions = pd.DataFrame({
    'id': test_data['id'],
    'cost': test_predictions
})
# Save predictions to CSV
predictions.to_csv("/content/drive/MyDrive/SE355/Thực hành/Outputs/xgboost.csv", index=False)

# Evaluate on validation set with the same tree range used for the submission
val_predictions = model.predict(dval, iteration_range=best_rounds)
# Squared-error regression can emit negative values, which
# mean_squared_log_error rejects with a ValueError; clip them to 0 for the
# metric only. The submission values above are written unclipped.
rmsle = mean_squared_log_error(y_val, val_predictions.clip(min=0)) ** 0.5
print(f"Validation RMSLE: {rmsle}")


[0]	validation-rmse:29.78001
[1]	validation-rmse:29.62030
[2]	validation-rmse:29.48880
[3]	validation-rmse:29.37872
[4]	validation-rmse:29.28522
[5]	validation-rmse:29.20967
[6]	validation-rmse:29.13825
[7]	validation-rmse:29.08037
[8]	validation-rmse:29.02888
[9]	validation-rmse:28.98616
[10]	validation-rmse:28.94928
[11]	validation-rmse:28.91710
[12]	validation-rmse:28.88973
[13]	validation-rmse:28.86107
[14]	validation-rmse:28.83702
[15]	validation-rmse:28.81486
[16]	validation-rmse:28.79469
[17]	validation-rmse:28.77864
[18]	validation-rmse:28.76495
[19]	validation-rmse:28.75241
[20]	validation-rmse:28.73941
[21]	validation-rmse:28.72455
[22]	validation-rmse:28.70974
[23]	validation-rmse:28.68967
[24]	validation-rmse:28.67950
[25]	validation-rmse:28.66798
[26]	validation-rmse:28.65651
[27]	validation-rmse:28.64366
[28]	validation-rmse:28.63261
[29]	validation-rmse:28.62077
[30]	validation-rmse:28.61006
[31]	validation-rmse:28.59700
[32]	validation-rmse:28.59031
[33]	validation-rmse