In [None]:
import numpy as np
import xgboost as xgb
from skopt import BayesSearchCV
from skopt.space import Integer
from skopt.plots import plot_objective, plot_convergence
import matplotlib.pyplot as plt
from preprocess import get_data
import joblib


In [None]:
# Load the training and test splits through the project's get_data helper.
# NOTE(review): the train directory is passed with a trailing slash ("train/")
# while the test directory is not ("test") — confirm get_data accepts both forms.
trainData = get_data("train/", "ascii_file_counts.csv")
testData = get_data("test", "ascii_file_counts.csv")

# Each result unpacks into two values, presumably (features, labels) — TODO confirm.
(XTrain, ytrain), (XTest, ytest) = trainData, testData

In [None]:
# Multi-class gradient-boosted tree classifier. This instance is both the
# estimator handed to the hyperparameter search and the model that is later
# trained, evaluated and saved.
model = xgb.XGBClassifier(
    booster='gbtree',
    objective='multi:softmax',
    num_class=93,  # NOTE(review): hard-coded; presumably the number of distinct labels — confirm
    eval_metric='mlogloss',
    n_estimators=140,
    max_depth=8,
    random_state=42,
)

In [None]:
# Integer-valued hyperparameter ranges explored by the search.
param_space = dict(
    n_estimators=Integer(10, 200),  # number of trees
    max_depth=Integer(3, 15),       # depth of each tree
)

In [None]:
# Bayesian hyperparameter search over `param_space` for the XGBoost classifier.
#
# The estimator is a multi-class classifier, so candidates are ranked by
# classification accuracy. A regression scorer such as
# 'neg_mean_squared_error' would measure the numeric distance between class
# ids, which carries no meaning for categorical labels and would steer the
# search toward the wrong hyperparameters.
bayes_opt = BayesSearchCV(
    estimator=model,
    search_spaces=param_space,
    n_iter=10,           # number of parameter settings sampled
    cv=5,                # 5-fold cross-validation
    scoring='accuracy',  # objective: maximize classification accuracy
    n_jobs=-1,           # use all CPU cores
    random_state=42,
    verbose=3
)

In [None]:
# Run the cross-validated hyperparameter search on the training split.
bayes_opt.fit(X=XTrain, y=ytrain)

In [None]:
# Train the final model with the hyperparameters selected by the search.
# BayesSearchCV tunes clones of `model`, so `model` itself still carries the
# hand-written n_estimators / max_depth; without copying the best parameters
# over, the search result would be silently discarded.
model.set_params(**bayes_opt.best_params_)
model.fit(XTrain, ytrain)

In [None]:
# Score the trained model on the test split with the project's evaluation helper.
# NOTE(review): this import would normally live in the imports cell at the top.
from testing_models import evaluate_model

y_pred = model.predict(XTest)
test_report = evaluate_model(ytest, y_pred)
print(test_report)

In [None]:
# Persist the trained classifier to disk for later reuse.
joblib.dump(value=model, filename='xgboost.joblib')