In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from scipy.io import arff
from xgboost import XGBClassifier

In [2]:
data, meta = arff.loadarff('Emotionsg.arff')
data = pd.DataFrame(data)
label_encoder = LabelEncoder()
data['label'] = label_encoder.fit_transform(data['label'])

In [3]:
X = data.drop(columns=['label'])
y = data['label']

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
param_grid_xgb = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 6, 9],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0]
}

In [6]:
xgb_model = XGBClassifier(random_state=42)
grid_search_xgb = GridSearchCV(xgb_model, param_grid_xgb, cv=5, scoring='accuracy')
grid_search_xgb.fit(X_train, y_train)
best_xgb_model = grid_search_xgb.best_estimator_
y_pred_xgb = best_xgb_model.predict(X_test)
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)

In [7]:
print(f'Best XGBoost Parameters: {grid_search_xgb.best_params_}')
print(f'XGBoost Accuracy: {accuracy_xgb:.2f}')

Best XGBoost Parameters: {'colsample_bytree': 0.8, 'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 200, 'subsample': 0.8}
XGBoost Accuracy: 0.97
