In [None]:
import pandas as pd
import xgboost as xgb
from xgboost import plot_importance
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

In [None]:
# Load the dataset (path is relative to the notebook's working directory).
df = pd.read_csv('voice.csv')

# Encode the class label as integers. The fitted encoder is kept so that
# predicted codes can be mapped back with label_encoder.inverse_transform.
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

# Select only the important features
features = ['meanfun', 'mode', 'IQR', 'Q25']
X = df[features]
y = df['label']

# Hold out 30% of the rows for testing; random_state fixes the split so the
# reported accuracy is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=777)

# Define the XGBoost booster parameters for the native xgb.train API.
# NOTE: 'use_label_encoder' is intentionally absent: it is an option of the
# scikit-learn wrapper (XGBClassifier), not a booster parameter, and passing it
# to xgb.train only produces an "unused parameters" warning.
params = {
    'objective': 'binary:logistic',   # predictions are probabilities of class 1
    'eta': 0.1,                       # learning rate
    'subsample': 0.6,                 # fraction of rows sampled per tree
    'colsample_bytree': 0.5,          # fraction of features sampled per tree
    'eval_metric': 'logloss',
    'seed': 0,                        # XGBoost's default seed, made explicit because
                                      # row/column subsampling is stochastic
}
num_boost_round = 500

# Train the model
dtrain = xgb.DMatrix(X_train, label=y_train)
model = xgb.train(params, dtrain, num_boost_round)

# Make predictions: threshold the predicted probabilities at 0.5 in one
# vectorized step; .tolist() keeps `predictions` a plain list of Python ints.
dtest = xgb.DMatrix(X_test)
y_pred = model.predict(dtest)
predictions = (y_pred >= 0.5).astype(int).tolist()

# Evaluate the model on the held-out test set
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))


In [None]:
# Draw the feature-importance bar chart for the trained booster.
# importance_type='weight' ranks features by how many times they are used in
# a split across all trees (per the XGBoost documentation).
importance_ax = xgb.plot_importance(booster=model, importance_type='weight')
plt.show()