In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import shap  # or import lime if you prefer LIME

In [None]:
# Read the dataset from the Excel workbook into a DataFrame.
data = pd.read_excel(io='data.xlsx')
# Print the first five rows as a quick sanity check of the load.
print(data.head(n=5))

In [None]:
# Separate the predictors from the label column.
X = data.drop('target_column', axis=1)  # replace 'target_column' with your actual target column name
y = data['target_column']

# TimeSeriesSplit produces folds ordered by row position, each one testing on
# rows that come after its training rows.
# NOTE(review): this assumes `data` is already sorted chronologically - confirm.
tscv = TimeSeriesSplit(n_splits=5)

# Only the final fold is used by the cells that follow (largest training
# window, most recent rows as the hold-out). Select it directly rather than
# looping over every fold and overwriting the same variables each time.
train_index, test_index = list(tscv.split(X))[-1]
X_train, X_test = X.iloc[train_index], X.iloc[test_index]
y_train, y_test = y.iloc[train_index], y.iloc[test_index]

In [None]:
# Fit a gradient-boosted tree classifier on the training partition.
# The former `use_label_encoder=False` argument has been dropped: it is
# deprecated in recent XGBoost releases, where it is ignored and only
# produces a warning.
# eval_metric='mlogloss' selects multiclass log loss as the evaluation metric.
model = xgb.XGBClassifier(eval_metric='mlogloss')
model.fit(X_train, y_train)

In [None]:
# Predict labels for both partitions with the fitted model.
y_train_pred, y_test_pred = model.predict(X_train), model.predict(X_test)

# accuracy_score yields a fraction of correct predictions; scale to percent.
train_accuracy = 100 * accuracy_score(y_train, y_train_pred)
test_accuracy = 100 * accuracy_score(y_test, y_test_pred)

# Report both figures with two decimal places.
print(f'Train Accuracy: {train_accuracy:.2f}%')
print(f'Test Accuracy: {test_accuracy:.2f}%')

In [None]:
# Cross-tabulate true labels against predicted labels for the held-out rows.
conf_matrix = confusion_matrix(y_test, y_test_pred)

# Render the counts as an annotated heatmap on an explicitly created Axes.
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

In [None]:
# Explain the fitted model with SHAP over the full feature matrix.
explainer = shap.Explainer(model)
shap_values = explainer(X)

# For a multiclass model the SHAP values carry a trailing per-class axis,
# i.e. (samples, features, classes), whereas the plotting helpers used below
# work on 2-D (samples, features) matrices. Split that case into one matrix
# per class; a single-output model keeps the original 2-D code path.
# NOTE(review): multiclass layout as documented for shap's Explanation API -
# confirm against the installed shap version.
shap_matrix = shap_values.values
if shap_matrix.ndim == 3:
    per_class_shap = [shap_matrix[:, :, k] for k in range(shap_matrix.shape[2])]

    # Summary plot: a list of per-class matrices is drawn as a stacked bar chart.
    shap.summary_plot(per_class_shap, X)

    # A dependence plot shows one class at a time.
    class_index = 0  # change to inspect a different class
    shap_matrix = per_class_shap[class_index]
else:
    # Summary plot
    shap.summary_plot(shap_values, X)

# Dependence plot for a specific feature
shap.dependence_plot('feature_name', shap_matrix, X)  # replace 'feature_name' with your actual feature name

Summary of the Implementation
Model: Implemented an XGBoost classifier.
Train/Test Split: Used TimeSeriesSplit (5 splits) from scikit-learn; the model is trained and evaluated on the final split only.
Accuracy: Calculated and printed the train and test accuracy scores as percentages.
Confusion Matrix: Plotted confusion matrix to visualize prediction performance.
Interpretation: Utilized SHAP for model interpretation to understand feature impacts on predictions.