In [8]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Load data
# The data directory defaults to the original location but can be redirected
# with the TRADEWINDS_DATA_DIR environment variable, so the cell is not tied to
# one user's home directory. `or` (rather than a .get default) also covers the
# case where the variable is set but empty.
DATA_DIR = Path(
    os.environ.get('TRADEWINDS_DATA_DIR')
    or '/Users/brianmewhinney/dev/tradewinds-python/data/8'
)
X = pd.read_csv(DATA_DIR / 'x.csv').values            # feature matrix as a NumPy array
y = pd.read_csv(DATA_DIR / 'y.csv').values.flatten()  # labels collapsed to a 1-D array

# Split data: 80% train / 20% test, fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale data
# The scaler is fit on the training split only; the test split is transformed
# with the training min/max so no test statistics enter the fit.
# NOTE: despite its name, `X_resampled` is just the scaled training matrix -- no
# resampling happens in this cell. The name is kept in case other cells use it.
# NOTE(review): the scaler is fit once on the whole training split *before*
# cross-validation, so each CV validation fold has contributed to the scaling
# range. Wrapping scaler + model in a Pipeline would avoid that -- confirm
# whether it matters for this analysis.
scaler = MinMaxScaler()
X_resampled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Grid search for best parameters
# 3 * 3 * 4 * 2 * 3 = 216 parameter combinations, each evaluated with 5-fold CV
# (1080 fits), using all available cores (n_jobs=-1). No `scoring` is passed, so
# the estimator's default scorer decides which combination is "best".
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.005, 0.01, 0.1, 0.2],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0]
}

grid_search = GridSearchCV(xgb.XGBClassifier(random_state=42, eval_metric='mlogloss'),
                           param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_resampled, y_train)
print("Best Parameters:", grid_search.best_params_)

# Evaluate best model on the held-out test split (scaled with the training scaler).
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f'Test Accuracy: {accuracy:.2f}')

# Feature importance
# Features are identified by column index only (X was converted to a bare array
# above), listed from most to least important.
importances = best_model.feature_importances_
sorted_idx = np.argsort(importances)[::-1]  # argsort is ascending; reverse for descending
print("XGBoost feature importance:")
for idx in sorted_idx:
    print(f"Feature {idx}: {importances[idx]:.4f}")




Best Parameters: {'colsample_bytree': 0.8, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 100, 'subsample': 1.0}
Test Accuracy: 0.60
XGBoost feature importance:
Feature 1: 0.1670
Feature 0: 0.1194
Feature 3: 0.0593
Feature 2: 0.0488
Feature 20: 0.0422
Feature 31: 0.0346
Feature 14: 0.0325
Feature 16: 0.0322
Feature 11: 0.0316
Feature 25: 0.0301
Feature 27: 0.0285
Feature 4: 0.0268
Feature 21: 0.0267
Feature 8: 0.0259
Feature 10: 0.0246
Feature 26: 0.0231
Feature 24: 0.0225
Feature 22: 0.0214
Feature 29: 0.0204
Feature 28: 0.0197
Feature 13: 0.0190
Feature 12: 0.0186
Feature 19: 0.0172
Feature 17: 0.0171
Feature 30: 0.0167
Feature 18: 0.0167
Feature 6: 0.0153
Feature 5: 0.0145
Feature 7: 0.0097
Feature 9: 0.0065
Feature 15: 0.0057
Feature 23: 0.0056
