In [2]:
from model.notebooks.Utils import load_data, true, false, impute_data, resample_data
from sklearn.model_selection import train_test_split
from sklearn.impute import KNNImputer
import numpy as np
import xgboost as xgb
import joblib
import os
import json

# --- Data preparation -----------------------------------------------------
# Columns removed from the feature matrix. The list also contains the target
# column ('has_ban') so that it cannot end up among the model inputs.
columns_to_exclude = ['steam_id', 'created_at', 'name', 'total_matches', 'updated_at', 'has_ban', 'rank_premier', 'rank_faceit_elo']

player_data = load_data('data.csv')
imputed_player_data = impute_data(player_data)
X = imputed_player_data.drop(columns=columns_to_exclude)

# Encode the target as 1 (banned) / 0 (not banned). `true` / `false` are the
# label constants exported by the project's Utils module.
y = imputed_player_data['has_ban'].map({true: 1, false: 0})

# Series.map leaves NaN for every value that is missing from the mapping.
# Fail fast here instead of letting NaN labels leak into the stratified split
# and the resampling step below.
unmapped_label_count = int(y.isna().sum())
if unmapped_label_count:
    raise ValueError(
        f"{unmapped_label_count} rows have a 'has_ban' value that is neither "
        "`true` nor `false`; cannot build binary labels."
    )

# 80/20 hold-out split, stratified on the label and seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

# Only the training portion is passed to the resampler.
# NOTE(review): the recorded output of this cell shows the helper loading
# 'resampled.csv'; confirm that this cached file was produced from this exact
# split, otherwise rows from X_test could be present in the training data.
X_train_resampled, y_train_resampled = resample_data(X_train, y_train)

# --- Train the classifier and persist it together with its metadata ------
# Directory that receives every artifact written by this block.
MODEL_DIR = '../app/models'

# Single source of truth for the tuned hyperparameters: the same dict feeds
# the estimator and the saved config, so the two can never drift apart.
hyperparameters = {
    'n_estimators': 300,
    'max_depth': 15,
    'learning_rate': 0.5,
}

xgb_clf = xgb.XGBClassifier(
    **hyperparameters,
    random_state=42,
    n_jobs=-1,
    eval_metric='logloss'
)

xgb_clf.fit(X_train_resampled, y_train_resampled)

os.makedirs(MODEL_DIR, exist_ok=True)

# NOTE(review): XGBoost's Model IO documentation recommends `save_model`
# (JSON/UBJSON) for long-term storage because pickles are tied to the library
# version. The pickle is kept here because the consumer of this file
# presumably expects a .pkl artifact; confirm before changing the format.
model_path = f'{MODEL_DIR}/xgboost_model.pkl'
joblib.dump(xgb_clf, model_path)

# Feature order is taken from X_train.
# NOTE(review): the model itself was fitted on X_train_resampled; this assumes
# the resampler preserves the column set and order of X_train. Verify.
feature_names = X_train.columns.tolist()
feature_config = {
    'features': feature_names,
    'num_features': len(feature_names),
    'model_type': 'XGBoost',
    'version': '1.0',
    'hyperparameters': dict(hyperparameters),
}

config_path = f'{MODEL_DIR}/model_config.json'
# Explicit encoding keeps the written file independent of the platform's
# default locale.
with open(config_path, 'w', encoding='utf-8') as f:
    json.dump(feature_config, f, indent=2)
print(f"Feature config saved to: {config_path}")

# --- Fit and persist the KNN imputer --------------------------------------
# Zeros in the training features are treated as missing values: build a new
# frame in which every 0 is replaced by NaN, leaving X_train itself untouched.
# NOTE(review): X_train is derived from the output of `impute_data`, so the
# imputer is fitted on values that have presumably been imputed once already.
# Confirm that this is intended.
X_train_for_imputer = X_train.mask(X_train == 0, other=np.nan)

# Distance-weighted 5-nearest-neighbour imputer; `fit` returns the estimator.
knn_imputer = KNNImputer(n_neighbors=5, weights='distance').fit(X_train_for_imputer)

imputer_path = '../app/models/knn_imputer.pkl'
# Last expression of the cell: the list of written file names returned by
# joblib.dump is what the notebook displays as the cell result.
joblib.dump(knn_imputer, imputer_path)

Loading data from data.csv...
Loaded 423965 players from CSV
Loading imputed data from imputed.csv...
Loaded 423965 players from CSV
Loading resampled data from resampled.csv...
Loaded 567939 resampled samples from CSV
  Banned: 302,269 (53.22%)
  Non-banned: 265,670 (46.78%)
Feature config saved to: ../app/models/model_config.json


['../app/models/knn_imputer.pkl']