# XGBoost model

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
from xgboost import XGBRegressor
from sklearn.impute import SimpleImputer

# Train an XGBoost model on the selected numeric features, report the
# validation AUROC, and write per-player test predictions to
# 'submission_xgboost.csv'.

# Load the training and test datasets; the 'yr' column is read as text.
train_data = pd.read_csv('/content/train.csv', dtype={'yr': str})
test_data = pd.read_csv('/content/test.csv', dtype={'yr': str})

# Define features and target variable
numeric_features = ['GP', 'Min_per', 'Ortg', 'usg', 'eFG', 'TS_per', 'ORB_per', 'DRB_per',
                   'AST_per', 'TO_per', 'FTM', 'FTA', 'FT_per', 'twoPM', 'twoPA', 'twoP_per',
                   'TPM', 'TPA', 'TP_per', 'blk_per', 'stl_per', 'ftr', 'porpag', 'adjoe',
                   'Rec_Rank', 'ast_tov', 'rim_ratio', 'mid_ratio', 'dunks_ratio',
                   'pick', 'drtg', 'adrtg', 'dporpag', 'stops', 'bpm', 'obpm', 'dbpm', 'gbpm',
                   'mp', 'ogbpm', 'dgbpm', 'oreb', 'dreb', 'treb', 'ast', 'stl', 'blk', 'pts']

target = 'drafted'

# Drop training rows with a NaN in any selected feature, so the model is
# fitted on complete rows only.
train_data = train_data.dropna(subset=numeric_features)

# Split data into features and target
X = train_data[numeric_features]
y = train_data[target]

# Hold out 20% of the rows for validation (fixed seed for reproducibility)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train an XGBoost Regressor on the raw (unscaled) features.
# NOTE(review): 'drafted' is presumably a 0/1 label; a regressor's outputs are
# scores rather than bounded probabilities. AUROC only depends on their
# ranking, but confirm the submission format accepts unbounded scores.
xgb_regressor = XGBRegressor(random_state=42)
xgb_regressor.fit(X_train, y_train)

# Score the validation set
y_val_pred_prob = xgb_regressor.predict(X_val)

# Calculate AUROC score
auroc_score = roc_auc_score(y_val, y_val_pred_prob)
print(f'AUROC Score: {auroc_score:.4f}')

# Fill missing test values with the per-feature means of the rows the model
# was trained on. The imputer is fitted on training data (not on the test
# set) so the fill values do not depend on the batch being scored.
imputer = SimpleImputer(strategy='mean')
imputer.fit(X_train)

# The test features are deliberately NOT standardized: the model was fitted on
# raw feature values, so its learned split thresholds are in raw units and
# would be meaningless on z-scored inputs.
# Wrap the imputed array back into a DataFrame with the training column names
# so the model sees the same feature layout it was fitted with.
X_test_imputed = pd.DataFrame(
    imputer.transform(test_data[numeric_features]),
    columns=numeric_features,
    index=test_data.index,
)

# Predict on the test set
y_test_pred_prob = xgb_regressor.predict(X_test_imputed)

# Submission file: one row per test player with its predicted score
submission = pd.DataFrame({'player_id': test_data['player_id'], 'drafted': y_test_pred_prob})
submission.to_csv('submission_xgboost.csv', index=False)
