In [None]:
import pandas as pd
import numpy as np
import warnings
import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold
from sklearn.preprocessing import LabelEncoder

In [None]:
# Suppress every warning category for the remainder of the session.
# NOTE(review): this is a blanket filter, so deprecation and dtype warnings
# are hidden as well.
warnings.filterwarnings(action='ignore')

# Locations of the input CSVs under /kaggle/input.
TRAIN_CSV = '/kaggle/input/signal-cluster-classification-dataset/train.csv'
TEST_CSV = '/kaggle/input/signal-cluster-classification-dataset/test.csv'

train_df, test_df = pd.read_csv(TRAIN_CSV), pd.read_csv(TEST_CSV)

In [None]:
# Column holding the per-row identifier and column holding the label to predict.
ID_COL = 'sample_id'
TARGET_COL = 'category'

# Training features are every column except the label and the identifier.
X = train_df.drop(columns=[TARGET_COL, ID_COL])
y = train_df[TARGET_COL]

# Validate the test frame now: X_test is only consumed after the expensive
# hyper-parameter search, so a column mismatch should surface here instead of
# after the search has finished.
missing_in_test = X.columns.difference(test_df.columns)
if len(missing_in_test) > 0:
    raise ValueError(
        f"test data is missing feature columns: {list(missing_in_test)}"
    )

# Take exactly the training feature columns, in training order. This drops the
# identifier (and any other non-feature column) and guarantees that train and
# test features line up one-to-one.
X_test = test_df[X.columns]

In [None]:
# Learn the label -> integer-code mapping from the training target, then encode
# the target with it. `le` stays in scope so predicted codes can be mapped back
# to the original labels with `le.inverse_transform`.
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)

In [None]:
# Candidate values sampled by the randomized search. The space deliberately
# favours high-capacity models (many rounds, small steps, deep trees, little or
# no regularisation); overfitting is accepted by design.
param_grid = dict(
    n_estimators=[4000, 6000, 8000, 10000],       # boosting rounds
    learning_rate=[0.001, 0.005, 0.01],           # step size per round
    # NOTE(review): None leaves max_depth unset, so XGBoost presumably falls
    # back to its own default depth rather than growing unbounded trees —
    # confirm this entry is intended.
    max_depth=[20, 30, 40, 50, None],
    min_child_weight=[0, 1],                      # 0 puts no floor on leaf weight
    subsample=[0.75, 0.85, 0.95, 1.0],            # row sampling fraction
    colsample_bytree=[0.75, 0.85, 0.95, 1.0],     # column sampling fraction
    gamma=[0, 0.001],                             # none, or a tiny split penalty
    reg_alpha=[0, 0.001],                         # none, or a tiny L1 penalty
    reg_lambda=[0, 0.001],                        # none, or a tiny L2 penalty
)

# Settings shared by every candidate; the search only varies the keys above.
XGB_FIXED_PARAMS = {
    'objective': 'multi:softmax',
    'tree_method': 'hist',
    'random_state': 42,
    'n_jobs': -1,
}

xgb_model = xgb.XGBClassifier(**XGB_FIXED_PARAMS)

In [None]:
# Stratified 5-fold split, shuffled with a fixed seed so the folds are the same
# on every run.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Randomized search: 50 sampled candidates x 5 folds = 250 fits, plus the final
# refit of the best candidate on the full training set.
search = RandomizedSearchCV(
    estimator=xgb_model,
    param_distributions=param_grid,
    n_iter=50,
    scoring='accuracy',
    cv=cv,
    verbose=1,
    random_state=42,
    # xgb_model is already configured with n_jobs=-1, i.e. each fit uses every
    # core. Running the candidates in parallel as well (n_jobs=-1 here) would
    # start cores x cores threads that fight over the same CPUs and hold one
    # copy of the data per worker. Run the fits one at a time and let XGBoost
    # own the parallelism; this also keeps the final refit multi-threaded.
    n_jobs=1,
)

search.fit(X, y_encoded)

In [None]:
# Predict integer class codes for the test rows with the fitted search object,
# then translate the codes back into the original category labels.
test_predictions_encoded = search.predict(X_test)
test_predictions = le.inverse_transform(test_predictions_encoded)

# Two-column submission: the identifier taken from the test file, followed by
# the predicted label for that row.
submission = test_df[[ID_COL]].assign(**{TARGET_COL: test_predictions})
submission.to_csv('submission.csv', index=False)

# Report the mean cross-validated accuracy of the selected candidate.
print(f"Success! Best CV Accuracy: {search.best_score_:.5f}")