In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier

# Read the three CSV inputs from the current working directory, in the same
# order as before. `sample_submission` is loaded here but is not referenced
# in the visible part of this cell.
train, test, sample_submission = (
    pd.read_csv(csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

# Encode categorical (object-dtype) feature columns as integers.
# Every column gets its own LabelEncoder, fitted on the union of the train and
# test values, so a category that occurs only in test.csv does not make
# transform() raise "y contains previously unseen labels".
# 'Target' is skipped here: it gets a dedicated encoder below so predictions
# can be mapped back to the original labels.
for column in train.columns:
    if column == 'Target' or train[column].dtype != 'object':
        continue
    encoder = LabelEncoder()
    if column in test.columns:
        encoder.fit(pd.concat([train[column], test[column]], ignore_index=True))
        test[column] = encoder.transform(test[column])
    else:
        encoder.fit(train[column])
    train[column] = encoder.transform(train[column])

# Separate features and target.
X = train.drop(columns=['id', 'Target'])
# Encode the target with its own encoder. This yields the contiguous
# 0..n_classes-1 integers the classifier is trained on and keeps the mapping
# needed to decode predictions for the submission file.
target_encoder = LabelEncoder()
y = target_encoder.fit_transform(train['Target'])

# Hold out 20% of the training rows for validation (fixed seed for
# reproducibility).
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the model
model = XGBClassifier(
    max_depth=6,
    n_estimators=500,
    learning_rate=0.1,
    colsample_bytree=0.8,
    subsample=0.8,
    random_state=42,
    eval_metric='mlogloss'
)

# Train the model; the validation set is passed only so that the metric is
# logged after every boosting round (verbose=True).
model.fit(X_train, y_train,
          eval_set=[(X_val, y_val)],
          verbose=True)

# Evaluate the model on the held-out rows (both sides are encoded labels).
y_pred = model.predict(X_val)
print("Validation Accuracy:", accuracy_score(y_val, y_pred))

# Prepare test data: select exactly the training feature columns, in the same
# order. A column missing from test.csv fails here with a clear KeyError
# instead of a feature-mismatch error inside the model.
X_test = test[X.columns]

# Make predictions and decode them back to the original Target labels, so the
# submission does not contain the internal integer codes.
test_preds = target_encoder.inverse_transform(model.predict(X_test))

# Create submission file
submission = pd.DataFrame({'id': test['id'], 'Target': test_preds})
submission.to_csv('submission.csv', index=False)
print("Submission file created!")


[0]	validation_0-mlogloss:1.01495
[1]	validation_0-mlogloss:0.94367
[2]	validation_0-mlogloss:0.88377
[3]	validation_0-mlogloss:0.83183
[4]	validation_0-mlogloss:0.78738
[5]	validation_0-mlogloss:0.74877
[6]	validation_0-mlogloss:0.71477
[7]	validation_0-mlogloss:0.68520
[8]	validation_0-mlogloss:0.65898
[9]	validation_0-mlogloss:0.63580
[10]	validation_0-mlogloss:0.61612
[11]	validation_0-mlogloss:0.59837
[12]	validation_0-mlogloss:0.58278
[13]	validation_0-mlogloss:0.56847
[14]	validation_0-mlogloss:0.55620
[15]	validation_0-mlogloss:0.54530
[16]	validation_0-mlogloss:0.53517
[17]	validation_0-mlogloss:0.52627
[18]	validation_0-mlogloss:0.51832
[19]	validation_0-mlogloss:0.51097
[20]	validation_0-mlogloss:0.50455
[21]	validation_0-mlogloss:0.49885
[22]	validation_0-mlogloss:0.49383
[23]	validation_0-mlogloss:0.48912
[24]	validation_0-mlogloss:0.48483
[25]	validation_0-mlogloss:0.48108
[26]	validation_0-mlogloss:0.47741
[27]	validation_0-mlogloss:0.47443
[28]	validation_0-mlogloss:0.4