In [1]:
import warnings

import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

# --- Load data ----------------------------------------------------------------
# Competition input directory, kept in one constant so the location is easy to change.
DATA_DIR = "/kaggle/input/mock-test-2-mse-2"
train = pd.read_csv(f"{DATA_DIR}/train.csv")
test = pd.read_csv(f"{DATA_DIR}/test.csv")

# Missing-value counts per column. As bare mid-cell expressions their result would be
# discarded, so print them to make the check visible.
print("Missing values in train:")
print(train.isnull().sum())
print("Missing values in test:")
print(test.isnull().sum())

# Keep the test ids for the submission file; `id` is dropped from both frames
# before modelling.
test_id = test['id']
test = test.drop(columns=['id'])

train = train.drop(columns=['id'])
X = train.drop(columns=['Status'])  # feature columns
y = train['Status']                 # target labels

# Hold out 20% of the labelled rows for validation (seeded for reproducibility).
# Despite their names, X_test / y_test are this validation split, not the `test` frame.
# NOTE(review): the split is not stratified; consider stratify=y if classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Preprocessing --------------------------------------------------------------
# Route columns by dtype. 'number' covers every numeric dtype; a narrower
# ['int64', 'float64'] filter would leave e.g. int32/float32 columns in neither list,
# and ColumnTransformer (remainder='drop' by default) would discard them silently.
numeric_features = X.select_dtypes(include='number').columns
categorical_features = X.select_dtypes(include=['object', 'category']).columns

# Surface any column that still falls through instead of losing it without notice.
_routed_columns = set(numeric_features) | set(categorical_features)
_unrouted_columns = [col for col in X.columns if col not in _routed_columns]
if _unrouted_columns:
    warnings.warn(
        "Columns not assigned to any transformer (ColumnTransformer will drop them): "
        f"{_unrouted_columns}"
    )

# Numeric columns: fill gaps with the column mean, then standardise.
numerical_pipeline = Pipeline(steps=[
    ('impute', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical columns: fill gaps with the most frequent value, then one-hot encode.
# handle_unknown='ignore' keeps transform from failing on categories not seen during fit.
categorical_pipeline = Pipeline(steps=[
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('encode', OneHotEncoder(handle_unknown='ignore')),
])

# Apply each sub-pipeline to its own column group.
preprocessing = ColumnTransformer(transformers=[
    ('num', numerical_pipeline, numeric_features),
    ('cat', categorical_pipeline, categorical_features),
])


# --- Model ------------------------------------------------------------------------
# Gradient-boosting hyper-parameters gathered in one mapping.
gb_params = {
    "n_estimators": 920,
    "learning_rate": 0.02,
    "max_depth": 4,
    "min_samples_split": 2,
    "min_samples_leaf": 1,
    "subsample": 0.7,
    "random_state": 42,
}
model = GradientBoostingClassifier(**gb_params)

# Chain preprocessing and the classifier so a single fit/predict call runs both.
pipeline = Pipeline([
    ('preprocessor', preprocessing),
    ('model', model),
])

# --- Fit and validate ---------------------------------------------------------------
# Encode the class labels as integers 0..k-1. The encoder is fitted on the training
# labels only, so its classes are exactly the ones the model is trained on.
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc = le.transform(y_test)  # raises ValueError for a label absent from y_train

pipeline.fit(X_train, y_train_enc)

# Class probabilities for the hold-out rows, shape (n_samples, n_classes).
y_proba = pipeline.predict_proba(X_test)

# Pass the full label set explicitly: without `labels`, log_loss raises when the
# hold-out split happens not to contain every class the model predicts.
loss = log_loss(y_test_enc, y_proba, labels=np.arange(len(le.classes_)))
print("Log Loss:", loss)

# --- Predict and write the submission -------------------------------------------------
# Class probabilities for the rows of the `test` frame, one column per class.
y_final = pipeline.predict_proba(test)

# Name the probability columns after the original labels; the encoded classes are
# 0..k-1 in the order of le.classes_, which is the column order of predict_proba.
class_names = le.classes_
submission = pd.DataFrame(y_final, columns=[f"Status_{cls}" for cls in class_names])

# Insert the raw id values so they are placed by position rather than aligned on
# index labels.
submission.insert(0, 'id', test_id.to_numpy())
submission.to_csv("submission4.csv", index=False)
print("\n✅ Submission file created successfully!")
print(submission.head())


In [None]:
# Alternative estimator: random forest with 600 trees capped at depth 16.
# Requires RandomForestClassifier (sklearn.ensemble) in the notebook's import cell.
# class_weight="balanced" weights classes inversely to their frequency.
# Rebinding `model` does not change the already-built `pipeline`, which still holds
# the estimator it was constructed with; rebuild and refit the pipeline to use this one.
rf_params = {
    "n_estimators": 600,
    "max_depth": 16,
    "class_weight": "balanced",
    "random_state": 42,
    "n_jobs": -1,  # use all available cores
}
model = RandomForestClassifier(**rf_params)

In [None]:
# Alternative estimator: random forest with 800 trees capped at depth 16, additionally
# constrained by min_samples_split=5 and min_samples_leaf=2.
# Requires RandomForestClassifier (sklearn.ensemble) in the notebook's import cell.
# Rebinding `model` does not change the already-built `pipeline`, which still holds
# the estimator it was constructed with; rebuild and refit the pipeline to use this one.
rf_constrained_params = {
    "n_estimators": 800,
    "max_depth": 16,
    "min_samples_split": 5,
    "min_samples_leaf": 2,
    "class_weight": "balanced",
    "random_state": 42,
    "n_jobs": -1,  # use all available cores
}
model = RandomForestClassifier(**rf_constrained_params)

In [None]:
# Alternative estimator: CatBoost configured for multiclass classification.
# NOTE(review): CatBoostClassifier is not imported anywhere in the visible notebook;
# presumably `from catboost import CatBoostClassifier` is needed — confirm before running.
# Rebinding `model` does not change the already-built `pipeline`; rebuild and refit
# the pipeline to use this estimator.
catboost_params = {
    "loss_function": "MultiClass",
    "iterations": 2000,
    "learning_rate": 0.03,
    "depth": 8,
    "eval_metric": "MultiClass",
    "random_seed": 42,
    "verbose": False,
}
model = CatBoostClassifier(**catboost_params)

In [None]:
# Alternative estimator: LightGBM multiclass classifier.
# NOTE(review): neither `lgb` nor `num_classes` is defined anywhere in the visible
# notebook; presumably `import lightgbm as lgb` and the number of target classes are
# intended — confirm and define both before running.
# Rebinding `model` does not change the already-built `pipeline`; rebuild and refit
# the pipeline to use this estimator.
lgbm_params = {
    "objective": "multiclass",
    "num_class": num_classes,
    "n_estimators": 2500,
    "learning_rate": 0.02,
    "num_leaves": 128,
    "max_depth": -1,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "reg_alpha": 0.1,
    "reg_lambda": 0.1,
    "random_state": 42,
}
model = lgb.LGBMClassifier(**lgbm_params)