In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# LOAD DATA
# The data location and the random seed live in one place so they can be
# changed without touching the rest of the cell.
DATA_DIR = "/mnt/data"
SEED = 42

train = pd.read_csv(f"{DATA_DIR}/train.csv")
test = pd.read_csv(f"{DATA_DIR}/test.csv")

# REAL TARGET = Status
target = "Status"

# Separate X and y
y = train[target]
X = train.drop(["id", target], axis=1)
test_id = test["id"]
X_test = test.drop(["id"], axis=1)

# Handle missing
# Numeric gaps are filled with medians computed on the TRAINING features and
# the very same values are reused for the test set, so a given raw value is
# imputed identically in both frames. Whatever is still missing afterwards
# (non-numeric columns) gets the explicit "missing" category.
train_medians = X.median(numeric_only=True)
X = X.fillna(train_medians).fillna("missing")
X_test = X_test.fillna(train_medians).fillna("missing")

# Encode categorical variables
# drop_first is intentionally NOT used: train and test are encoded
# separately, so each frame would drop its *own* first category. When the two
# category sets differ, a real test category would then be encoded as all
# zeros, i.e. indistinguishable from the train baseline. Tree models do not
# need the reference level removed anyway.
X = pd.get_dummies(X)
X_test = pd.get_dummies(X_test)

# Align columns
# join="left" keeps exactly the training columns: dummy columns that only
# exist in test are dropped, columns missing from test are created as 0.
X, X_test = X.align(X_test, join="left", axis=1, fill_value=0)

# Encode target
# LabelEncoder assigns integer codes following the sorted order of the labels;
# le.classes_ keeps the code -> label mapping used for the submission below.
le = LabelEncoder()
y_enc = le.fit_transform(y)

# Train-test split
# Stratify so every class keeps its share in both parts; this also ensures
# each class is present in y_train, which the column loop below relies on.
X_train, X_val, y_train, y_val = train_test_split(
    X, y_enc,
    test_size=0.2,
    random_state=SEED,
    stratify=y_enc
)

# Train RandomForest (multi-class)
model = RandomForestClassifier(
    n_estimators=500,
    max_depth=12,
    class_weight="balanced",
    random_state=SEED,
    n_jobs=-1
)

model.fit(X_train, y_train)

print("Validation Accuracy:", accuracy_score(y_val, model.predict(X_val)))

# Predict probabilities for submission
proba = model.predict_proba(X_test)

# Submission
# Column names are derived from the fitted encoder instead of being
# hard-coded, so each probability column is always labelled with the class it
# actually belongs to (yields Status_<label> for every label in the target).
submission = pd.DataFrame({"id": test_id})
for i, cls in enumerate(le.classes_):
    submission[f"{target}_{cls}"] = proba[:, i]

submission.to_csv("submission.csv", index=False)
print("submission.csv saved!")


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# LOAD DATA
# The data location and the random seed live in one place so they can be
# changed without touching the rest of the cell.
DATA_DIR = "/kaggle/input/mock-test-2-mse-2"
SEED = 42

train = pd.read_csv(f"{DATA_DIR}/train.csv")
test = pd.read_csv(f"{DATA_DIR}/test.csv")

TARGET = "Status"

# Separate features & target
y = train[TARGET]
X = train.drop(["id", TARGET], axis=1)

test_id = test["id"]
X_test = test.drop(["id"], axis=1)

# Handle missing values
# Numeric gaps are filled with medians computed on the TRAINING features and
# the same values are reused for the test set, so both frames are imputed
# consistently. Remaining gaps (non-numeric columns) become the explicit
# "missing" category.
train_medians = X.median(numeric_only=True)
X = X.fillna(train_medians).fillna("missing")
X_test = X_test.fillna(train_medians).fillna("missing")

# One-hot encode categorical features
X = pd.get_dummies(X)
X_test = pd.get_dummies(X_test)

# Align train & test
# join="left" keeps exactly the training columns: dummy columns that only
# exist in test are dropped, columns missing from test are created as 0.
X, X_test = X.align(X_test, join="left", axis=1, fill_value=0)

# Encode target dynamically
le = LabelEncoder()
y_enc = le.fit_transform(y)

# Split for validation
# Stratify so every class keeps its share in both parts; this also ensures
# each class is present in y_train, so predict_proba returns one column per
# entry of le.classes_ (the submission loop below relies on that).
X_train, X_val, y_train, y_val = train_test_split(
    X, y_enc,
    test_size=0.2,
    random_state=SEED,
    stratify=y_enc
)

# Train model
model = RandomForestClassifier(
    n_estimators=400,
    random_state=SEED,
    n_jobs=-1
)
model.fit(X_train, y_train)

print("Validation Accuracy:", accuracy_score(y_val, model.predict(X_val)))

# Predict probabilities
proba = model.predict_proba(X_test)

# DYNAMIC SUBMISSION CREATION
# One "<TARGET>_<label>" column per class, taken from the fitted encoder so
# column i of proba is labelled with the class encoded as i.
class_names = le.classes_   # auto-detect class labels

submission = pd.DataFrame({"id": test_id})

for i, cls in enumerate(class_names):
    submission[f"{TARGET}_{cls}"] = proba[:, i]

submission.to_csv("submission.csv", index=False)
print("submission.csv saved!")


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# ==============================
# LOAD DATA (CHANGE PATHS ONLY)
# ==============================
DATA_DIR = "/kaggle/input/mock-test-2-mse-2"
SEED = 42

train = pd.read_csv(f"{DATA_DIR}/train.csv")
test = pd.read_csv(f"{DATA_DIR}/test.csv")

TARGET = "Status"   # change if needed

# ==============================
# SPLIT FEATURES & TARGET
# ==============================
y = train[TARGET]
X = train.drop(["id", TARGET], axis=1)

test_id = test["id"]
X_test = test.drop(["id"], axis=1)

# ==============================
# HANDLE MISSING VALUES
# ==============================
# Numeric columns must be imputed with numbers BEFORE the "missing" fill:
# putting the string "missing" into a numeric column turns it into an object
# column, and get_dummies would then one-hot encode every distinct numeric
# value as its own category. Medians come from the training features and are
# reused for the test set so both frames are imputed consistently.
train_medians = X.median(numeric_only=True)
X = X.fillna(train_medians).fillna("missing")
X_test = X_test.fillna(train_medians).fillna("missing")

# ==============================
# SAFE CATEGORICAL ENCODING
# (FIXES unseen label error)
# ==============================
X = pd.get_dummies(X)
X_test = pd.get_dummies(X_test)

# Align train & test columns
# join="left" keeps exactly the training columns: dummy columns that only
# exist in test are dropped, columns missing from test are created as 0.
X, X_test = X.align(X_test, join="left", axis=1, fill_value=0)

# ==============================
# ENCODE TARGET (ONLY HERE)
# ==============================
le = LabelEncoder()
y_enc = le.fit_transform(y)

# ==============================
# TRAIN / VALIDATION SPLIT
# ==============================
# Stratified so every class keeps its share in both parts.
X_train, X_val, y_train, y_val = train_test_split(
    X, y_enc,
    test_size=0.2,
    random_state=SEED,
    stratify=y_enc
)

# ==============================
# TRAIN MODEL (MULTICLASS)
# ==============================
model = RandomForestClassifier(
    n_estimators=500,
    max_depth=14,
    class_weight="balanced",
    random_state=SEED,
    n_jobs=-1
)

model.fit(X_train, y_train)

print("Validation Accuracy:",
      accuracy_score(y_val, model.predict(X_val)))

# ==============================
# PREDICT PROBABILITIES
# ==============================
proba = model.predict_proba(X_test)

# ==============================
# DYNAMIC SUBMISSION CREATION
# ==============================
# One "<TARGET>_<label>" column per class, taken from the fitted encoder so
# column i of proba is labelled with the class encoded as i.
submission = pd.DataFrame({"id": test_id})

for i, cls in enumerate(le.classes_):
    submission[f"{TARGET}_{cls}"] = proba[:, i]

submission.to_csv("submission.csv", index=False)
print("submission.csv saved!")
