# In [None]:
 import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# Load the labelled training table and the unlabelled test table from the
# current working directory.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# Name of the label column in the training table.
TARGET = "target"

# Features are every training column except the label.
X = train.drop(TARGET, axis=1)

# Encode the labels as integer class codes. The fitted encoder is kept so
# predictions can be mapped back to the original label values later.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(train[TARGET])
# Split the feature columns by dtype so each group gets its own preprocessing.
# "number" matches every numeric dtype (int32, float32, nullable integers,
# ...), not only int64/float64. Columns matched by neither selector are
# dropped by the ColumnTransformer below (its default is remainder="drop").
num_cols = X.select_dtypes(include="number").columns
cat_cols = X.select_dtypes(include=["object", "category"]).columns

# Numeric features: fill missing values with the column median, then
# standardize to zero mean and unit variance.
num_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
])

# Categorical features: fill missing values with the most frequent category,
# then one-hot encode. handle_unknown="ignore" lets categories that were not
# seen during fit be encoded as all zeros instead of raising at predict time.
cat_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("encoder", OneHotEncoder(handle_unknown="ignore")),
])

# Route each column group through its own pipeline and concatenate the
# results into a single feature matrix.
preprocessor = ColumnTransformer([
    ("num", num_pipeline, num_cols),
    ("cat", cat_pipeline, cat_cols),
])
# Random forest with 300 trees. class_weight="balanced" weights classes
# inversely to their frequency; random_state fixes the seed so runs are
# reproducible.
model = RandomForestClassifier(
    n_estimators=300, class_weight="balanced", random_state=42
)

# End-to-end estimator: preprocessing is fitted together with the model, so
# raw DataFrames can be passed straight to fit() and predict().
pipeline = Pipeline(
    steps=[
        ("preprocessing", preprocessor),
        ("model", model),
    ]
)
# Hold out 20% of the labelled rows for validation. stratify=y keeps the
# class proportions the same in both parts; random_state makes the split
# reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Fit on the training part only (fit() returns the pipeline itself), then
# score the predictions for the held-out part.
val_preds = pipeline.fit(X_train, y_train).predict(X_val)
print("Validation Accuracy:", accuracy_score(y_val, val_preds))
# Refit the same pipeline on every labelled row (training + validation parts)
# before predicting the test set.
pipeline.fit(X, y)
test_preds = pipeline.predict(test)
# Map the integer class codes back to the original label values.
test_preds = label_encoder.inverse_transform(test_preds)

# Identify rows by the test file's own "id" column when it has one. The
# positional row index is only a fallback, because it is unrelated to any
# ids stored in the data.
# NOTE(review): confirm the id column expected in the submission is "id".
if "id" in test.columns:
    submission_ids = test["id"].to_numpy()
else:
    submission_ids = test.index

submission = pd.DataFrame({
    "id": submission_ids,
    "target": test_preds,
})

# index=False keeps the DataFrame's own index out of the written file.
submission.to_csv("submission.csv", index=False)
print("submission.csv created!")
