In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


In [2]:
# Read the training and test CSVs (in that order) from /content.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ("/content/train (1).csv", "/content/test (1).csv")
)

# Show (rows, columns) of each frame, one per line, as a quick sanity check.
print(train_df.shape, test_df.shape, sep="\n")


(18306, 17)
(4152, 17)


In [3]:
# Target first: a missing "NObeyesdad" column must still fail loudly here,
# because the drop below is told to ignore absent labels.
y = train_df["NObeyesdad"]

# Features = everything except the target and, when present, the "id" column.
# The test features are built by dropping "id", so keeping it here would give
# the fitted preprocessor a column that does not exist at prediction time
# (and a row identifier carries no predictive signal anyway).
# errors="ignore" keeps this a no-op for training files that have no "id".
X = train_df.drop(columns=["NObeyesdad", "id"], errors="ignore")


In [4]:
# Fit the label encoder on the target, then convert the labels to integer codes.
# `le` stays around so predictions can be mapped back via inverse_transform.
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)


In [5]:
# Split the test frame into its feature columns and its "id" column;
# the ids are needed again when the submission file is assembled.
X_test_final = test_df.drop(columns=["id"])
test_ids = test_df["id"]


In [6]:
# Group feature columns by dtype for the two preprocessing branches.
# NOTE(review): only int64/float64 and object columns are picked up; a column
# of any other dtype ends up in neither group — confirm that is intended.
numeric_dtypes = ["int64", "float64"]
num_cols = X.select_dtypes(include=numeric_dtypes).columns
cat_cols = X.select_dtypes(include="object").columns


In [7]:
# Numeric branch: median-impute missing values, then standardise.
numeric_steps = [
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
]
numeric_pipeline = Pipeline(numeric_steps)

# Categorical branch: fill gaps with the most frequent value, then one-hot
# encode. Categories unseen during fit are ignored instead of raising.
categorical_steps = [
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("encoder", OneHotEncoder(handle_unknown="ignore")),
]
categorical_pipeline = Pipeline(categorical_steps)

# Route each column group through its own branch.
preprocessor = ColumnTransformer([
    ("num", numeric_pipeline, num_cols),
    ("cat", categorical_pipeline, cat_cols),
])


In [8]:
# Hold out 20% of the rows for validation; stratifying on the encoded target
# keeps the class proportions the same in both parts, and the fixed seed
# makes the split repeatable.
split_options = {
    "test_size": 0.2,
    "random_state": 42,
    "stratify": y_encoded,
}
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, **split_options)


In [9]:
# 200-tree random forest with a fixed seed for repeatable results.
forest = RandomForestClassifier(n_estimators=200, random_state=42)

# Chain preprocessing and the classifier so both are fitted on the training
# split only and applied identically at prediction time.
model = Pipeline([
    ("preprocessing", preprocessor),
    ("classifier", forest),
])

model.fit(X_train, y_train)


In [10]:
# Score the fitted pipeline on the held-out validation split.
y_pred = model.predict(X_val)
val_accuracy = accuracy_score(y_val, y_pred)
print("Accuracy:", val_accuracy)


Accuracy: 0.8776624795193884


In [11]:
# Predict integer class codes for the test features ...
test_predictions = model.predict(X=X_test_final)
# ... and map them back to the original label values.
final_predictions = le.inverse_transform(y=test_predictions)


In [12]:
# Pair each test id with its predicted label: "id" first, "NObeyesdad" second.
submission = pd.DataFrame({"id": test_ids}).assign(NObeyesdad=final_predictions)

# Write without the index so the file contains only the two columns above.
submission.to_csv("submission.csv", index=False)
print("submission.csv file created successfully!")


submission.csv file created successfully!
