In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier 
from sklearn.model_selection import train_test_split

In [2]:
# Read the training and test CSV files from the Kaggle input directory.
train_path = "/kaggle/input/car-eval/car_eval_train.csv"
test_path = "/kaggle/input/car-eval/car_eval_test.csv"
df, test_df = (pd.read_csv(csv_path) for csv_path in (train_path, test_path))

In [3]:
# Hold on to the test-set identifiers so they can be written to the output
# file. When the test CSV has no "id" column, fall back to 1-based row numbers.
test_ids = (
    test_df["id"]
    if "id" in test_df.columns
    else pd.Series(range(1, len(test_df) + 1), name="id")
)

In [4]:
# Remove columns that are not model features. errors="ignore" turns the drop
# into a no-op for any listed column the frame does not contain.
df.drop(columns=["Unnamed: 0"], errors="ignore", inplace=True)
test_df.drop(columns=["Unnamed: 0", "id"], errors="ignore", inplace=True)

In [5]:
# Ordinal encoding for the categorical features: every category string is
# mapped to an integer rank starting at 1.
mappings = {
    "buying": {"low": 1, "med": 2, "high": 3, "vhigh": 4},
    "maint": {"low": 1, "med": 2, "high": 3, "vhigh": 4},
    "doors": {"2": 1, "3": 2, "4": 3, "5more": 4},
    "persons": {"2": 1, "4": 2, "more": 3},
    "lug_boot": {"small": 1, "med": 2, "big": 3},
    "safety": {"low": 1, "med": 2, "high": 3}
}


def _encode_ordinal(frame: pd.DataFrame, column_maps: dict) -> pd.DataFrame:
    """Return a copy of ``frame`` with the mapped columns ordinal-encoded.

    Uses ``Series.map`` per column instead of a nested-dict
    ``DataFrame.replace``. ``replace`` relies on silent downcasting (deprecated
    in pandas, hence a FutureWarning) and leaves any value it does not
    recognise untouched, so an unexpected category would only surface later
    as an error inside the model.

    Behaviour:
      * Columns named in ``column_maps`` but absent from ``frame`` are skipped.
      * Columns that are already numeric are skipped, which keeps the cell
        safe to re-run after the frames have been encoded once.
      * Missing values (NaN) stay missing.

    Raises:
        ValueError: if a non-numeric mapped column holds a non-missing value
            that has no entry in its mapping.
    """
    encoded = frame.copy()
    for column, value_map in column_maps.items():
        if column not in encoded.columns:
            continue
        raw = encoded[column]
        if pd.api.types.is_numeric_dtype(raw):
            # Already encoded (or never textual): leave as-is.
            # NOTE(review): a "doors"/"persons" column that read_csv parsed as
            # integers would land here un-mapped - confirm the CSVs always
            # contain the textual categories ("5more", "more").
            continue
        mapped = raw.map(value_map)
        # map() yields NaN for keys it does not know; separate those from
        # values that were missing in the input to begin with.
        unmapped = raw.notna() & mapped.isna()
        if unmapped.any():
            unknown_values = sorted(raw[unmapped].astype(str).unique())
            raise ValueError(
                f"Column {column!r} contains values with no encoding: {unknown_values}"
            )
        # Guarantee a numeric dtype regardless of the input string dtype.
        encoded[column] = pd.to_numeric(mapped)
    return encoded


df = _encode_ordinal(df, mappings)
test_df = _encode_ordinal(test_df, mappings)

  df.replace(mappings, inplace=True)
  test_df.replace(mappings, inplace=True)


In [6]:
# Convert the string class labels to integer codes. The fitted encoder is kept
# in `label_enc` so predicted codes can be translated back to labels.
label_enc = LabelEncoder().fit(df["class"])
df["class"] = label_enc.transform(df["class"])

In [7]:
# Separate the encoded label column (y) from the predictor columns (X).
y = df["class"]
X = df.drop(columns="class")

In [8]:
# Hold out 5% of the labelled rows for validation, stratified on the class so
# both splits keep the same label proportions; the seed fixes the split.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.05,
    random_state=42,
    stratify=y,
)

In [9]:
# Fit a gradient-boosted tree classifier on the training split.
clf = XGBClassifier(
    n_estimators=300,
    max_depth=6,
    learning_rate=0.1,
    random_state=42,
)
clf.fit(X_train, y_train)

In [10]:
# Validate model performance
# Score the fitted classifier on the held-out validation split and report the
# result as a percentage.
# NOTE(review): the validation split is only 5% of the labelled data
# (test_size=.05 above), so this figure is a coarse estimate.
val_accuracy = clf.score(X_val, y_val)
print(f"Validation Accuracy: {val_accuracy:.2%}")

Validation Accuracy: 100.00%


In [11]:
# Give the test frame exactly the training feature columns, in training order.
# Columns missing from the test data are created and filled with 0; columns
# that are not training features are dropped.
# NOTE(review): 0 lies outside the 1-based ordinal codes used for the features,
# so a filled column does not correspond to any real category.
test_df = test_df.reindex(columns=X.columns, fill_value=0)

In [12]:
# Make predictions
# The model was trained on label-encoded targets, so `predictions` holds
# encoded class codes; inverse_transform turns them back into the original
# class labels.
predictions = clf.predict(test_df)
predicted_classes = label_enc.inverse_transform(predictions)

In [13]:
# Assemble the output table: one row per test example, pairing its identifier
# with the predicted class label.
output_df = pd.DataFrame(
    data={"id": test_ids, "class": predicted_classes},
    columns=["id", "class"],
)

In [14]:
# Write the predictions to the Kaggle working directory, without the
# DataFrame index column.
output_path = "/kaggle/working/car_eval_predictions.csv"
output_df.to_csv(path_or_buf=output_path, index=False)

In [15]:
# Report where the predictions file was written.
print(f"Predictions saved to {output_path}")

Predictions saved to /kaggle/working/car_eval_predictions.csv
