In [None]:

# Assignment ML5: Random Forest Classification for Car Safety
# Educational code. Expects a 'car.csv' (UCI Car Evaluation) or will create a small synthetic sample.

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Load the car-evaluation data. If the CSV cannot be read, fall back to a
# small synthetic frame with the same column names so the rest of the
# script still runs end to end.
path = "./car.csv"
try:
    df = pd.read_csv(path)
    print("Loaded car dataset from", path)
except (OSError, ValueError) as e:
    # OSError covers a missing or unreadable file. pandas' EmptyDataError and
    # ParserError, as well as UnicodeDecodeError, are ValueError subclasses.
    # "\u2014" is an em dash, written as an escape so the message survives
    # encoding round-trips of this file.
    print(f"Could not load {path!r} \u2014 generating a small synthetic dataset. Error:", e)
    # 400 rows per column. NOTE: "buying", "maint", "doors" and "class" all
    # cycle with period 4 in lockstep, so the class is fully determined by
    # those features; scores obtained on this fallback data are not
    # meaningful and only demonstrate the workflow.
    df = pd.DataFrame({
        "buying": ["vhigh", "high", "med", "low"] * 100,
        "maint": ["vhigh", "high", "med", "low"] * 100,
        "doors": ["2", "3", "4", "5more"] * 100,
        "persons": ["2", "4", "more"] * 133 + ["2"],
        "lug_boot": ["small", "med", "big"] * 133 + ["small"],
        "safety": ["low", "med", "high"] * 133 + ["low"],
        "class": ["unacc", "acc", "good", "vgood"] * 100,
    })
print("Shape:", df.shape)
try:
    # `display` is injected by IPython/Jupyter and renders a rich table.
    display(df.head())
except NameError:
    # Plain `python script.py` run: no `display` builtin, so print instead.
    print(df.head())

# Split the frame into the target ("class") and the predictor columns.
y = df["class"]
X = df.drop("class", axis=1)

# Turn the class labels into integer codes. The fitted encoder `le` is kept
# so the codes can be mapped back to label names later.
le = LabelEncoder()
y_enc = le.fit_transform(y)

# One-hot encode the predictors, dropping the first level of each column.
X_enc = pd.get_dummies(X, drop_first=True)

# Hold out 20% of the rows for evaluation, stratified on the encoded target
# and seeded so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_enc,
    y_enc,
    test_size=0.2,
    stratify=y_enc,
    random_state=42,
)

# Model: fit a 100-tree random forest (seeded for reproducibility) and
# predict the held-out rows.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
pred = rf.predict(X_test)

# y_test / pred hold LabelEncoder integer codes. Report them under the
# original class names so the output is readable, and pass the full label
# list explicitly so the report rows and matrix axes stay aligned with
# `class_names` even if some class is absent from the test split.
class_names = [str(c) for c in le.classes_]
class_ids = list(range(len(class_names)))

print("Accuracy:", accuracy_score(y_test, pred))
print(
    "\nClassification Report:\n",
    classification_report(y_test, pred, labels=class_ids, target_names=class_names),
)
# Rows are true classes, columns are predicted classes, both in
# `class_names` order.
print("\nConfusion Matrix:\n", confusion_matrix(y_test, pred, labels=class_ids))
