In [15]:
import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import RandomForestClassifier

from sklearn.linear_model import LogisticRegression

from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder



# -----------------------------

# 1) 데이터 준비

# -----------------------------

cols = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']

df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/car_evaluation.csv", header=None, names=cols).dropna()



X = df.drop(columns=["class"])

y = df["class"]

x_encoders = {}
for col in X.columns:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])
    x_encoders[col] = le  # (선택) 추후 역변환용으로 보관

y_encoder = LabelEncoder()
y_enc = y_encoder.fit_transform(y)

print("\nClass mapping (y):")
for idx, cls in enumerate(y_encoder.classes_):
    print(f"{cls} -> {idx}")




X_train, X_test, y_train, y_test = train_test_split(

    X, y, test_size=0.2, stratify=y, random_state=42

)




# -----------------------------

# 2) 모델 구성

# -----------------------------

dt = DecisionTreeClassifier(random_state=42)

rf = RandomForestClassifier(n_estimators=200, random_state=42)

lr = LogisticRegression(max_iter=500)



# -----------------------------

# 3) 모델 학습

# -----------------------------

dt.fit(X_train, y_train)

rf.fit(X_train, y_train)

lr.fit(X_train, y_train)



# -----------------------------

# 4) 모델 평가

# -----------------------------

dt_acc = accuracy_score(y_test, dt.predict(X_test))

rf_acc = accuracy_score(y_test, rf.predict(X_test))

lr_acc = accuracy_score(y_test, lr.predict(X_test))



print("=== Test Accuracy ===")

print(f"Decision Tree : {dt_acc:.4f}")

print(f"Random Forest : {rf_acc:.4f}")

print(f"Logistic Reg. : {lr_acc:.4f}")



Class mapping (y):
acc -> 0
good -> 1
unacc -> 2
vgood -> 3
=== Test Accuracy ===
Decision Tree : 0.9855
Random Forest : 0.9798
Logistic Reg. : 0.6908


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# === 1) 데이터 준비 ===
# - iris.data 파일에는 헤더가 없어서 직접 컬럼명을 지정
# - 결측치가 혹시 있으면 dropna로 제거
cols = ["sepal_length", "sepal_width", "petal_length", "petal_width", "label"]
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/iris.data",
                 header=None, names=cols).dropna()
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,label
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica
