In [29]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# 파일 불러오기

In [30]:
df = pd.read_csv("C:\\AI\\car_evaluation.csv") # 컬럼이 없을때는 header = None 붙이기
df
# 컬럼을 붙이고 싶다면 
df.columns = ['price', 'maint', 'doors', 'persons', 'lug_capacity', 'safety','output']

In [31]:
# 불필요한 컬럼 제거 
#df.drop(columns=['id'], inplace=True)

# 문자형 데이터를 숫자로 변환 (엔코딩)
encoder = LabelEncoder()
df['price'] = encoder.fit_transform(df['price'])
df['maint'] = encoder.fit_transform(df['maint'])
df['doors'] = encoder.fit_transform(df['doors'])
df['persons'] = encoder.fit_transform(df['persons'])
df['lug_capacity'] = encoder.fit_transform(df['lug_capacity'])
df['safety'] = encoder.fit_transform(df['safety'])


# 결측치 확인

In [32]:
df.isnull().sum()

price           0
maint           0
doors           0
persons         0
lug_capacity    0
safety          0
output          0
dtype: int64

In [33]:
# 결측치를 평균값으로 채우기 (있을때만)
# df.fillna(df.mean(), inplace=True) 

# 특성과 라벨 분리 
X = df.drop(columns=['output'])
y = df['output']

# 정규화 및 데이터 분할

In [34]:
# 정규화 
scaler = StandardScaler()
X = scaler.fit_transform(X)

# 훈련/테스트 데이터 분할 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# shape를 통해 제대로 분리 됐는지 확인
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(1381, 6) (346, 6) (1381,) (346,)


# SVM

In [35]:
# SVM
clf_svm = SVC(random_state=0)
clf_svm.fit(X_train, y_train)

pred_svm = clf_svm.predict(X_test)

print("\n--- SVM Classifier ---")
print(accuracy_score(y_test, pred_svm))
print(confusion_matrix(y_test, pred_svm))


--- SVM Classifier ---
0.8786127167630058
[[ 62   1  14   0]
 [  9   4   0   1]
 [ 17   0 222   0]
 [  0   0   0  16]]


# DecisionTree / DT

In [36]:
# DecisionTree / DT
clf_dt = DecisionTreeClassifier(random_state=0)
clf_dt.fit(X_train, y_train)

pred_dt = clf_dt.predict(X_test)

print ("\n--- Decision Tree Classifier ---")
print (accuracy_score(y_test, pred_dt))
print (confusion_matrix(y_test, pred_dt))


--- Decision Tree Classifier ---
0.976878612716763
[[ 74   0   3   0]
 [  0  12   0   2]
 [  0   0 239   0]
 [  3   0   0  13]]


# RandomForest / RF

In [37]:
# RandomForest / RF
rf_clf = RandomForestClassifier(random_state=0)
rf_clf.fit(X_train, y_train)

pred = rf_clf.predict(X_test)

print ("\n--- Random Forest ---")
print (accuracy_score(y_test,pred))
print (confusion_matrix(y_test, pred))


--- Random Forest ---
0.9739884393063584
[[ 72   1   4   0]
 [  1  10   0   3]
 [  0   0 239   0]
 [  0   0   0  16]]
