## mobile 데이터 분류

In [1]:
# 사용 패키지 불러오기
import pandas as pd # 판다스
from sklearn.model_selection import train_test_split # 훈련/테스트 세트 분리
from sklearn.preprocessing import StandardScaler # 표준화
from sklearn.tree import DecisionTreeClassifier # DT
from sklearn.ensemble import RandomForestClassifier # RF
from sklearn.svm import SVC # SVM
from sklearn.linear_model import LogisticRegression # LR
from sklearn.metrics import accuracy_score # 정확도

In [2]:
# Load the mobile dataset from CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; point it at your
# own copy of mobile.csv before running.
file_path = "C:/Users/samsung-user/OneDrive/바탕 화면/인공지능개론/1주차/mobile.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

In [13]:
# Distinct values of the target variable (categorical class label).
df['price_range'].unique()

array([1, 2, 3, 0], dtype=int64)

In [3]:
# Preview the first rows of the dataset, then list all column names.
print(df.head(), df.columns, sep="\n")

   battery_power  blue  clock_speed  dual_sim  fc  four_g  int_memory  m_dep  \
0            842     0          2.2         0   1       0           7    0.6   
1           1021     1          0.5         1   0       1          53    0.7   
2            563     1          0.5         1   2       1          41    0.9   
3            615     1          2.5         0   0       0          10    0.8   
4           1821     1          1.2         0  13       1          44    0.6   

   mobile_wt  n_cores  ...  px_height  px_width   ram  sc_h  sc_w  talk_time  \
0        188        2  ...         20       756  2549     9     7         19   
1        136        3  ...        905      1988  2631    17     3          7   
2        145        5  ...       1263      1716  2603    11     2          9   
3        131        6  ...       1216      1786  2769    16     8         11   
4        141        2  ...       1208      1212  1411     8     2         15   

   three_g  touch_screen  wifi  price_

In [5]:
# Split into training and test sets (80:20).
# 'price_range' is the target (class label); every other column is a feature.
y = df['price_range']
X = df.drop(columns='price_range')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is reproducible
    stratify=y,  # stratify the split on the class labels
)

In [6]:
# Standardization: the scaler is fitted on the training set only, and the
# same fitted scaler is then applied to both the training and test sets.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
# Models: decision tree (DT), random forest (RF), support vector machine (SVM),
# logistic regression (LR). Keys are the display names; insertion order is the
# order in which the models are iterated.
models = {}
models["Decision Tree"] = DecisionTreeClassifier(random_state=42)
models["Random Forest"] = RandomForestClassifier(n_estimators=100, random_state=42)
models["SVM"] = SVC(kernel='linear', random_state=42)
models["Logistic Regression"] = LogisticRegression(max_iter=200, random_state=42)

In [8]:
# Train every model and report its accuracy on the test set.
# SVM and LR use the standardized features; DT and RF use the original data.
for name, model in models.items():
    uses_scaled = name in ("SVM", "Logistic Regression")
    train_features, test_features = (
        (X_train_scaled, X_test_scaled) if uses_scaled else (X_train, X_test)
    )

    model.fit(train_features, y_train)
    y_pred = model.predict(test_features)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {accuracy:.4f}")

Decision Tree Accuracy: 0.8300
Random Forest Accuracy: 0.8800
SVM Accuracy: 0.9625
Logistic Regression Accuracy: 0.9650
