In [14]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Load the CSV file into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; adjust it to
# wherever the dataset lives on the machine running the notebook.
file_path = "D:/mobile.csv"
df = pd.read_csv(file_path)

# Inspect the data: first five rows and the column names.
print("데이터 확인 (상위 5개 행 출력):")
print(df.head())
print("\n열 이름 확인:")
print(df.columns)

# Name of the target column, defined once so the encoding step and the
# feature/target split cannot drift apart.
TARGET_COLUMN = 'price_range'

# Label-encode the target so the class labels become integers 0..n_classes-1.
# LabelEncoder lives in sklearn.preprocessing and has to be imported for this
# cell to run on a fresh kernel. The encoded values overwrite the column in
# `df`, and `encoder` is kept so the original labels can be recovered with
# encoder.inverse_transform().
encoder = LabelEncoder()
df[TARGET_COLUMN] = encoder.fit_transform(df[TARGET_COLUMN])

# Separate the features (X) from the target (y).
X = df.drop(columns=[TARGET_COLUMN])
y = df[TARGET_COLUMN]

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit four classifiers on the same train split and report test-set accuracy.
# Every scikit-learn estimator's fit() returns the estimator itself, so
# construction and training are written as one expression.

# 1. Decision tree
dt_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
dt_pred = dt_model.predict(X_test)
print("\n--- Decision Tree Classifier ---")
print(f"Accuracy: {accuracy_score(y_test, dt_pred):.4f}")

# 2. Random forest (100 trees)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)
print("\n--- Random Forest Classifier ---")
print(f"Accuracy: {accuracy_score(y_test, rf_pred):.4f}")

# 3. Support vector machine with a linear kernel
svm_model = SVC(kernel='linear', random_state=42).fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)
print("\n--- SVM Classifier ---")
print(f"Accuracy: {accuracy_score(y_test, svm_pred):.4f}")

# 4. Logistic regression, trained on the raw (unscaled) features with a
#    raised iteration cap.
lr_model = LogisticRegression(max_iter=5000, random_state=42).fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)
print("\n--- Logistic Regression Classifier ---")
print(f"Accuracy: {accuracy_score(y_test, lr_pred):.4f}")

데이터 확인 (상위 5개 행 출력):
   battery_power  blue  clock_speed  dual_sim  fc  four_g  int_memory  m_dep  \
0            842     0          2.2         0   1       0           7    0.6   
1           1021     1          0.5         1   0       1          53    0.7   
2            563     1          0.5         1   2       1          41    0.9   
3            615     1          2.5         0   0       0          10    0.8   
4           1821     1          1.2         0  13       1          44    0.6   

   mobile_wt  n_cores  ...  px_height  px_width   ram  sc_h  sc_w  talk_time  \
0        188        2  ...         20       756  2549     9     7         19   
1        136        3  ...        905      1988  2631    17     3          7   
2        145        5  ...       1263      1716  2603    11     2          9   
3        131        6  ...       1216      1786  2769    16     8         11   
4        141        2  ...       1208      1212  1411     8     2         15   

   three_g  touch

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [15]:
# Address the LogisticRegression ConvergenceWarning by standardizing the features.

from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Refit logistic regression on the standardized features; this rebinds
# `lr_model` and `lr_pred`. fit() returns the estimator, so construction and
# training are chained.
lr_model = LogisticRegression(max_iter=5000, random_state=42).fit(X_train_scaled, y_train)
lr_pred = lr_model.predict(X_test_scaled)
print("\n--- Logistic Regression Classifier (Scaled) ---")
print(f"Accuracy: {accuracy_score(y_test, lr_pred):.4f}")


--- Logistic Regression Classifier (Scaled) ---
Accuracy: 0.9750
