In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

def load_data(file_path):
    """Load a CSV dataset and split it into features and target.

    Parameters
    ----------
    file_path : str, path-like or file-like
        Location of the CSV file; handed directly to ``pd.read_csv``.

    Returns
    -------
    x : pandas.DataFrame
        Every column except the last one (the features).
    y : pandas.Series
        The last column (the target).
    """
    # Read from the path that was passed in instead of relying on a
    # module-level `df`, so the function works regardless of cell order.
    data = pd.read_csv(file_path)
    x = data.iloc[:, :-1]  # all columns but the last
    y = data.iloc[:, -1]   # last column only
    return x, y

file_path = 'mobile.csv'  # path to the dataset file
df = pd.read_csv(file_path)  # read the CSV file into a DataFrame
df.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [3]:
x, y = load_data(file_path)  # load the features (x) and the target (y)

In [4]:
# Split the data into a training set and a test set (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [24]:
# Standardize the features (mean 0, standard deviation 1).
# The scaler is fitted on the training split only and then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
# NOTE(review): this prints the original, unscaled training frame (X_train),
# not X_train_scaled.
print(X_train)

      battery_power  blue  clock_speed  dual_sim  fc  four_g  int_memory  \
968            1923     0          0.5         1   7       0          46   
240             633     1          2.2         0   0       1          49   
819            1236     0          0.9         1   2       1          57   
692             781     0          1.1         0   2       0          38   
420            1456     1          0.5         1   7       0           7   
...             ...   ...          ...       ...  ..     ...         ...   
1130           1975     1          1.9         1   2       0          31   
1294            589     1          0.5         0   1       1          59   
860            1829     1          0.5         0   0       1          15   
1459           1927     0          0.9         1   3       0          11   
1126            635     1          0.6         1   1       1          50   

      m_dep  mobile_wt  n_cores  pc  px_height  px_width   ram  sc_h  sc_w  \
968     0

In [6]:
# Decision Tree classifier
from sklearn.tree import DecisionTreeClassifier

# Train the model. A fixed random_state makes repeated runs build the same
# tree; 42 matches the seed used for the train/test split and the random forest.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train_scaled, y_train)

# Predict on the test split and evaluate
dt_pred = dt_model.predict(X_test_scaled)  # predictions for the test data
dt_acc = accuracy_score(y_test, dt_pred)  # overall accuracy
print(f'Decision Tree Accuracy: {dt_acc:.4f}\n')
print(classification_report(y_test, dt_pred), '\n')
print(confusion_matrix(y_test, dt_pred))

Decision Tree Accuracy: 0.8250

              precision    recall  f1-score   support

           0       0.91      0.88      0.89       105
           1       0.73      0.82      0.77        91
           2       0.78      0.66      0.72        92
           3       0.86      0.91      0.89       112

    accuracy                           0.82       400
   macro avg       0.82      0.82      0.82       400
weighted avg       0.83      0.82      0.82       400
 

[[ 92  13   0   0]
 [  9  75   7   0]
 [  0  15  61  16]
 [  0   0  10 102]]


In [7]:
# Logistic Regression classifier
from sklearn.linear_model import LogisticRegression

# Build and train in one step (fit returns the estimator itself)
lr_model = LogisticRegression(max_iter=200).fit(X_train_scaled, y_train)

# Predict on the test split, then report accuracy, per-class metrics
# and the confusion matrix
lr_pred = lr_model.predict(X_test_scaled)
lr_acc = accuracy_score(y_test, lr_pred)
print(f'Logistic Regression Accuracy: {lr_acc:.4f}\n')
print(classification_report(y_test, lr_pred), '\n')
print(confusion_matrix(y_test, lr_pred))

Logistic Regression Accuracy: 0.9750

              precision    recall  f1-score   support

           0       1.00      0.96      0.98       105
           1       0.94      1.00      0.97        91
           2       0.99      0.95      0.97        92
           3       0.97      0.99      0.98       112

    accuracy                           0.97       400
   macro avg       0.98      0.97      0.97       400
weighted avg       0.98      0.97      0.98       400
 

[[101   4   0   0]
 [  0  91   0   0]
 [  0   2  87   3]
 [  0   0   1 111]]


In [13]:
# SVM (support vector machine) classifier
from sklearn.svm import SVC

# Build and train in one step (fit returns the estimator itself)
svm_model = SVC().fit(X_train_scaled, y_train)

# Predict on the test split, then report accuracy, per-class metrics
# and the confusion matrix
svm_pred = svm_model.predict(X_test_scaled)
svm_acc = accuracy_score(y_test, svm_pred)
print(f'SVM Accuracy: {svm_acc:.4f}\n')
print(classification_report(y_test, svm_pred), '\n')
print(confusion_matrix(y_test, svm_pred))

SVM Accuracy: 0.8925

              precision    recall  f1-score   support

           0       0.95      0.93      0.94       105
           1       0.80      0.89      0.84        91
           2       0.84      0.82      0.83        92
           3       0.96      0.92      0.94       112

    accuracy                           0.89       400
   macro avg       0.89      0.89      0.89       400
weighted avg       0.90      0.89      0.89       400
 

[[ 98   7   0   0]
 [  5  81   5   0]
 [  0  13  75   4]
 [  0   0   9 103]]


In [30]:
# Random Forest classifier
from sklearn.ensemble import RandomForestClassifier

# Build and train in one step (fit returns the estimator itself)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train_scaled, y_train)

# Predict on the test split, then report accuracy, per-class metrics
# and the confusion matrix
rf_pred = rf_model.predict(X_test_scaled)
rf_acc = accuracy_score(y_test, rf_pred)
print(f'Random Forest Accuracy: {rf_acc:.4f}\n')
print(classification_report(y_test, rf_pred), '\n')
print(confusion_matrix(y_test, rf_pred))

Random Forest Accuracy: 0.8925

              precision    recall  f1-score   support

           0       0.95      0.96      0.96       105
           1       0.89      0.87      0.88        91
           2       0.78      0.87      0.82        92
           3       0.94      0.87      0.90       112

    accuracy                           0.89       400
   macro avg       0.89      0.89      0.89       400
weighted avg       0.90      0.89      0.89       400
 

[[101   4   0   0]
 [  5  79   7   0]
 [  0   6  80   6]
 [  0   0  15  97]]
