In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

seed=123

# 1. Data Load

In [2]:
#https://www.kaggle.com/datasets/somesh24/spambase
df = pd.read_csv(r"data/spambase_csv.csv")

X= df.iloc[:,:-1]
y = df.iloc[:,-1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = seed,stratify=y)

# 2.Base Model training

## Logistic regression

In [3]:
LogReg = LogisticRegression()
LogReg.fit(X_train, y_train)
y_pred_LogReg = LogReg.predict(X_test)

LogReg_accuracy = accuracy_score(y_test, y_pred_LogReg)
print("LogReg accuracy_score: ", LogReg_accuracy)

LogReg accuracy_score:  0.9370249728555917


## Decision tree

In [4]:
DecTre = DecisionTreeClassifier()
DecTre.fit(X_train, y_train)
y_pred_DecTre = DecTre.predict(X_test)

DecTre_accuracy = accuracy_score(y_test, y_pred_DecTre)
print("DecTre accuracy_score: ", DecTre_accuracy)

DecTre accuracy_score:  0.9283387622149837


## SVM

In [5]:
SVM = SVC()
SVM.fit(X_train, y_train)
y_pred_SVM = SVM.predict(X_test)

SVM_accuracy = accuracy_score(y_test,y_pred_SVM)
print("SVM accuracy : ", SVM_accuracy)

SVM accuracy :  0.7144408251900108


## KNC

In [6]:
KNC = KNeighborsClassifier()
KNC.fit(X_train, y_train)
y_pred_KNC = KNC.predict(X_test)

KNC_accuracy = accuracy_score(y_test,y_pred_KNC)
print("KNC accuracy : ", KNC_accuracy)

KNC accuracy :  0.8143322475570033


## Random Forest

In [7]:
RanFor = RandomForestClassifier()
RanFor.fit(X_train, y_train)
y_pred_RanFor = RanFor.predict(X_test)

RanFor_accuracy = accuracy_score(y_test,y_pred_RanFor)
print("RanFor accuracy : ", RanFor_accuracy)

RanFor accuracy :  0.9522258414766558


## XGB

In [8]:
XGB = XGBClassifier()
XGB.fit(X_train, y_train)
y_pred_XGB = XGB.predict(X_test)

XGB_accuracy = accuracy_score(y_test,y_pred_XGB)
print("XGB accuracy : ", XGB_accuracy)

XGB accuracy :  0.9543973941368078


## 종합 코드

In [9]:
model_list = [LogisticRegression(), DecisionTreeClassifier(), SVC(), KNeighborsClassifier(), RandomForestClassifier(), XGBClassifier()] # model 선언
accuracy_list=[]
model_name=[]
for model in model_list:
    model.fit(X_train, y_train)                 # model 학습
    y_pred = model.predict(X_test)              # model 평가
    accuracy = accuracy_score(y_test, y_pred)   # model 정확도 계산

    accuracy_list.append(accuracy)              # model 결과 저장
    model_name.append(model.__class__.__name__) # model 이름 저장

#결과 출력
result_df = pd.DataFrame({'Model': model_name, 'Accuracy': accuracy_list})
print(result_df)

                    Model  Accuracy
0      LogisticRegression  0.937025
1  DecisionTreeClassifier  0.916395
2                     SVC  0.714441
3    KNeighborsClassifier  0.814332
4  RandomForestClassifier  0.947883
5           XGBClassifier  0.954397
