### 匯入套件

In [19]:
import pandas as pd
import warnings

In [20]:
from sklearn.ensemble import AdaBoostClassifier

In [21]:
from sklearn.tree import DecisionTreeClassifier    #決策樹
from sklearn.naive_bayes import GaussianNB         #高斯天真貝氏
from sklearn.naive_bayes import MultinomialNB      #多項式天真貝氏
from sklearn.linear_model import Perceptron        #感知器
from sklearn.neighbors import KNeighborsClassifier #KNN
from sklearn import linear_model                   #邏輯回歸
from sklearn import svm                            #SVM

In [22]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_validate
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [23]:
# Install a global "ignore" filter so no warnings are shown from here on.
# NOTE(review): this also hides library deprecation/convergence warnings —
# confirm that silencing everything is intended.
warnings.filterwarnings('ignore')

### 導入Bag of Words資料

In [24]:
# Load the feature table; the first CSV row supplies the column names.
# Forward slashes keep this relative path usable on Windows as well as on
# POSIX systems, where a backslash-separated path does not resolve.
Word_array = pd.read_csv(
    "../../../Feature_Array/PTT/Python/Main_Features/TF-IDF_500.csv",
    header=0,
    index_col=None,
)

# Split by column name rather than by position so the label can never end up
# inside the feature matrix, wherever 'answer' sits in the file.
X = Word_array.drop(columns='answer')  # Features
Y = Word_array['answer']               # Target variable

### 宣告list 儲存評估指標結果

In [8]:
# One accumulator per metric; every evaluated model appends its rounded mean
# cross-validation score, so position i in each list refers to the same model.
list_accuracy, list_precision, list_recall = [], [], []
list_f1_Weight, list_f1_Micro, list_f1_Macro = [], [], []
list_AUC = []

### 輸入KFold參數

In [9]:
# 10-fold splitter shared by every model evaluation; shuffling is seeded so
# the fold assignment is the same on each run.
KF = KFold(
    n_splits=10,
    shuffle=True,
    random_state=1,
)

### 建立機器學習函式

In [10]:
def ML(model):
    """Cross-validate ``model`` on the notebook-level ``X``/``Y`` with the ``KF`` splitter.

    Parameters
    ----------
    model : estimator
        The classifier to evaluate; it is passed straight to ``cross_validate``.

    Returns
    -------
    dict
        The mapping produced by ``cross_validate``. Callers in this notebook
        read the per-fold arrays stored under ``'test_<metric>'`` for each of
        the metrics requested below.
    """
    # NOTE(review): 'precision', 'recall' and 'roc_auc' look like the binary
    # scorers — confirm that Y holds exactly two classes.
    metrics = [
        'accuracy',
        'precision',
        'recall',
        'f1_weighted',
        'f1_micro',
        'f1_macro',
        'roc_auc',
    ]
    return cross_validate(model, X, Y, cv=KF, scoring=metrics)

### 決策樹

In [11]:
# AdaBoost (SAMME, 50 boosting rounds) over a default decision tree, scored
# with the shared ML() cross-validation helper.
DT = DecisionTreeClassifier()
# The base learner is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, while the
# first positional slot means the same thing in both.
# NOTE(review): recent releases also deprecate `algorithm` — confirm against
# the installed scikit-learn version.
AdaBoost_DT = AdaBoostClassifier(DT, n_estimators=50, algorithm='SAMME')
AdaBoost_DT_Score = ML(AdaBoost_DT)

In [12]:
# Store the decision-tree ensemble's mean score over the folds (rounded to 4
# decimals) in each metric list. Re-running this cell appends another entry
# for the same model.
for _metric_list, _score_key in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _metric_list.append(round(AdaBoost_DT_Score[_score_key].mean(), 4))

### 高斯天真貝氏

In [13]:
# AdaBoost (SAMME, 50 boosting rounds) over Gaussian naive Bayes, scored with
# the shared ML() cross-validation helper.
GNB = GaussianNB()
# The base learner is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, while the
# first positional slot means the same thing in both.
AdaBoost_GNB = AdaBoostClassifier(GNB, n_estimators=50, algorithm='SAMME')
AdaBoost_GNB_Score = ML(AdaBoost_GNB)

In [14]:
# Store the Gaussian-NB ensemble's mean score over the folds (rounded to 4
# decimals) in each metric list. Re-running this cell appends another entry
# for the same model.
for _metric_list, _score_key in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _metric_list.append(round(AdaBoost_GNB_Score[_score_key].mean(), 4))

### 多項式天真貝氏

In [15]:
# AdaBoost (SAMME, 50 boosting rounds) over multinomial naive Bayes, scored
# with the shared ML() cross-validation helper.
MNB = MultinomialNB()
# The base learner is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, while the
# first positional slot means the same thing in both.
AdaBoost_MNB = AdaBoostClassifier(MNB, n_estimators=50, algorithm='SAMME')
AdaBoost_MNB_Score = ML(AdaBoost_MNB)

In [16]:
# Store the multinomial-NB ensemble's mean score over the folds (rounded to 4
# decimals) in each metric list. Re-running this cell appends another entry
# for the same model.
for _metric_list, _score_key in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _metric_list.append(round(AdaBoost_MNB_Score[_score_key].mean(), 4))

### Perceptron

In [17]:
# AdaBoost (SAMME, 50 boosting rounds) over an L2-penalised perceptron, scored
# with the shared ML() cross-validation helper.
PLA = Perceptron(penalty='l2')
# The base learner is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, while the
# first positional slot means the same thing in both.
AdaBoost_PLA = AdaBoostClassifier(PLA, n_estimators=50, algorithm='SAMME')
AdaBoost_PLA_Score = ML(AdaBoost_PLA)

In [18]:
# Store the perceptron ensemble's mean score over the folds (rounded to 4
# decimals) in each metric list. Re-running this cell appends another entry
# for the same model.
for _metric_list, _score_key in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _metric_list.append(round(AdaBoost_PLA_Score[_score_key].mean(), 4))

### 線性邏輯回歸

In [19]:
# AdaBoost (SAMME, 50 boosting rounds) over default logistic regression,
# scored with the shared ML() cross-validation helper.
LR = linear_model.LogisticRegression()
# The base learner is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, while the
# first positional slot means the same thing in both.
AdaBoost_LR = AdaBoostClassifier(LR, n_estimators=50, algorithm='SAMME')
AdaBoost_LR_Score = ML(AdaBoost_LR)

In [20]:
# Store the logistic-regression ensemble's mean score over the folds (rounded
# to 4 decimals) in each metric list. Re-running this cell appends another
# entry for the same model.
for _metric_list, _score_key in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _metric_list.append(round(AdaBoost_LR_Score[_score_key].mean(), 4))

In [21]:
# Interim summary of the five models evaluated up to this point.
# Every row reports an AdaBoost ensemble, so the multinomial naive Bayes label
# carries the same 'AdaBoost ' prefix as the other rows.
result_title = [
    'AdaBoost Decision Tree',
    'AdaBoost Gaussian Naive Bayes',
    'AdaBoost Multinomial Naive Bayes',
    'AdaBoost Perceptron',
    'AdaBoost Logistic Regression',
]
# Column name -> values; the metric lists are referenced, not copied.
result_dict = {
    '詞袋': result_title,
    'Accuracy': list_accuracy,
    'Precision': list_precision,
    'Recall': list_recall,
    'F1_Weight': list_f1_Weight,
    'F1_Micro': list_f1_Micro,
    'F1_Macro': list_f1_Macro,
    'AUC': list_AUC,
}

### SVM

In [None]:
# AdaBoost (SAMME, 50 boosting rounds) over a linear-kernel SVC with
# probability estimates enabled, scored with the shared ML() helper.
SVM = svm.SVC(probability=True, kernel='linear')
# The base learner is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, while the
# first positional slot means the same thing in both.
AdaBoost_SVM = AdaBoostClassifier(SVM, n_estimators=50, algorithm='SAMME')
AdaBoost_SVM_Score = ML(AdaBoost_SVM)

In [None]:
# Store the SVM ensemble's mean score over the folds (rounded to 4 decimals)
# in each metric list. Re-running this cell appends another entry for the
# same model.
for _metric_list, _score_key in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _metric_list.append(round(AdaBoost_SVM_Score[_score_key].mean(), 4))

In [None]:
# Row labels for all six ensembles, in the order their scores were appended.
result_title = [
    'AdaBoost Decision Tree',
    'AdaBoost Gaussian Naive Bayes',
    'AdaBoost Multinomial Naive Bayes',
    'AdaBoost PLA',
    'AdaBoost Logistic Regression',
    'AdaBoost SVM',
]
# Column name -> values; replaces any earlier result_dict.
result_dict = {
    '文本': result_title,
    'Accuracy': list_accuracy,
    'Precision': list_precision,
    'Recall': list_recall,
    'F1_Weight': list_f1_Weight,
    'F1_Micro': list_f1_Micro,
    'F1_Macro': list_f1_Macro,
    'AUC': list_AUC,
}

In [None]:
# Build the summary table and flip it so each metric is a row and each model
# a column.
df = pd.DataFrame(result_dict).transpose()