### 匯入套件

In [1]:
import pandas as pd
import warnings

In [2]:
from sklearn.ensemble import BaggingClassifier

In [3]:
from sklearn.tree import DecisionTreeClassifier    #決策樹
from sklearn.naive_bayes import GaussianNB         #高斯天真貝氏
from sklearn.naive_bayes import MultinomialNB      #多項式天真貝氏
from sklearn.linear_model import Perceptron        #感知器
from sklearn.neighbors import KNeighborsClassifier #KNN
from sklearn import linear_model                   #邏輯回歸
from sklearn import svm                            #SVM

In [4]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_validate
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [5]:
# Suppress every warning for the remainder of the session so the training
# cells do not flood the output.
warnings.filterwarnings(action='ignore')

### 導入文本資料

In [6]:
# Main text-feature table (also carries the 'answer' label column used later).
# Forward slashes keep this relative path valid on both Windows and POSIX;
# the original backslash form only resolves on Windows.
Word_array = pd.read_csv("../../../Feature_Array/PTT/Python/Main_Features/TF-IDF_500.csv", header=0, index_col=None)

### 導入統計特徵資料(第一、第二、第三人稱代詞、留言長度)

In [7]:
# Statistical feature table. Forward slashes keep this relative path valid on
# both Windows and POSIX; the original backslash form only resolves on Windows.
Statistical_Features = pd.read_csv("../../../Feature_Array/PTT/Python/Extra_Features/Statistical/Statistical_Features.csv", header=0, index_col=None)

### 導入情感特徵資料(正面情感字數、負面情感字數、情感分數、表情符號數、表情符號情感分數)

In [8]:
# Sentiment feature table. Forward slashes keep this relative path valid on
# both Windows and POSIX; the original backslash form only resolves on Windows.
Sentiment = pd.read_csv("../../../Feature_Array/PTT/Python/Extra_Features/Sentiment/All_Sentiment.csv", header=0, index_col=None)

### 導入霸凌詞資料

In [9]:
# Bully-word feature table. Forward slashes keep this relative path valid on
# both Windows and POSIX; the original backslash form only resolves on Windows.
BullyWord = pd.read_csv("../../../Feature_Array/PTT/Python/Extra_Features/Bully_Word/Chisquare_Bully_data.csv", header=0, index_col=None)

### X：TF-IDF 詞特徵、統計特徵、情感特徵、卡方檢定霸凌詞

In [10]:
# Target variable: the 'answer' column of the main text-feature table.
Y = Word_array['answer']

# Feature matrix: the four tables joined side by side (column-wise).
X = pd.concat(
    [
        # All columns except the last one.
        # NOTE(review): presumably the last column is 'answer' - confirm.
        Word_array.iloc[:, :-1],
        # The first column of each extra table is skipped.
        # NOTE(review): presumably an id/label column - confirm against the CSVs.
        Statistical_Features.iloc[:, 1:],
        Sentiment.iloc[:, 1:],
        BullyWord.iloc[:, 1:],
    ],
    axis=1,
)

### 宣告list 儲存評估指標結果

In [11]:
# One independent, initially empty list per evaluation metric; each model
# evaluated later appends its mean score to every one of them.
(
    list_accuracy,
    list_precision,
    list_recall,
    list_f1_Weight,
    list_f1_Micro,
    list_f1_Macro,
    list_AUC,
) = ([] for _ in range(7))

### 輸入KFold參數

In [12]:
# 10-fold splitter shared by every model; shuffling with a fixed seed keeps the
# fold assignment the same from run to run.
KF = KFold(shuffle=True, random_state=1, n_splits=10)

### 建立機器學習函式

In [13]:
def ML(model):
    """Cross-validate ``model`` on the module-level ``X`` / ``Y`` data.

    The folds come from the module-level ``KF`` splitter, and seven metrics
    are scored on each fold.

    Args:
        model: Estimator to evaluate; handed unchanged to ``cross_validate``.

    Returns:
        The mapping produced by ``cross_validate``; per-fold scores are read
        from it elsewhere in this file under ``'test_<metric>'`` keys such as
        ``'test_accuracy'`` and ``'test_roc_auc'``.
    """
    metric_names = [
        'accuracy',
        'precision',
        'recall',
        'f1_weighted',
        'f1_micro',
        'f1_macro',
        'roc_auc',
    ]
    return cross_validate(model, X, Y, cv=KF, scoring=metric_names)

### 決策樹

In [14]:
# Bagging ensemble of 50 decision trees, cross-validated through ML().
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, while the first positional slot is the base learner in both.
DT = DecisionTreeClassifier()
Bagging_DT = BaggingClassifier(DT, n_estimators=50)
Bagging_DT_Score = ML(Bagging_DT)

In [15]:
# Store the across-fold mean of each metric (rounded to 4 decimals) for the
# bagged decision tree in its matching results list.
for _scores_out, _metric_key in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _scores_out.append(round(Bagging_DT_Score[_metric_key].mean(), 4))

### 高斯天真貝氏

In [16]:
# Bagging ensemble of 50 Gaussian naive Bayes models, cross-validated through ML().
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, while the first positional slot is the base learner in both.
GNB = GaussianNB()
Bagging_GNB = BaggingClassifier(GNB, n_estimators=50)
Bagging_GNB_Score = ML(Bagging_GNB)

In [17]:
# Store the across-fold mean of each metric (rounded to 4 decimals) for the
# bagged Gaussian naive Bayes model in its matching results list.
for _scores_out, _metric_key in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _scores_out.append(round(Bagging_GNB_Score[_metric_key].mean(), 4))

### 多項式天真貝氏

In [18]:
# Bagging ensemble of 50 multinomial naive Bayes models, cross-validated through ML().
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, while the first positional slot is the base learner in both.
# NOTE(review): MultinomialNB rejects negative feature values - confirm that X
# (e.g. the sentiment-score columns) never contains any.
MNB = MultinomialNB()
Bagging_MNB = BaggingClassifier(MNB, n_estimators=50)
Bagging_MNB_Score = ML(Bagging_MNB)

In [19]:
# Store the across-fold mean of each metric (rounded to 4 decimals) for the
# bagged multinomial naive Bayes model in its matching results list.
for _scores_out, _metric_key in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _scores_out.append(round(Bagging_MNB_Score[_metric_key].mean(), 4))

### Perceptron

In [20]:
# Bagging ensemble of 50 L2-penalised perceptrons, cross-validated through ML().
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, while the first positional slot is the base learner in both.
PLA = Perceptron(penalty='l2')
Bagging_PLA = BaggingClassifier(PLA, n_estimators=50)
Bagging_PLA_Score = ML(Bagging_PLA)

In [21]:
# Store the across-fold mean of each metric (rounded to 4 decimals) for the
# bagged perceptron in its matching results list.
for _scores_out, _metric_key in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _scores_out.append(round(Bagging_PLA_Score[_metric_key].mean(), 4))

### 線性邏輯回歸

In [22]:
# Bagging ensemble of 50 logistic-regression models, cross-validated through ML().
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, while the first positional slot is the base learner in both.
LR = linear_model.LogisticRegression()
Bagging_LR = BaggingClassifier(LR, n_estimators=50)
Bagging_LR_Score = ML(Bagging_LR)

In [23]:
# Store the across-fold mean of each metric (rounded to 4 decimals) for the
# bagged logistic regression in its matching results list.
for _scores_out, _metric_key in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _scores_out.append(round(Bagging_LR_Score[_metric_key].mean(), 4))

### KNN

In [24]:
# Bagging ensemble of 50 k-nearest-neighbour classifiers, cross-validated through ML().
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, while the first positional slot is the base learner in both.
KNN = KNeighborsClassifier()
Bagging_KNN = BaggingClassifier(KNN, n_estimators=50)
Bagging_KNN_Score = ML(Bagging_KNN)

In [25]:
# Store the across-fold mean of each metric (rounded to 4 decimals) for the
# bagged KNN classifier in its matching results list.
for _scores_out, _metric_key in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _scores_out.append(round(Bagging_KNN_Score[_metric_key].mean(), 4))

### SVM

In [None]:
# Bagging ensemble of 50 linear-kernel SVCs (probability estimates enabled),
# cross-validated through ML().
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, while the first positional slot is the base learner in both.
SVM = svm.SVC(probability=True, kernel='linear')
Bagging_SVM = BaggingClassifier(SVM, n_estimators=50)
Bagging_SVM_Score = ML(Bagging_SVM)

In [None]:
# Store the across-fold mean of each metric (rounded to 4 decimals) for the
# bagged SVM in its matching results list.
for _scores_out, _metric_key in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _scores_out.append(round(Bagging_SVM_Score[_metric_key].mean(), 4))

In [None]:
# Model labels, listed in the order the models were evaluated.
result_title = [
    'Bagging Decision Tree',
    'Bagging Gaussian Naive Bayes',
    'Bagging Multinomial Naive Bayes',
    'Bagging PLA',
    'Bagging Logistic Regression',
    'Bagging KNN',
    'Bagging SVM',
]

# Label column followed by one column per metric; each metric list is expected
# to hold one entry per label in result_title.
result_dict = {
    '所有特徵': result_title,
    'Accuracy': list_accuracy,
    'Precision': list_precision,
    'Recall': list_recall,
    'F1_Weight': list_f1_Weight,
    'F1_Micro': list_f1_Micro,
    'F1_Macro': list_f1_Macro,
    'AUC': list_AUC,
}

In [None]:
# Build the results table, then transpose it so each row is one key of
# result_dict (label row, then one row per metric) and each column is one model.
df = pd.DataFrame(result_dict).transpose()