### 匯入套件

In [42]:
import pandas as pd
import warnings

In [43]:
from sklearn.tree import DecisionTreeClassifier    #決策樹
from sklearn.naive_bayes import GaussianNB         #高斯天真貝氏
from sklearn.naive_bayes import MultinomialNB      #多項式天真貝氏
from sklearn.linear_model import Perceptron        #感知器
from sklearn.neighbors import KNeighborsClassifier #KNN
from sklearn import linear_model                   #邏輯回歸
from sklearn import svm                            #SVM

In [44]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_validate
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [45]:
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides any warnings the estimators emit during
# cross-validation — confirm that is intended.
warnings.filterwarnings(action='ignore')

### 導入文本資料

In [46]:
# Load the TF-IDF table; the first CSV row supplies the column names and no
# column is used as the index.
# Forward slashes keep this relative path valid on Windows as well as on
# POSIX systems, where a backslash is not a path separator.
Word_array = pd.read_csv(
    "../../../Feature_Array/PTT/Python/Main_Features/TF-IDF_500.csv",
    header=0,
    index_col=None,
)

### 導入統計特徵資料(第一、第二、第三人稱代詞、留言長度)

In [47]:
# Load the statistical features (pronoun counts and comment length, per the
# section heading); the first CSV row supplies the column names.
# Forward slashes keep this relative path valid on Windows as well as on
# POSIX systems, where a backslash is not a path separator.
Statistical_Features = pd.read_csv(
    "../../../Feature_Array/PTT/Python/Extra_Features/Statistical/Statistical_Features.csv",
    header=0,
    index_col=None,
)

### 導入情感特徵資料(正面情感字數、負面情感字數、情感分數、表情符號數、表情符號情感分數)

In [48]:
# Load the sentiment features (sentiment word counts/scores and emoticon
# counts/scores, per the section heading); the first CSV row supplies the
# column names.
# Forward slashes keep this relative path valid on Windows as well as on
# POSIX systems, where a backslash is not a path separator.
Sentiment = pd.read_csv(
    "../../../Feature_Array/PTT/Python/Extra_Features/Sentiment/All_Sentiment_Sentiwordnet.csv",
    header=0,
    index_col=None,
)

### 導入卡方霸凌詞資料

In [49]:
# Load the chi-square bullying-word features; the first CSV row supplies the
# column names.
# Forward slashes keep this relative path valid on Windows as well as on
# POSIX systems, where a backslash is not a path separator.
BullyWord = pd.read_csv(
    "../../../Feature_Array/PTT/Python/Extra_Features/Bully_Word/Chisquare_Bully_data.csv",
    header=0,
    index_col=None,
)

In [50]:
# Feature matrix: the four tables placed side by side.
# Word_array contributes everything except its last column; the other three
# tables contribute everything except their first column.
# NOTE(review): assumes 'answer' is the last column of Word_array and that
# the first column of the other tables is not a feature — confirm, otherwise
# the label leaks into X.
X = pd.concat(
    [
        Word_array.iloc[:, :-1],
        Statistical_Features.iloc[:, 1:],
        Sentiment.iloc[:, 1:],
        BullyWord.iloc[:, 1:],
    ],
    axis='columns',
)

# Target variable.
Y = Word_array['answer']

### 宣告list 儲存評估指標結果

In [10]:
# One independent, initially empty list per evaluation metric; each model
# later appends its mean cross-validation score to every list.
(
    list_accuracy,
    list_precision,
    list_recall,
    list_f1_Weight,
    list_f1_Micro,
    list_f1_Macro,
    list_AUC,
) = ([] for _ in range(7))

### 輸入KFold參數

In [11]:
# 10-fold splitter that shuffles the rows with a fixed seed; it is passed to
# cross_validate for every model.
KF = KFold(
    n_splits=10,
    shuffle=True,
    random_state=1,
)

### 建立機器學習函式

In [12]:
def ML(model, features=None, labels=None, cv=None):
    """Cross-validate *model* and return the per-fold results.

    Args:
        model: scikit-learn estimator to evaluate.
        features: Feature matrix. Defaults to the notebook-level ``X``.
        labels: Target vector. Defaults to the notebook-level ``Y``.
        cv: Cross-validation splitter. Defaults to the notebook-level
            ``KF`` (note: ``None`` here means "use ``KF``", not
            scikit-learn's own default splitter).

    Returns:
        The dict returned by ``cross_validate``; each requested metric is
        stored as an array of per-fold scores under ``test_<metric>``
        (e.g. ``test_accuracy``, ``test_roc_auc``).
    """
    # Fall back to the notebook globals so existing ``ML(model)`` calls
    # behave exactly as before.
    if features is None:
        features = X
    if labels is None:
        labels = Y
    if cv is None:
        cv = KF

    metrics = [
        'accuracy',
        'precision',
        'recall',
        'f1_weighted',
        'f1_micro',
        'f1_macro',
        'roc_auc',
    ]
    return cross_validate(model, features, labels, cv=cv, scoring=metrics)

### 決策樹

In [13]:
# Seed the tree so repeated runs report the same scores: an unseeded
# DecisionTreeClassifier permutes the features randomly at each split, so
# its results can drift between runs even though the folds are seeded.
DT = DecisionTreeClassifier(random_state=1)
DT_Score = ML(DT)

In [14]:
# Record the decision tree's mean score over the folds for each metric,
# rounded to 4 decimal places.
for _scores, _metric in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _scores.append(round(DT_Score[_metric].mean(), 4))

### 高斯天真貝氏

In [15]:
# Gaussian Naive Bayes with default settings, scored through ML().
GNB = GaussianNB()
GNB_Score = ML(model=GNB)

In [16]:
# Record Gaussian Naive Bayes' mean score over the folds for each metric,
# rounded to 4 decimal places.
for _scores, _metric in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _scores.append(round(GNB_Score[_metric].mean(), 4))

### 多項式天真貝氏

In [17]:
# Multinomial Naive Bayes with default settings, scored through ML().
# NOTE(review): MultinomialNB expects non-negative feature values — confirm
# that none of the concatenated features (e.g. sentiment scores) is negative.
MNB = MultinomialNB()
MNB_Score = ML(model=MNB)

In [18]:
# Record Multinomial Naive Bayes' mean score over the folds for each metric,
# rounded to 4 decimal places.
for _scores, _metric in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _scores.append(round(MNB_Score[_metric].mean(), 4))

### Perceptron

In [19]:
# Perceptron with an L2 penalty, scored through ML().
PLA = Perceptron(penalty='l2')
PLA_Score = ML(model=PLA)

In [20]:
# Record the perceptron's mean score over the folds for each metric,
# rounded to 4 decimal places.
for _scores, _metric in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _scores.append(round(PLA_Score[_metric].mean(), 4))

### 線性邏輯回歸

In [21]:
# Logistic regression with default settings, scored through ML().
LR = linear_model.LogisticRegression()
LR_Score = ML(model=LR)

In [22]:
# Record logistic regression's mean score over the folds for each metric,
# rounded to 4 decimal places.
for _scores, _metric in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _scores.append(round(LR_Score[_metric].mean(), 4))

### KNN

In [23]:
# k-nearest-neighbours classifier with default settings, scored through ML().
KNN = KNeighborsClassifier()
KNN_Score = ML(model=KNN)

In [24]:
# Record KNN's mean score over the folds for each metric, rounded to
# 4 decimal places.
for _scores, _metric in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _scores.append(round(KNN_Score[_metric].mean(), 4))

### SVM

In [25]:
# Linear-kernel support vector classifier, scored through ML().
# probability=True is intentionally not set: it makes every fit run an extra
# internal 5-fold calibration, and none of the metrics requested in ML() need
# predict_proba (the 'roc_auc' scorer uses decision_function when the
# estimator provides one), so it only added training time.
SVM = svm.SVC(kernel='linear')
SVM_Score = ML(SVM)

In [26]:
# Record the SVM's mean score over the folds for each metric, rounded to
# 4 decimal places.
for _scores, _metric in (
    (list_accuracy, 'test_accuracy'),
    (list_precision, 'test_precision'),
    (list_recall, 'test_recall'),
    (list_f1_Weight, 'test_f1_weighted'),
    (list_f1_Micro, 'test_f1_micro'),
    (list_f1_Macro, 'test_f1_macro'),
    (list_AUC, 'test_roc_auc'),
):
    _scores.append(round(SVM_Score[_metric].mean(), 4))

In [27]:
# Model display names, listed in the same order the models were evaluated
# and their scores appended.
result_title = [
    'Decision Tree',
    'Gaussian Naive Bayes',
    'Multinomial Naive Bayes',
    'PLA',
    'Logistic Regression',
    'KNN',
    'SVM',
]

# Column-oriented results: the model names plus one list per metric.
result_dict = {
    '所有特徵': result_title,
    'Accuracy': list_accuracy,
    'Precision': list_precision,
    'Recall': list_recall,
    'F1_Weight': list_f1_Weight,
    'F1_Micro': list_f1_Micro,
    'F1_Macro': list_f1_Macro,
    'AUC': list_AUC,
}

In [28]:
# Build the results table, then transpose it so each dict key (model names,
# then one metric each) becomes a row and each model a column.
df = pd.DataFrame(result_dict).transpose()