### 匯入套件

In [1]:
import pandas as pd
import warnings

In [2]:
from sklearn.tree import DecisionTreeClassifier    #決策樹
from sklearn.ensemble import RandomForestClassifier#隨機森林
from sklearn.naive_bayes import GaussianNB         #天真貝氏
from sklearn.neighbors import KNeighborsClassifier #KNN
from sklearn import linear_model                   #邏輯回歸
from sklearn import svm                            #SVM

In [3]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score, roc_auc_score, precision_score, recall_score, accuracy_score

In [4]:
# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides scikit-learn diagnostics
# (e.g. convergence or undefined-metric warnings) — confirm that is intended.
warnings.filterwarnings('ignore')

### 導入Bag of Words資料

In [5]:
# Load the bag-of-words table. The first CSV row supplies the column names and
# no column is used as the row index. The path is relative to the working
# directory and uses Windows-style separators.
BagOfWord = pd.read_csv(
    r"..\..\..\Feature_Array\PTT\Main_Features\Bag_of_Word.csv",
    header=0,
    index_col=None,
)

### 導入統計特徵資料(第一、第二、第三人稱代詞、留言長度)

In [6]:
# Load the statistical-feature table. The first CSV row supplies the column
# names and no column is used as the row index.
Statistical_Features = pd.read_csv(
    r"..\..\..\Feature_Array\PTT\Extra_Features\Statistical_Features.csv",
    header=0,
    index_col=None,
)

### 導入情感特徵資料(正面情感字數、負面情感字數、情感分數、表情符號數、表情符號情感分數)

In [7]:
# Load the sentiment-feature table. The first CSV row supplies the column
# names and no column is used as the row index.
Sentiment = pd.read_csv(
    r"..\..\..\Feature_Array\PTT\Extra_Features\All_Sentiment_Sentiwordnet.csv",
    header=0,
    index_col=None,
)

### 導入卡方霸凌詞資料

In [8]:
# Load the chi-square bully-word feature table. The first CSV row supplies the
# column names and no column is used as the row index.
BullyWord = pd.read_csv(
    r"..\..\..\Feature_Array\PTT\Extra_Features\Chisquare_Bully_data.csv",
    header=0,
    index_col=None,
)

#### X：Bag of Words、統計特徵、情感特徵、卡方霸凌詞

In [9]:
# Build the feature matrix by placing the four feature tables side by side
# (column-wise concatenation).
X = pd.concat(
    [
        # Remove the label by name rather than by position, so the target can
        # never leak into X if the CSV column order changes. The result is
        # identical to the positional slice when 'answer' is the last column.
        BagOfWord.drop(columns=['answer']),
        # The first column of each extra-feature table is skipped —
        # presumably an ID/text column; TODO confirm against the CSV files.
        Statistical_Features.iloc[:, 1:],
        Sentiment.iloc[:, 1:],
        BullyWord.iloc[:, 1:],
    ],
    axis=1,
)  # Features
Y = BagOfWord['answer']  # Target variable

### 宣告list 儲存評估指標結果

In [10]:
# One accumulator per evaluation metric. Each classifier evaluated later in
# the notebook appends one mean cross-validation score to every list, so the
# entries line up by classifier.
list_accuracy = []    # accuracy
list_precision = []   # precision
list_recall = []      # recall
list_f1_Weight = []   # weighted-average F1
list_f1_Micro = []    # micro-average F1
list_f1_Macro = []    # macro-average F1
list_AUC = []         # area under the ROC curve

### 建立機器學習函式

In [11]:
def ML(model, score):
    """Return the mean cross-validated score of ``model``, rounded to 4 places.

    Reads the notebook-level feature matrix ``X``, target ``Y`` and splitter
    ``KF``; they must be defined before this function is called.

    Args:
        model: Estimator passed to ``cross_val_score``.
        score: Scoring name forwarded as ``scoring`` (e.g. ``'accuracy'``).

    Returns:
        The mean of the per-fold scores, rounded to four decimal places.
    """
    fold_scores = cross_val_score(estimator=model, X=X, y=Y, scoring=score, cv=KF)
    mean_score = fold_scores.mean()
    return round(mean_score, 4)

### 設定k折交叉驗證參數

In [12]:
# 10-fold cross-validation with shuffling. The seed is fixed because this one
# splitter is reused for every metric and every model: without it, each
# cross_val_score call would reshuffle and score on a different partition,
# making the reported metrics neither comparable nor reproducible.
KF = KFold(n_splits=10, shuffle=True, random_state=42)

### 決策樹

In [13]:
# Decision tree with default hyperparameters. The seed is fixed so the
# separate per-metric refits build the same trees and results are reproducible.
DT = DecisionTreeClassifier(random_state=42)

In [None]:
# Score the decision tree once per scoring name (cross-validated through ML)
# and append each mean score to the matching result list.
for scores_out, scoring_name in (
    (list_accuracy, 'accuracy'),
    (list_precision, 'precision'),
    (list_recall, 'recall'),
    (list_f1_Weight, 'f1_weighted'),
    (list_f1_Micro, 'f1_micro'),
    (list_f1_Macro, 'f1_macro'),
    (list_AUC, 'roc_auc'),
):
    scores_out.append(ML(DT, scoring_name))

### 隨機森林

In [None]:
# Random forest with default hyperparameters. The seed is fixed so the
# separate per-metric refits build the same forests and results are
# reproducible.
RF = RandomForestClassifier(random_state=42)

In [None]:
# Score the random forest once per scoring name (cross-validated through ML)
# and append each mean score to the matching result list.
for scores_out, scoring_name in (
    (list_accuracy, 'accuracy'),
    (list_precision, 'precision'),
    (list_recall, 'recall'),
    (list_f1_Weight, 'f1_weighted'),
    (list_f1_Micro, 'f1_micro'),
    (list_f1_Macro, 'f1_macro'),
    (list_AUC, 'roc_auc'),
):
    scores_out.append(ML(RF, scoring_name))

### 天真貝氏

In [None]:
# Gaussian naive Bayes classifier with default settings.
NB = GaussianNB()

In [None]:
# Score Gaussian naive Bayes once per scoring name (cross-validated through
# ML) and append each mean score to the matching result list.
for scores_out, scoring_name in (
    (list_accuracy, 'accuracy'),
    (list_precision, 'precision'),
    (list_recall, 'recall'),
    (list_f1_Weight, 'f1_weighted'),
    (list_f1_Micro, 'f1_micro'),
    (list_f1_Macro, 'f1_macro'),
    (list_AUC, 'roc_auc'),
):
    scores_out.append(ML(NB, scoring_name))

### 線性邏輯回歸

In [None]:
# Logistic regression with default settings.
# NOTE(review): warnings are globally suppressed earlier in this notebook, so
# a solver convergence warning would go unnoticed here — consider checking
# convergence (e.g. a larger max_iter) before trusting these scores.
LR=linear_model.LogisticRegression()

In [None]:
# Score logistic regression once per scoring name (cross-validated through ML)
# and append each mean score to the matching result list.
for scores_out, scoring_name in (
    (list_accuracy, 'accuracy'),
    (list_precision, 'precision'),
    (list_recall, 'recall'),
    (list_f1_Weight, 'f1_weighted'),
    (list_f1_Micro, 'f1_micro'),
    (list_f1_Macro, 'f1_macro'),
    (list_AUC, 'roc_auc'),
):
    scores_out.append(ML(LR, scoring_name))

### KNN

In [None]:
# k-nearest-neighbours classifier with default settings.
KNN = KNeighborsClassifier()

In [None]:
# Score k-nearest neighbours once per scoring name (cross-validated through
# ML) and append each mean score to the matching result list.
for scores_out, scoring_name in (
    (list_accuracy, 'accuracy'),
    (list_precision, 'precision'),
    (list_recall, 'recall'),
    (list_f1_Weight, 'f1_weighted'),
    (list_f1_Micro, 'f1_micro'),
    (list_f1_Macro, 'f1_macro'),
    (list_AUC, 'roc_auc'),
):
    scores_out.append(ML(KNN, scoring_name))

### SVM

In [None]:
# Support-vector classifier with default settings.
SVM = svm.SVC()

In [None]:
# Score the support-vector classifier once per scoring name (cross-validated
# through ML) and append each mean score to the matching result list.
for scores_out, scoring_name in (
    (list_accuracy, 'accuracy'),
    (list_precision, 'precision'),
    (list_recall, 'recall'),
    (list_f1_Weight, 'f1_weighted'),
    (list_f1_Micro, 'f1_micro'),
    (list_f1_Macro, 'f1_macro'),
    (list_AUC, 'roc_auc'),
):
    scores_out.append(ML(SVM, scoring_name))

In [None]:
# Classifier names, in the same order in which the models were evaluated and
# their scores appended to the metric lists.
result_title = [
    'Decision Tree',
    'Random Forest',
    'Gaussian Naive Bayes',
    'Logistic regression',
    'KNN',
    'SVM',
]
# Column-oriented result table: the first entry holds the classifier names,
# the remaining entries hold one list of scores per metric.
result_dict = {
    'Feature Crosse4_Chisquare_STWN': result_title,
    'Accuracy': list_accuracy,
    'Precision': list_precision,
    'Recall': list_recall,
    'F1_Weight': list_f1_Weight,
    'F1_Micro': list_f1_Micro,
    'F1_Macro': list_f1_Macro,
    'AUC': list_AUC,
}

In [None]:
# Build the result table and transpose it so each metric becomes a row and
# each classifier a column.
df = pd.DataFrame(result_dict).transpose()

In [None]:
# Write the transposed table to CSV. The row labels (metric names) are kept,
# the integer column header is omitted, and the file is UTF-8 with a BOM.
df.to_csv(
    r"..\..\..\結果\PTT\特徵組合四\PTT_特徵組合四_卡方_Sentiwordnet.csv",
    index=True,
    header=False,
    encoding='utf_8_sig',
)