In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, average_precision_score
import logging

# Bag-of-words + SVM experiment: load the preprocessed reviews, train an SVC
# on token-count features and report classification metrics on a 20% hold-out.
#
# File logging is configured in append mode; nothing below emits log records,
# progress and results are reported with print().
logging.basicConfig(level=logging.INFO, filename='03_Model-BoW-SVM.log', filemode='a', format='%(asctime)s %(levelname)s: %(message)s')

print("Program Start !!!")

# Load the data.
data = pd.read_csv("01_DataPreFix.csv")
print("讀取資料 Finish !!!")

# Replace NaN reviews with empty strings. The result is assigned back to the
# column instead of using a chained `inplace=True` call, which is deprecated
# and does not update `data` when pandas Copy-on-Write is active.
data["ProcessedReview"] = data["ProcessedReview"].fillna("")

texts = data["ProcessedReview"]
y = data["IsInducing"]

# Split the raw text BEFORE vectorizing so the vocabulary is learned from the
# training portion only (no information from the test set leaks into the
# features). Same test_size/random_state as before, so the row partition is
# unchanged.
text_train, text_test, y_train, y_test = train_test_split(texts, y, test_size=0.2, random_state=42)
print("切分資料集 Finish !!!")

# Feature extraction: fit on the training text, only transform the test text.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)
print("特徵提取 Finish !!!")

# Choose the model.
model = SVC()
print("選擇模型 Finish !!!")

# Train the model.
model.fit(X_train, y_train)
print("訓練模型 Finish !!!")

# Predict hard labels for the threshold-based metrics, and continuous decision
# values for the ranking-based metrics.
y_pred = model.predict(X_test)
y_score = model.decision_function(X_test)
print("預測 Finish !!!")

# Evaluate the model.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
# ROC-AUC and average precision are computed over all thresholds, so they need
# the non-thresholded decision values rather than the 0/1 predictions.
auc_roc = roc_auc_score(y_test, y_score)
avg_precision = average_precision_score(y_test, y_score)

print('Accuracy:', accuracy)
print('Precision:', precision)
print('Recall:', recall)
print('F1 Score:', f1)
print('AUC-ROC: ', auc_roc)
print('Avg Precision Score: ', avg_precision)

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, average_precision_score
import logging

# TF-IDF + SVM experiment: load the preprocessed reviews, train an SVC on
# TF-IDF features and report classification metrics on a 20% hold-out.
#
# File logging is configured in append mode; nothing below emits log records,
# progress and results are reported with print().
logging.basicConfig(level=logging.INFO, filename='03_Model-TF-IDF-SVM.log', filemode='a', format='%(asctime)s %(levelname)s: %(message)s')

print("Program Start !!!")

# Load the data.
data = pd.read_csv("01_DataPreFix.csv")
print("讀取資料 Finish !!!")

# Replace NaN reviews with empty strings. The result is assigned back to the
# column instead of using a chained `inplace=True` call, which is deprecated
# and does not update `data` when pandas Copy-on-Write is active.
data["ProcessedReview"] = data["ProcessedReview"].fillna("")

texts = data["ProcessedReview"]
y = data["IsInducing"]

# Split the raw text BEFORE vectorizing so the vocabulary and IDF weights are
# learned from the training portion only (no information from the test set
# leaks into the features). Same test_size/random_state as before, so the row
# partition is unchanged.
text_train, text_test, y_train, y_test = train_test_split(texts, y, test_size=0.2, random_state=42)
print("切分資料集 Finish !!!")

# Feature extraction: fit on the training text, only transform the test text.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)
print("特徵提取 Finish !!!")

# Choose the model.
model = SVC()
print("選擇模型 Finish !!!")

# Train the model.
model.fit(X_train, y_train)
print("訓練模型 Finish !!!")

# Predict hard labels for the threshold-based metrics, and continuous decision
# values for the ranking-based metrics.
y_pred = model.predict(X_test)
y_score = model.decision_function(X_test)
print("預測 Finish !!!")

# Evaluate the model.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
# ROC-AUC and average precision are computed over all thresholds, so they need
# the non-thresholded decision values rather than the 0/1 predictions.
auc_roc = roc_auc_score(y_test, y_score)
avg_precision = average_precision_score(y_test, y_score)

print('Accuracy:', accuracy)
print('Precision:', precision)
print('Recall:', recall)
print('F1 Score:', f1)
print('AUC-ROC: ', auc_roc)
print('Avg Precision Score: ', avg_precision)
