In [2]:
!pip install lime

Collecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/275.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/275.7 kB[0m [31m888.5 kB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/275.7 kB[0m [31m1.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: lime
  Building wheel for lime (setup.py) ... [?25l[?25hdone
  Created wheel for lime: filename=lime-0.2.0.1-py3-none-any.whl size=283835 sha256=ed7ec17b2a2a4403268e1ef0f17e379db8ce055fec1c3d72065c77fef15a4699
  Stored in directory: /root/.cache/pip/wheels/fd/a2/af/9ac0a1a85a27f314a06b39e1f492bee1547d52549a4606ed89
Succes

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC
from lime.lime_text import LimeTextExplainer
from sklearn.pipeline import make_pipeline
import ipywidgets as widgets
from IPython.display import display, clear_output
import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag

# Data packages needed by word_tokenize / pos_tag further down.
# Recent NLTK releases (3.9+) look up "punkt_tab" and
# "averaged_perceptron_tagger_eng" instead of the older pickled packages, so
# both generations are fetched to keep the notebook runnable regardless of the
# NLTK version installed in the kernel.
for nltk_resource in (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(nltk_resource)

# Load the labelled mail corpus (expects 'Category' and 'Message' columns).
df = pd.read_csv('/content/mail_data.csv')

# Encode labels as integers: spam -> 0, ham -> 1.
# An explicit mapping is used instead of Series.replace, whose implicit
# object -> int downcasting is deprecated in recent pandas. Values that are not
# in the mapping are passed through unchanged, exactly as replace did.
label_codes = {'spam': 0, 'ham': 1}
encoded_category = df['Category'].map(lambda label: label_codes.get(label, label))

# Fail early and clearly if the column holds anything other than the two
# known labels (e.g. a typo or a missing value) rather than at model-fit time.
unexpected_mask = ~encoded_category.isin(list(label_codes.values()))
if unexpected_mask.any():
    unexpected_labels = encoded_category[unexpected_mask].astype(str).unique().tolist()
    raise ValueError(f"Unexpected values in 'Category' column: {unexpected_labels}")
df['Category'] = encoded_category.astype(int)

# TfidfVectorizer rejects NaN documents, so treat a missing body as empty text.
X = df['Message'].fillna('')
Y = df['Category']

# 80/20 hold-out split with a fixed seed so the reported metrics are repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=3)

# Text features: lower-cased TF-IDF with English stop words removed.
feature_extraction = TfidfVectorizer(min_df=1, stop_words='english', lowercase=True)

# probability=True enables predict_proba (required by LIME below). The
# probability calibration is fitted with an internal, randomized
# cross-validation, so random_state is fixed to make predict_proba -- and
# therefore the LIME explanations -- reproducible between runs. It does not
# influence predict() or which hyper-parameters the grid search selects.
model = SVC(probability=True, random_state=3)

# make_pipeline names each step after its lower-cased class name, hence the
# 'svc__' prefix on the hyper-parameter keys.
parameters = {'svc__C': [0.1, 1, 10], 'svc__gamma': [1, 0.1, 0.01]}
pipe = make_pipeline(feature_extraction, model)

# 9 parameter combinations x 5 folds; n_jobs=-1 runs the fits on all available
# cores without changing the result.
grid_search = GridSearchCV(pipe, param_grid=parameters, cv=5, n_jobs=-1)


grid_search.fit(X_train, Y_train)


# LIME explainer for raw text. The class names are listed in label order:
# index 0 is spam and index 1 is ham, matching the Category encoding.
explainer = LimeTextExplainer(class_names=['Spam', 'Ham'])

# Score the grid search's best estimator on the held-out split and print
# per-class precision / recall / F1.
predictions = grid_search.predict(X_test)
print(classification_report(y_true=Y_test, y_pred=predictions))


# --- Interactive front end ---
# Area that receives everything the click handlers print.
output = widgets.Output()

# Action buttons: run the spam check / reset the form.
button = widgets.Button(description="檢測郵件")
clear_button = widgets.Button(description="清除")

# Free-text input for the e-mail body.
text = widgets.Textarea(
    description='郵件內容:',
    placeholder='輸入郵件內容',
    value='',
    disabled=False,
    layout=widgets.Layout(width='400px', height='100px'),
)

# Render the controls in order: input, check button, clear button, results.
display(text, button, clear_button, output)

def tag_parts_of_speech(email_content):
    """Tokenize ``email_content`` and tag each token with its part of speech.

    The text is split with NLTK's ``word_tokenize`` and the resulting tokens
    are handed to ``pos_tag``; whatever ``pos_tag`` returns is passed back to
    the caller unchanged.
    """
    return pos_tag(word_tokenize(email_content))

def on_button_clicked(b):
    """Click handler for the check button.

    Reads the text box and, inside the output widget, prints:
      1. the part-of-speech tags of the message,
      2. the predicted class (label 1 -> ham, anything else -> spam),
      3. the ten most influential words according to LIME with their weights.

    If the text box holds no letters or digits (empty, whitespace-only or
    punctuation-only input) a prompt is printed instead, because there is
    nothing meaningful to classify and the LIME explainer has no words to
    perturb.

    Parameters
    ----------
    b : the button instance passed in by ``on_click``; not used.
    """
    with output:
        clear_output()
        email_content = text.value

        # Guard against input without any word content before running the
        # tagger, the classifier and the explainer.
        if not any(ch.isalnum() for ch in email_content):
            print("請輸入包含文字的郵件內容")
            return

        tagged = tag_parts_of_speech(email_content)
        print("郵件詞性標註:")
        print(tagged)

        print("\n郵件檢測結果：")
        # The pipeline expects an iterable of documents, hence the one-item list.
        prediction = grid_search.predict([email_content])
        if prediction[0] == 1:
            print("正常郵件")
        else:
            print("垃圾郵件")

        # LIME queries predict_proba on perturbed copies of the message.
        # as_list() is called with its default label, i.e. the weights refer to
        # class index 1 ('Ham'): positive values push towards ham, negative
        # values towards spam.
        exp = explainer.explain_instance(email_content, grid_search.predict_proba, num_features=10)
        print("\n郵件特徵值:")
        for feature, importance in exp.as_list():
            print(f"{feature}: {importance:.4f}")


def on_clear_button_clicked(b):
    """Click handler for the clear button: reset the form.

    Wipes everything previously printed into the output widget and then
    empties the text box. ``b`` is the clicked button supplied by
    ``on_click`` and is not used.
    """
    output.clear_output()
    text.value = ''

# Hook the handlers up to their buttons (the two registrations are independent).
clear_button.on_click(on_clear_button_clicked)  # reset the form
button.on_click(on_button_clicked)              # run the spam check

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


              precision    recall  f1-score   support

           0       0.99      0.88      0.94       155
           1       0.98      1.00      0.99       960

    accuracy                           0.98      1115
   macro avg       0.99      0.94      0.96      1115
weighted avg       0.98      0.98      0.98      1115



Textarea(value='', description='郵件內容:', layout=Layout(height='100px', width='400px'), placeholder='輸入郵件內容')

Button(description='檢測郵件', style=ButtonStyle())

Button(description='清除', style=ButtonStyle())

Output()