In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import seaborn as sns

In [2]:
# Load the email dataset from the CSV file in the working directory.
data = pd.read_csv("emails.csv")

In [3]:
# Dimensions of the loaded frame as (rows, columns).
data.shape

(5728, 2)

In [6]:
# Column labels of the loaded frame.
data.columns

Index(['text', 'spam'], dtype='object')

### spam (label)
##### '1' indicates that the email is classified as spam.
##### '0' denotes that the email is legitimate (ham).
### text
##### This column contains the actual content of the email messages.

In [4]:
# Per-column count of missing values (`isna` is the same check as `isnull`).
data.isna().sum()

text    0
spam    0
dtype: int64

In [7]:
# Target vector: the `spam` column of the loaded frame.
y = data["spam"]
y.head()

0    1
1    1
2    1
3    1
4    1
Name: spam, dtype: int64

In [8]:
# Model input: the `text` column of the loaded frame.
x = data["text"]
x.head()

0    Subject: naturally irresistible your corporate...
1    Subject: the stock trading gunslinger  fanny i...
2    Subject: unbelievable new homes made easy  im ...
3    Subject: 4 color printing special  request add...
4    Subject: do not have money , get software cds ...
Name: text, dtype: object

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=6,
)

In [10]:
# Sanity check: sizes of the train/test features and labels after the split.
x_train.shape,x_test.shape,y_train.shape,y_test.shape

((4582,), (1146,), (4582,), (1146,))

In [11]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Fit the vectorizer on the training text only, then reuse the fitted
# vectorizer to transform the test text so both share the same feature space.
# NOTE: despite the `_bow` suffix, these matrices hold TF-IDF features
# (they come from TfidfVectorizer), not raw bag-of-words counts.
vectorizer = TfidfVectorizer()
x_train_bow = vectorizer.fit_transform(x_train)
x_test_bow = vectorizer.transform(x_test)

In [12]:
# Display the summary repr of the vectorized training matrix.
x_train_bow

<4582x33690 sparse matrix of type '<class 'numpy.float64'>'
	with 565198 stored elements in Compressed Sparse Row format>

In [13]:
from sklearn.svm import SVC

In [40]:
# Train a support vector classifier on the vectorized training emails.
# NOTE(review): the outputs recorded in this notebook show 1076 of the 1146
# test predictions are class 0 and a test accuracy of about 0.80, so C=0.1
# may be under-fitting -- consider tuning C and/or the kernel (TODO confirm).
model = SVC(C=0.1)
model.fit(x_train_bow, y_train)

In [41]:
# Predict a label for every held-out email.
y_pred = model.predict(x_test_bow)

# Number of test emails predicted as class 0 (ham). The variable is named
# for what it actually counts: the comparison is against 0, not 1.
count_zeros = np.sum(y_pred == 0)
count_zeros

1076

In [42]:
# Score the classifier on the held-out test split (mean accuracy for sklearn classifiers).
model.score(x_test_bow,y_test)

0.800174520069808

In [43]:
import pickle as pkl

# Persist the trained classifier to disk. The `with` block guarantees the
# file handle is flushed and closed even if pickling raises.
# NOTE: only the classifier is saved here; the fitted `vectorizer` is also
# needed to turn raw text into features, so this pickle alone cannot
# classify new emails in a fresh session.
filename = "spam classification.pkl"
with open(filename, "wb") as model_file:
    pkl.dump(model, model_file)

In [44]:
# Reload the pickled classifier as a round-trip check. The `with` block
# closes the file handle as soon as loading finishes.
# SECURITY: pickle.load can execute arbitrary code -- only unpickle files
# that this notebook (or another trusted source) produced.
with open(filename, "rb") as model_file:
    loaded_model = pkl.load(model_file)
loaded_model

In [45]:
# Gradio provides the web UI used in the cells below (aliased as `gd` throughout this notebook).
import gradio as gd
# Re-display the column names of the loaded frame.
data.columns

Index(['text', 'spam'], dtype='object')

In [46]:

def Grade(text):
    """Classify a single email text as spam or not spam.

    Parameters
    ----------
    text : str
        Raw email text to classify.

    Returns
    -------
    str
        "Spam" when the model predicts class 1 for the text,
        otherwise "NOT spam".

    Notes
    -----
    Uses the notebook-level `vectorizer` and `model` objects, and prints
    the input, the vectorized shape and the raw prediction for debugging.
    """
    print(f"Original text: {text}")
    # The vectorizer takes an iterable of documents, so the single string
    # is wrapped in a one-element list.
    input_data_vectorised = vectorizer.transform([text])
    print(f"Vectorized input shape: {input_data_vectorised.shape}")
    prediction = model.predict(input_data_vectorised)
    print(f"Prediction: {prediction}")
    # Exactly one document was submitted, so read its label explicitly
    # instead of relying on the truthiness of an array comparison.
    predicted_label = prediction[0]
    return "Spam" if predicted_label == 1 else "NOT spam"

In [47]:
# Wire the classifier into a simple web form: one text box in, one label out.
app = gd.Interface(
    fn=Grade,
    inputs=[gd.Text(label="Enter text in the mail")],
    outputs=gd.Label(),
    title="Spam Detection",
)

In [48]:
# Start the Gradio server for the `app` interface; the recorded output shows it serving on a local URL.
app.launch()

Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


