In [159]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
import gradio as gr

In [160]:
# Read the labelled messages from the CSV in the working directory and
# preview the first rows to confirm the Category / Message columns.
df = pd.read_csv(filepath_or_buffer='mail_data.csv')
df.head()

Unnamed: 0,Category,Message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [161]:
# Count messages per class to see how balanced the ham/spam labels are.
df['Category'].value_counts()

Category
ham     4825
spam     747
Name: count, dtype: int64

* label encoding

In [162]:
# Encode the target in place: ham -> 0, spam -> 1.
ham_rows = df['Category'] == 'ham'
df.loc[ham_rows, 'Category'] = 0
spam_rows = df['Category'] == 'spam'
df.loc[spam_rows, 'Category'] = 1

In [163]:
# Preview the frame again to confirm Category now holds 0/1 values.
df.head()

Unnamed: 0,Category,Message
0,0,"Go until jurong point, crazy.. Available only ..."
1,0,Ok lar... Joking wif u oni...
2,1,Free entry in 2 a wkly comp to win FA Cup fina...
3,0,U dun say so early hor... U c already then say...
4,0,"Nah I don't think he goes to usf, he lives aro..."


* divide dataset

In [164]:
# Inputs are the raw message texts; the target is the encoded category.
x, y = df['Message'], df['Category']

* split into training and testing

In [165]:
# Hold out 20% of the messages for testing; random_state pins the shuffle
# so the split is reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=3,
)

In [166]:
# Inspect the training messages. A bare last expression lets Jupyter render
# the Series itself instead of routing it through print().
x_train

3075                  Don know. I did't msg him recently.
1787    Do you know why god created gap between your f...
1614                         Thnx dude. u guys out 2nite?
4304                                      Yup i'm free...
3266    44 7732584351, Do you want a New Nokia 3510i c...
                              ...                        
789     5 Free Top Polyphonic Tones call 087018728737,...
968     What do u want when i come back?.a beautiful n...
1667    Guess who spent all last night phasing in and ...
3321    Eh sorry leh... I din c ur msg. Not sad alread...
1688    Free Top ringtone -sub to weekly ringtone-get ...
Name: Message, Length: 4457, dtype: object


In [167]:
# Inspect the held-out messages. A bare last expression lets Jupyter render
# the Series itself instead of routing it through print().
x_test

2632    URGENT! Your mobile No 077xxx WON a £2,000 Bon...
454     Ok i will tell her to stay out. Yeah its been ...
983     Congrats! 2 mobile 3G Videophones R yours. cal...
1282        Am I the only one who doesn't stalk profiles?
4610                               Y de asking like this.
                              ...                        
4827                        Haha, just what I was thinkin
5291      Xy trying smth now. U eat already? We havent...
3325    I don wake since. I checked that stuff and saw...
3561    Lol I know! Hey someone did a great inpersonat...
1136                      K do I need a login or anything
Name: Message, Length: 1115, dtype: object


In [168]:
# Inspect the training labels. A bare last expression lets Jupyter render
# the Series itself instead of routing it through print().
y_train

3075    0
1787    0
1614    0
4304    0
3266    1
       ..
789     1
968     0
1667    0
3321    0
1688    1
Name: Category, Length: 4457, dtype: object


In [169]:
# Sanity-check the split sizes (y_test's shape is not included here).
x_train.shape,x_test.shape,y_train.shape

((4457,), (1115,), (4457,))

* feature extraction

In [170]:
# TF-IDF vectoriser: lower-cases the text, drops English stop words, and
# keeps every term that appears in at least one document (min_df=1).
feature_extraction = TfidfVectorizer(
    lowercase=True,
    stop_words='english',
    min_df=1,
)

In [171]:
# Fit the vectoriser on the training messages only, so the held-out
# messages do not influence what it learns.
x_train_feature = feature_extraction.fit_transform(x_train)

# Transform the test messages with the already-fitted vectoriser.
x_test_feature = feature_extraction.transform(x_test)

In [172]:
# The 0/1 labels are still stored with object dtype; cast both splits to int.
y_train, y_test = y_train.astype('int'), y_test.astype('int')

* model training

In [173]:
# Logistic-regression classifier created with scikit-learn's default settings.
model = LogisticRegression()

In [174]:
# Fit the classifier on the TF-IDF training matrix and the integer labels.
model.fit(x_train_feature,y_train)

In [175]:
# Accuracy on the same data the model was fitted on (an optimistic estimate).
x_train_prediction = model.predict(x_train_feature)
# accuracy_score is documented as (y_true, y_pred). The value is the same
# either way, but the documented order matches the other sklearn metrics,
# where swapping the arguments does change the result.
train_accuracy = accuracy_score(y_train, x_train_prediction)
print(train_accuracy)

0.9670181736594121


In [176]:
# Accuracy on the held-out split, i.e. messages not used for fitting.
x_test_prediction = model.predict(x_test_feature)
# accuracy_score is documented as (y_true, y_pred). The value is the same
# either way, but the documented order matches the other sklearn metrics,
# where swapping the arguments does change the result.
test_accuracy = accuracy_score(y_test, x_test_prediction)
print(test_accuracy)

0.9659192825112107


* model evaluation

In [177]:
# Smoke-test the fitted vectoriser + classifier on one hand-written message.
input_data = ["Had your mobile 11 months or more? U R entitled to Update to the latest colour mobiles with camera for Free! Call The Mobile Update Co FREE on 08002986030"]

# Keep the raw text and its TF-IDF matrix under separate names, so
# `input_data` does not switch from a list of strings to a sparse matrix.
input_features = feature_extraction.transform(input_data)
prediction = model.predict(input_features)
print(prediction)

[1]


In [178]:
def predict_text(text):
    """Classify a single message with the fitted vectoriser and model.

    The text is wrapped in a one-element list, transformed with the
    notebook-level ``feature_extraction`` vectoriser, and passed to the
    notebook-level ``model``.

    Returns:
        'Normal' when the predicted label equals 0, otherwise 'Spam!!'.
    """
    features = feature_extraction.transform([text])
    predicted_label = model.predict(features)[0]
    if predicted_label == 0:
        return 'Normal'
    return 'Spam!!'

# Wrap predict_text in a minimal text-in / text-out Gradio interface.
iface = gr.Interface(fn=predict_text, inputs="text", outputs="text",
                     title="Spam Detector", description="Enter Mail")

# Start the Gradio server for the interface.
iface.launch()

Running on local URL:  http://127.0.0.1:7881

To create a public link, set `share=True` in `launch()`.


