# Gradio End-to-end Machine Learning Tutorial

## 1. Build Machine Learning Model

### a. Exploratory Data Analysis

In [1]:
# Read the raw spam dataset and preview its first rows
import pandas as pd

# latin-1 is requested explicitly (presumably the file is not valid UTF-8 — confirm)
spam_data = pd.read_csv(filepath_or_buffer="spam.csv", encoding="latin-1")
spam_data.head(n=5)

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,


In [3]:
# Keep only the label (v1) and message (v2) columns; the "Unnamed: 2..4" columns
# are empty in the rows previewed above.
# .copy() yields an independent frame, so the later column assignment
# (spam_data['target'] = ...) cannot raise SettingWithCopyWarning.
spam_data = spam_data[['v1', 'v2']].copy()
spam_data

Unnamed: 0,v1,v2
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5567,spam,This is the 2nd time we have tried 2 contact u...
5568,ham,Will Ì_ b going to esplanade fr home?
5569,ham,"Pity, * was in mood for that. So...any other s..."
5570,ham,The guy did some bitching but I acted like i'd...


In [5]:
# Give the columns descriptive names: v1 -> target (label), v2 -> text (message).
# rename() returns a new frame that is rebound to spam_data; this avoids
# inplace=True, which mutates the existing object and cannot be chained.

spam_data = spam_data.rename(columns={'v1': 'target',
                                      'v2': 'text'})

spam_data.head()

Unnamed: 0,target,text
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [7]:
from sklearn.preprocessing import LabelEncoder

# Learn the label -> integer mapping, then overwrite the string labels with it.
# In the preview shown after this step, ham appears as 0 and spam as 1.
label_encoder = LabelEncoder().fit(spam_data['target'])
spam_data['target'] = label_encoder.transform(spam_data['target'])

### b. Data Preprocessing

In [10]:
# Hold out a test set; random_state pins the shuffle so the split is repeatable
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

messages = spam_data['text']
labels = spam_data['target']
X_train, X_test, Y_train, Y_test = train_test_split(
    messages,
    labels,
    random_state=0,
)

In [12]:
# Build a bag-of-words representation using unigrams and bigrams.
# The vocabulary is learned from the training split only, so nothing from the
# test messages leaks into the features.
vectorizer = CountVectorizer(ngram_range=(1, 2))
# fit_transform learns the vocabulary and encodes X_train in a single pass,
# instead of tokenizing the training corpus twice (fit, then transform).
X_train_vectorized = vectorizer.fit_transform(X_train)
# The test split is only transformed with the already-fitted vocabulary.
X_test_vectorized = vectorizer.transform(X_test)

In [14]:
# Preview the frame again: the target column now holds the encoded labels
spam_data.head(n=5)

Unnamed: 0,target,text
0,0,"Go until jurong point, crazy.. Available only ..."
1,0,Ok lar... Joking wif u oni...
2,1,Free entry in 2 a wkly comp to win FA Cup fina...
3,0,U dun say so early hor... U c already then say...
4,0,"Nah I don't think he goes to usf, he lives aro..."


In [16]:
# Print the first message in full (the table preview truncates long texts)
first_message = spam_data['text'].iloc[0]
print(first_message)

Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...


### c. Build Machine Learning Model

In [19]:
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

#### c.1. Logistic Regression Model

In [22]:
# 1. Fit a logistic-regression classifier with default hyper-parameters
lr_model = LogisticRegression().fit(X_train_vectorized, Y_train)

# 2. Predict labels for the held-out test messages
y_lr_model_predict = lr_model.predict(X_test_vectorized)

In [26]:
# 3. Score the logistic-regression predictions against the held-out labels
print('Logistic Regression:')
for metric_label, metric_fn in (
    ('Accuracy:', accuracy_score),
    ('Precision:', precision_score),
    ('Recall:', recall_score),
    ('F1 score:', f1_score),
):
    print(metric_label, metric_fn(Y_test, y_lr_model_predict))

Logistic Regression:
Accuracy: 0.9791816223977028
Precision: 0.9941176470588236
Recall: 0.8578680203045685
F1 score: 0.9209809264305178


#### c.2. Linear SVM Model

In [28]:
# Train a linear support-vector classifier on the same features.
# random_state is fixed (same seed as the train/test split) because LinearSVC's
# solver may draw pseudo-random numbers while fitting, so an unseeded fit is
# not guaranteed to give identical results on every run.
svm_model = LinearSVC(random_state=0)
svm_model.fit(X_train_vectorized, Y_train)
# Predict labels for the held-out test messages
y_svm_model_predict = svm_model.predict(X_test_vectorized)



In [30]:
# Score the SVM predictions with the same four metrics used for logistic regression
print('\nSVM:')
svm_scores = {
    'Accuracy:': accuracy_score(Y_test, y_svm_model_predict),
    'Precision:': precision_score(Y_test, y_svm_model_predict),
    'Recall:': recall_score(Y_test, y_svm_model_predict),
    'F1 score:': f1_score(Y_test, y_svm_model_predict),
}
for metric_label, metric_value in svm_scores.items():
    print(metric_label, metric_value)


SVM:
Accuracy: 0.9806173725771715
Precision: 1.0
Recall: 0.8629441624365483
F1 score: 0.9264305177111717


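#### c.3. Conclusion
The Linear SVM model slightly outperforms the Logistic Regression model on every reported metric (accuracy, precision, recall, and F1 score), so it is the model used in the rest of this tutorial.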

### d. Use the model on real-life data

In [32]:
# 1. Serialize the best model
import joblib

In [34]:
import pickle
import os

# Location where the fitted SVM is persisted
model_folder = "./models/"
model_file_name = "svm_best_model.pkl"
model_path = os.path.join(model_folder, model_file_name)

# exist_ok=True makes this a no-op when the folder is already present
os.makedirs(model_folder, exist_ok=True)

# Serialize the trained classifier to disk (pickle needs a binary file handle)
with open(model_path, 'wb') as model_file:
    pickle.dump(svm_model, model_file)


In [38]:
# 2. Load the model
# Read the pickled SVM back from disk into its own variable.
# NOTE: only unpickle files you created yourself; pickle can run code on load.
saved_model_path = os.path.join(model_folder, model_file_name)
with open(saved_model_path, 'rb') as model_file:
    loaded_svm_model = pickle.load(model_file)


In [42]:
# Persist the fitted CountVectorizer next to the model so the same
# vocabulary can be applied to new text at prediction time
vectorizer_file_name = "count_vectorizer.pkl"
vectorizer_path = os.path.join(model_folder, vectorizer_file_name)

with open(vectorizer_path, 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)


In [44]:
# Load the vectorizer back from disk under the name used by make_prediction
saved_vectorizer_path = os.path.join(model_folder, vectorizer_file_name)
with open(saved_vectorizer_path, 'rb') as vectorizer_file:
    cv = pickle.load(vectorizer_file)


In [46]:
def make_prediction(input_text, vectorizer=None, model=None):
    """Classify a single message as ham or spam and return a display string.

    Parameters
    ----------
    input_text : str
        The raw message to classify.
    vectorizer : object with a ``transform`` method, optional
        Fitted text vectorizer. Defaults to the notebook-level ``cv``.
    model : object with a ``predict`` method, optional
        Fitted classifier. Defaults to the notebook-level ``loaded_svm_model``.

    Returns
    -------
    str
        ``"This message is a : Ham ✅"`` when the predicted label is 0,
        otherwise ``"This message is a : Spam 🚨"``.

    Notes
    -----
    The optional parameters are deliberately positional-or-keyword with
    defaults, so callers that pass only the text (for example a Gradio
    interface with a single text input) keep working unchanged.
    """
    # Fall back to the objects loaded earlier in the notebook; the globals are
    # only looked up when no override is supplied.
    if vectorizer is None:
        vectorizer = cv
    if model is None:
        model = loaded_svm_model

    # transform() is given a one-item list so a single message is treated as
    # one document
    preprocessed_text = vectorizer.transform([input_text])

    # predict() returns one label per document; only the first is needed
    prediction = model.predict(preprocessed_text)

    # Label 0 is reported as ham; any other label is reported as spam
    info = "Ham ✅" if prediction[0] == 0 else "Spam 🚨"

    return "This message is a : {}".format(info)

# Smoke-test the function on a message that should be flagged
text_example = "Thank you for your message. You have won $1000000000 send your bank information asap!!!"
example_result = make_prediction(text_example)
print(example_result)


This message is a : Spam 🚨


## 2. Build Gradio App

In [None]:
# Uncomment to install Gradio into the kernel's environment if it is missing:
# %pip install gradio

In [48]:
import gradio as gr

In [50]:
headline = "Spam Detector App"

# One text box in, one text box out, wired to make_prediction
iface = gr.Interface(
    fn=make_prediction,
    inputs="text",
    outputs="text",
    title=headline,
)
# share=True also publishes a temporary public URL in addition to the local one
iface.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://48cf93d02d6f5f4452.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [52]:
# Inspect the held-out test messages (pandas abbreviates the display)
X_test

4456    Aight should I just plan to come up later toni...
690                                    Was the farm open?
944     I sent my scores to sophas and i had to do sec...
3768    Was gr8 to see that message. So when r u leavi...
1189    In that case I guess I'll see you at campus lodge
                              ...                        
1889                     I gotta collect da car at 6 lei.
2250    I am getting threats from your sales executive...
2915      What time should I tell my friend to be around?
1282    Ever green quote ever told by Jerry in cartoon...
1206    As a SIM subscriber, you are selected to recei...
Name: text, Length: 1393, dtype: object