In [1]:
# Install Model maker
!pip install -q tflite-model-maker &> /dev/null

In [21]:
# Imports, plus a guard that the runtime is TensorFlow 2.x.
import os

import numpy as np

from tflite_model_maker import (
    ExportFormat,
    configs,
    model_spec,
    text_classifier,
)
from tflite_model_maker.text_classifier import DataLoader

import tensorflow as tf

# Stop immediately if the installed TensorFlow is not a 2.x release.
assert tf.__version__.startswith('2')

# Only let ERROR-level messages from the TensorFlow logger through.
tf.get_logger().setLevel('ERROR')

In [4]:
!wget https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/sms_spam.csv

--2023-04-10 07:17:10--  https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/sms_spam.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 500336 (489K) [text/plain]
Saving to: ‘sms_spam.csv’


2023-04-10 07:17:11 (12.8 MB/s) - ‘sms_spam.csv’ saved [500336/500336]



In [23]:
import pandas as pd

# Load the raw dataset. The download cell saves sms_spam.csv into the current working
# directory, so read it from there; the previous hard-coded "/content/sms_spam.csv" only
# pointed at the same file when the working directory happened to be /content.
df = pd.read_csv("sms_spam.csv")

In [24]:
# Preview the first rows of the loaded frame (columns: type, text).
df.head()

Unnamed: 0,type,text
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [6]:
# Per-column summary (count / unique / top / freq) to see the class balance and
# how many distinct messages there are.
df.describe()

Unnamed: 0,type,text
count,5574,5574
unique,2,5160
top,ham,"Sorry, I'll call later"
freq,4827,30


In [11]:
import re

# Normalise the message text: lower-case it, then delete every character that is neither
# a word character (\w) nor whitespace (\s), i.e. punctuation and symbols.
#
# Both steps use the vectorised .str accessor. Compared with calling re.sub inside
# .apply, this passes missing values (NaN) through instead of raising a TypeError.
# The former `"".join(...)` wrapper around re.sub only re-joined the characters of an
# already-built string, so dropping it changes nothing.
df['text'] = (
    df['text']
    .str.lower()
    .str.replace(r'[^\w\s]', '', regex=True)
)

In [12]:
# Preview the first rows again to check the cleaning step (text lower-cased, punctuation removed).
df.head()

Unnamed: 0,type,text
0,ham,go until jurong point crazy available only in ...
1,ham,ok lar joking wif u oni
2,spam,free entry in 2 a wkly comp to win fa cup fina...
3,ham,u dun say so early hor u c already then say
4,ham,nah i dont think he goes to usf he lives aroun...


In [13]:
# Split every message on whitespace; `words` is a Series holding one token list per row.
words = df['text'].str.split()

# Flatten the per-message token lists into one list of tokens. Rows without text
# (NaN) are skipped, since a missing value cannot be iterated over.
all_words = [word for sublist in words.dropna() for word in sublist]

# Distinct tokens across the whole corpus. NOTE: despite its name, `word_counts` holds
# the unique words themselves (via .unique()), not per-word frequencies; the name is
# kept so that anything referring to it keeps working.
word_counts = pd.Series(all_words).unique()

# Number of distinct words, i.e. the vocabulary size of the cleaned corpus.
vocab_size = len(word_counts)

print(vocab_size)

9585


In [14]:
# Persist the cleaned frame so DataLoader.from_csv can read it back by column name.
# index=False keeps the pandas row index out of the file; without it the CSV gains an
# extra unnamed leading column that is not part of the data.
df.to_csv("/content/pre_sms.csv", index=False)

In [15]:
# Use a model spec from Model Maker. Options are 'mobilebert_classifier',
# 'bert_classifier' and 'average_word_vec'.
# The first two use the BERT model, which is accurate, but larger and slower to train.
# 'average_word_vec' is the lightweight option (the original note describes it as akin to
# transfer learning with pre-trained word weights and dictionaries -- TODO confirm
# against the Model Maker documentation).
spec = model_spec.get('average_word_vec')

# Vocabulary size: derived from the distinct-word array computed earlier instead of the
# hard-coded 9585, so it stays in sync if the dataset or the cleaning step changes.
spec.num_words = len(word_counts)

# Sequence length and embedding dimension, as chosen by the author.
# NOTE(review): presumably seq_len is the number of tokens each message is padded or
# truncated to -- confirm against the AverageWordVecSpec documentation.
spec.seq_len = 20
spec.wordvec_dim = 7

In [16]:
# Build the dataset from the pre-processed CSV with DataLoader.from_csv:
# the 'text' column is the model input and the 'type' column is the label.
data = DataLoader.from_csv(
    filename="/content/pre_sms.csv",
    model_spec=spec,
    text_column='text',
    label_column='type',
    delimiter=',',
    is_training=True,
    shuffle=True,
)

# Split with a fraction of 0.9 (presumably 90 % for training, the rest for testing --
# confirm against the DataLoader.split documentation).
train_fraction = 0.9
train_data, test_data = data.split(train_fraction)

In [17]:
# Build the model: create a text classifier from the training split using the
# spec configured above, training for 20 epochs.
model = text_classifier.create(train_data, model_spec=spec, epochs=20)

Epoch 2/2
Epoch 3/3
Epoch 4/4
Epoch 5/5
Epoch 6/6
Epoch 7/7
Epoch 8/8
Epoch 9/9
Epoch 10/10
Epoch 11/11
Epoch 12/12
Epoch 13/13
Epoch 14/14
Epoch 15/15
Epoch 16/16
Epoch 17/17
Epoch 18/18
Epoch 19/19
Epoch 20/20


In [25]:
# Evaluate the trained model on the held-out test split; the result is unpacked
# into `loss` and `accuracy`.
loss, accuracy = model.evaluate(test_data)



In [19]:
# Export the trained model. The directory is given relative to the working directory so
# that it is exactly where the following cell looks for 'spam/model.tflite'; the previous
# absolute '/content/spam' only matched that relative path when the working directory
# happened to be /content.
model.export(export_dir='spam')

In [20]:
# Evaluate the exported TFLite model on the test split. The call returns a dict of
# metrics (printed as {'accuracy': ...}), so it gets its own name rather than overwriting
# the float `accuracy` produced by model.evaluate() earlier in the notebook.
tflite_metrics = model.evaluate_tflite('spam/model.tflite', test_data)
print('TFLite model accuracy: ', tflite_metrics)

TFLite model accuracy:  {'accuracy': 0.9874551971326165}


# Model Exporting

In [None]:
# Export in SavedModel format together with the vocabulary and the labels.
# The target is the absolute directory /mm_spam/ (at the filesystem root, not under the
# working directory); the cell that follows renames and zips its contents.
model.export(export_dir='/mm_spam/', export_format=[ExportFormat.LABEL, ExportFormat.VOCAB, ExportFormat.SAVED_MODEL])

In [None]:
# Rename the SavedModel subfolder to a version number
!mv /mm_spam/saved_model /mm_spam/123
!zip -r mm_spam.zip /mm_spam/ 

updating: mm_spam/ (stored 0%)
updating: mm_spam/labels.txt (stored 0%)
updating: mm_spam/123/ (stored 0%)
updating: mm_spam/123/assets/ (stored 0%)
updating: mm_spam/123/saved_model.pb (deflated 87%)
updating: mm_spam/123/variables/ (stored 0%)
updating: mm_spam/123/variables/variables.data-00000-of-00001 (deflated 26%)
updating: mm_spam/123/variables/variables.index (deflated 59%)
updating: mm_spam/123/keras_metadata.pb (deflated 86%)
updating: mm_spam/vocab.txt (deflated 49%)
  adding: mm_spam/123/saved_model/ (stored 0%)
  adding: mm_spam/123/saved_model/assets/ (stored 0%)
  adding: mm_spam/123/saved_model/saved_model.pb (deflated 87%)
  adding: mm_spam/123/saved_model/variables/ (stored 0%)
  adding: mm_spam/123/saved_model/variables/variables.data-00000-of-00001 (deflated 26%)
  adding: mm_spam/123/saved_model/variables/variables.index (deflated 59%)
  adding: mm_spam/123/saved_model/keras_metadata.pb (deflated 86%)
