In [2]:
import numpy as np
import os

from tflite_model_maker import model_spec
from tflite_model_maker import text_classifier
from tflite_model_maker.config import ExportFormat
from tflite_model_maker.text_classifier import AverageWordVecSpec
from tflite_model_maker.text_classifier import DataLoader

from tflite_support.task import core
from tflite_support.task import processor
from tflite_support.task import text

import tensorflow as tf
# This notebook targets the TensorFlow 2.x API; fail early, and say which
# version was actually found, when a different major version is installed.
assert tf.__version__.startswith('2'), (
    'TensorFlow 2.x is required, found {}'.format(tf.__version__))
# Only let TensorFlow's Python logger emit messages at ERROR level or above.
tf.get_logger().setLevel('ERROR')

In [3]:
# Fetch the SST-2 archive and ask Keras to extract it after download.
sst2_archive = tf.keras.utils.get_file(
    fname='SST-2.zip',
    origin='https://dl.fbaipublicfiles.com/glue/data/SST-2.zip',
    extract=True)
# The dataset is expected in an 'SST-2' folder located next to the archive.
data_dir = os.path.join(os.path.dirname(sst2_archive), 'SST-2')

Downloading data from https://dl.fbaipublicfiles.com/glue/data/SST-2.zip


In [4]:
import pandas as pd

def replace_label(original_file, new_file):
  """Rewrite a TSV dataset as CSV with human-readable sentiment labels.

  Args:
    original_file: Path of the tab-separated input file. It is read with
      pandas, and the values 0 and 1 in its 'label' column are renamed.
    new_file: Destination path for the CSV output. The DataFrame index is
      written as well, as the first (unnamed) column.
  """
  # Numeric label -> label name.
  label_names = {0: 'negative', 1: 'positive'}

  # The source data is stored as TSV, hence the explicit tab separator.
  frame = pd.read_csv(original_file, sep='\t')

  # Rename the labels (inside the 'label' column only) and save the result.
  frame.replace({'label': label_names}).to_csv(new_file)

# Relabel both the training split and the dev (test) split. Each source TSV is
# read from the downloaded dataset folder and the relabelled CSV is written to
# the current working directory.
for tsv_name, csv_name in (('train.tsv', 'train.csv'), ('dev.tsv', 'dev.csv')):
  replace_label(os.path.join(data_dir, tsv_name), csv_name)


In [5]:
# Fetch the model spec identified by the name 'average_word_vec'. The same
# `spec` object is passed to both DataLoader.from_csv calls and to
# text_classifier.create in the cells below.
spec = model_spec.get('average_word_vec')

In [6]:
# Both CSV files share the same column layout and must be loaded with the
# same model spec, so those arguments are defined once and reused.
csv_columns = dict(
    text_column='sentence',
    label_column='label',
    model_spec=spec)

# train.csv is loaded as training data, dev.csv as evaluation data.
train_data = DataLoader.from_csv(
    filename='train.csv', is_training=True, **csv_columns)
test_data = DataLoader.from_csv(
    filename='dev.csv', is_training=False, **csv_columns)


2024-07-07 22:01:20.334969: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/unknown/Documents/00.Repositories/BudgetScript/.venv/lib/python3.9/site-packages/cv2/../../lib64:
2024-07-07 22:01:20.334999: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2024-07-07 22:01:20.335045: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (unknown): /proc/driver/nvidia/version does not exist
2024-07-07 22:01:20.335388: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler 

In [9]:
# Build the text classifier from `train_data` with the spec chosen above; the
# number of training epochs is passed explicitly as 10.
model = text_classifier.create(train_data, model_spec=spec, epochs=10)

Epoch 2/2
Epoch 3/3
Epoch 4/4
Epoch 5/5
Epoch 6/6
Epoch 7/7
Epoch 8/8
Epoch 9/9
Epoch 10/10


In [10]:
# Evaluate the model on `test_data` (loaded from dev.csv). The result is
# unpacked as (loss, acc); this cell does not display either value itself.
loss, acc = model.evaluate(test_data)



In [11]:
# Export the model into the 'average_word_vec' directory. The inference cells
# further down load './average_word_vec/model.tflite' (see `_MODEL`), so this
# directory name has to stay in sync with that path.
model.export(export_dir='average_word_vec')

2024-07-07 22:07:41.727717: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


Estimated count of arithmetic ops: 722  ops, equivalently 361  MACs


2024-07-07 22:07:42.394706: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-07-07 22:07:42.394896: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2024-07-07 22:07:42.397027: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1164] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.024ms.
  function_optimizer: function_optimizer did nothing. time = 0.004ms.

2024-07-07 22:07:42.435068: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:357] Ignored output_format.
2024-07-07 22:07:42.435118: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:360] Ignored drop_control_dependency.
2024-07-07 22:07:42.455160: I tensorflow/compiler/mlir/lite/flatbuffer_export.cc:1963] Estimated count of arithmetic ops: 722  ops, equivalently 361  MACs



In [13]:
# Reload the relabelled dev split for manual inspection. replace_label() calls
# to_csv() without index=False, so the DataFrame index is stored as the first
# (unnamed) CSV column; index_col=0 turns that column back into the index
# instead of treating it as data.
sentence_data = pd.read_csv('dev.csv', index_col=0)
sentence_data

Unnamed: 0,sentence,label
0,it 's a charming and often affecting journey .,positive
1,unflinchingly bleak and desperate,negative
2,allows us to hope that nolan is poised to emba...,positive
3,"the acting , costumes , music , cinematography...",positive
4,"it 's slow -- very , very slow .",negative
...,...,...
867,has all the depth of a wading pool .,negative
868,a movie with a real anarchic flair .,positive
869,a subject like this should inspire reaction in...,negative
870,... is an arthritic attempt at directing by ca...,negative


In [14]:
# Path of the TFLite text classification model to load. It points into the
# 'average_word_vec' directory that the model.export cell writes to.
_MODEL = './average_word_vec/model.tflite'
# Whether to run the model on EdgeTPU (passed to BaseOptions as `use_coral`).
_ENABLE_EDGETPU = False
# Number of CPU threads to run the model (passed to BaseOptions as
# `num_threads`).
_NUM_THREADS = 4


In [15]:
# Bundle the model path and the runtime settings defined in the previous cell.
base_options = core.BaseOptions(
    file_name=_MODEL,
    use_coral=_ENABLE_EDGETPU,
    num_threads=_NUM_THREADS,
)
options = text.NLClassifierOptions(base_options)

# Build the NLClassifier used for inference from those options.
classifier = text.NLClassifier.create_from_options(options)


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [18]:
# Spot-check the exported model on the first rows of the dev split.
# head() clamps to the rows that actually exist, so a CSV with fewer than 20
# rows no longer triggers an out-of-range .iloc lookup.
preview = sentence_data.head(20)
for sentence, label in zip(preview['sentence'], preview['label']):
  text_classification_result = classifier.classify(sentence)
  classification_list = text_classification_result.classifications[0].categories

  # Keep the highest-scoring category. max() returns the first maximal item,
  # the same element a stable descending sort would put at position 0.
  predict_label = max(classification_list, key=lambda item: item.score)

  # Separate the sentence and its gold label with exactly one space instead of
  # relying on the sentence text happening to end with whitespace.
  print('{} {}'.format(sentence.rstrip(), label))
  print('predict_label: {}'.format(predict_label.category_name))

it 's a charming and often affecting journey . positive
predict_label: positive
unflinchingly bleak and desperate negative
predict_label: negative
allows us to hope that nolan is poised to embark a major career as a commercial yet inventive filmmaker . positive
predict_label: positive
the acting , costumes , music , cinematography and sound are all astounding given the production 's austere locales . positive
predict_label: positive
it 's slow -- very , very slow . negative
predict_label: negative
although laced with humor and a few fanciful touches , the film is a refreshingly serious look at young women . positive
predict_label: positive
a sometimes tedious film . negative
predict_label: negative
or doing last year 's taxes with your ex-wife . negative
predict_label: negative
you do n't have to know about music to appreciate the film 's easygoing blend of comedy and romance . positive
predict_label: positive
in exactly 89 minutes , most of which passed as slowly as if i 'd been sitti