In [2]:
import os
import tensorflow as tf
import numpy as np
from keras.layers import TFSMLayer
from transformers import AutoTokenizer, TFAutoModel
from tensorflow.keras.models import load_model

In [3]:
tokenizer_path = r'C:\Users\tarun\Desktop\Youtube-Comments-Sentiments-Analysis\model\saved_tokenizer'
model_path = r'C:\Users\tarun\Desktop\Youtube-Comments-Sentiments-Analysis\model\transformer'

In [4]:
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
model = tf.saved_model.load(model_path)

## Load dataset

In [None]:
from datasets import load_dataset
dataset = load_dataset('dair-ai/emotion', 'split')
def tokenize(batch):
    return tokenizer(batch["text"], padding=True, truncation=True,max_length=55)
tokenized_dataset = dataset.map(tokenize, batched=True, batch_size=None)
tokenized_dataset.set_format('tf', columns=['input_ids', 'attention_mask', 'token_type_ids', 'label'])
def order(inp):
    data = list(inp.values())
    return {
        'input_ids': data[1],
        'attention_mask': data[3],
        'token_type_ids': data[2]
    }, data[0]
BATCH_SIZE=8
train_dataset = tf.data.Dataset.from_tensor_slices(tokenized_dataset['train'][:])
train_dataset = train_dataset.shuffle(1000).batch(BATCH_SIZE).map(order, num_parallel_calls=tf.data.AUTOTUNE)

validation_dataset = tf.data.Dataset.from_tensor_slices(tokenized_dataset['validation'][:])
validation_dataset = validation_dataset.batch(BATCH_SIZE).map(order, num_parallel_calls=tf.data.AUTOTUNE)

test_dataset = tf.data.Dataset.from_tensor_slices(tokenized_dataset['test'][:])
test_dataset = test_dataset.batch(BATCH_SIZE).map(order, num_parallel_calls=tf.data.AUTOTUNE)

# Convert the Model to TensorFlow Lite Format with the help of Float16 Quantization

In [19]:
converter = tf.lite.TFLiteConverter.from_saved_model(model_path)
converter.target_spec.supported_types = [tf.float16]  
tflite_model = converter.convert()

# Save the TFLite model
with open('model_float16.tflite', 'wb') as f:
    f.write(tflite_model)

## checking the accuracy of the tflite model

In [21]:
# Load TFLite model and allocate tensors
interpreter = tf.lite.Interpreter(model_path='model_float16.tflite')
interpreter.allocate_tensors()

# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Function to get predictions from TFLite model for a single sample
def tflite_predict(interpreter, input_ids, attention_mask, token_type_ids):
    # Ensure inputs are reshaped to match expected input shapes (batch size 1)
    input_ids = np.expand_dims(input_ids, axis=0)           # Reshape to (1, 55)
    attention_mask = np.expand_dims(attention_mask, axis=0) # Reshape to (1, 55)
    token_type_ids = np.expand_dims(token_type_ids, axis=0) # Reshape to (1, 55)
    
    # Set the input tensors
    interpreter.set_tensor(input_details[1]['index'], input_ids)
    interpreter.set_tensor(input_details[0]['index'], attention_mask)
    interpreter.set_tensor(input_details[2]['index'], token_type_ids)
    
    # Run inference
    interpreter.invoke()
    
    # Get the output
    output = interpreter.get_tensor(output_details[0]['index'])
    return output

# Evaluate accuracy on the test set
correct_predictions = 0
total_samples = 0

for input_data, labels in test_dataset:
    # Extract individual components from input data
    input_ids = input_data['input_ids'].numpy()
    attention_mask = input_data['attention_mask'].numpy()
    token_type_ids = input_data['token_type_ids'].numpy()
    
    # Iterate over each sample in the batch
    for i in range(len(input_ids)):
        # Get predictions from TFLite model for each sample
        predictions = tflite_predict(
            interpreter, 
            input_ids[i], 
            attention_mask[i], 
            token_type_ids[i]
        )
        
        # Convert predictions to class labels (assuming a classification task)
        predicted_label = np.argmax(predictions, axis=1)[0]
        
        # Check if the prediction is correct
        correct_predictions += int(predicted_label == labels[i].numpy())
        total_samples += 1

accuracy = correct_predictions / total_samples
print(f"Accuracy of the TFLite model on test dataset: {accuracy:.4f}")

for input_data, labels in validation_dataset:
    # Extract individual components from input data
    input_ids = input_data['input_ids'].numpy()
    attention_mask = input_data['attention_mask'].numpy()
    token_type_ids = input_data['token_type_ids'].numpy()
    
    # Iterate over each sample in the batch
    for i in range(len(input_ids)):
        # Get predictions from TFLite model for each sample
        predictions = tflite_predict(
            interpreter, 
            input_ids[i], 
            attention_mask[i], 
            token_type_ids[i]
        )
        
        # Convert predictions to class labels (assuming a classification task)
        predicted_label = np.argmax(predictions, axis=1)[0]
        
        # Check if the prediction is correct
        correct_predictions += int(predicted_label == labels[i].numpy())
        total_samples += 1

accuracy = correct_predictions / total_samples
print(f"Accuracy of the TFLite model on validation dataset: {accuracy:.4f}")

Accuracy of the TFLite model on test dataset: 0.9110
Accuracy of the TFLite model on validation dataset: 0.9170
