In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from imblearn.over_sampling import RandomOverSampler
from collections import Counter
import tensorflow as tf
from sklearn.metrics import confusion_matrix, roc_curve
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer
from tensorflow.keras.layers import (Dense,Flatten,SimpleRNN,InputLayer,Bidirectional,GRU,LSTM,BatchNormalization,Dropout,Input,GlobalMaxPooling1D,Embedding,TextVectorization,LayerNormalization,MultiHeadAttention)
from tensorflow.keras.losses import BinaryCrossentropy,CategoricalCrossentropy, SparseCategoricalCrossentropy
from tensorflow.keras.metrics import Accuracy,TopKCategoricalAccuracy, CategoricalAccuracy, SparseCategoricalAccuracy
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder
import re
import string
from transformers import (BertTokenizerFast,TFBertTokenizer,BertTokenizer,RobertaTokenizerFast,
                          DataCollatorWithPadding,TFRobertaForSequenceClassification,TFBertForSequenceClassification,
                          TFBertModel,create_optimizer)
from keras.models import Model
from keras import Input 

In [None]:
train_data = pd.read_csv('train_data.csv')

In [None]:
train_data.head()

In [None]:
train_data.shape

In [None]:
train_data.info()

In [None]:
# taking care of the 'vote' column
# Votes may be stored as text with thousands separators (e.g. "1,234").
# Casting to str first keeps this cell working even when pandas has already
# parsed the column as numeric, where the .str accessor would raise.
vote_as_text = train_data['vote'].astype(str).str.replace(',', '', regex=False)
# Anything that is not a number (including missing values) becomes NaN here.
train_data['vote'] = pd.to_numeric(vote_as_text, errors='coerce')
median_value = train_data['vote'].median()
# Assign the filled column back instead of calling fillna(inplace=True) on a
# column selection: that is chained assignment, which pandas warns about and
# which does not update the frame under Copy-on-Write.
train_data['vote'] = train_data['vote'].fillna(median_value)

In [None]:
train_data['vote'].unique()

In [None]:
# due to few amount of values in column 'style'
train_data = train_data.drop('style', axis=1)

In [None]:
# to handle the nan values in column 'summary'
# Note: dropna() is called without a subset, so a row is removed when it has a
# NaN in any column, not only in 'summary'.
train_data = train_data.dropna()

In [None]:
train_data.info()

In [None]:
train_data.describe()

## First Analysis Task
* Plotting the distribution of overall ratings
* Checking if the dataset is balanced or not

In [None]:
# Bar chart of the number of reviews per star rating, ordered by rating.
rating_counts = train_data['overall'].value_counts().sort_index()
fig, ax = plt.subplots(figsize=(8, 6))
rating_counts.plot(kind='bar', ax=ax)
ax.set_title('Distribution of Overall Ratings')
ax.set_xlabel('Rating')
ax.set_ylabel('Count')
plt.show()

Due to the large number of samples in the class of five stars against other classes, we can conclude that **the dataset is not balanced**. 

Taking care of this, we are going to balance it by **over sampling**!

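Oversampling randomly duplicates samples of the minority classes until every class is as large as the majority class (it does not shrink the majority class).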

In [None]:
# Separate the target ('overall' star rating) from the remaining feature columns.
y = train_data['overall']
x = train_data.drop(columns='overall')

In [None]:
# Random oversampling with a fixed seed so the resampled rows are reproducible.
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(x, y)

# Put the resampled features and target back into a single frame.
# NOTE(review): RandomOverSampler presumably appends the duplicated rows after
# the original ones -- confirm before relying on the row order of this frame.
train_data_resampled = pd.concat([X_resampled, y_resampled], axis=1)

In [None]:
# Same bar chart as before, now for the oversampled data.
resampled_rating_counts = train_data_resampled['overall'].value_counts().sort_index()
fig, ax = plt.subplots(figsize=(8, 6))
resampled_rating_counts.plot(kind='bar', ax=ax)
ax.set_title('Distribution of Overall Ratings')
ax.set_xlabel('Rating')
ax.set_ylabel('Count')
plt.show()

In [None]:
train_data_resampled.shape

## Second Analysis Task


In [None]:
# working on a small part of the dataset due to less computation cost
# .copy() makes checking_df an independent frame: later cells add columns to
# it, and doing that on a bare .head() slice raises SettingWithCopyWarning.
# NOTE(review): if the oversampler appends its duplicated rows at the end,
# these first 20000 rows are (mostly) the original, unbalanced data -- confirm
# that this is the subset you intend to analyse and train on.
checking_df = train_data_resampled.head(20000).copy()

# download stopwords
nltk.download('stopwords')
# A set gives constant-time membership tests in preprocess_text.
stop_words = set(stopwords.words('english'))

# preprocess text
def preprocess_text(text):
    """Normalise one review for word-frequency analysis.

    The text is lower-cased, every character that is neither a letter nor
    whitespace is removed, and words found in the module-level ``stop_words``
    collection are dropped. The surviving words are returned joined by single
    spaces.
    """
    lowered = text.lower()
    letters_only = ''.join(filter(lambda ch: ch.isalpha() or ch.isspace(), lowered))
    kept_words = [token for token in letters_only.split() if token not in stop_words]
    return ' '.join(kept_words)

# apply preprocessing to reviewText
checking_df['processed_reviewText'] = checking_df['reviewText'].map(preprocess_text)

# categorize reviews
def _sentiment_from_rating(rating):
    """Return 'positive' for ratings 4 and 5, 'neutral' for 3, otherwise 'negative'."""
    if rating in [4, 5]:
        return 'positive'
    if rating == 3:
        return 'neutral'
    return 'negative'

checking_df['sentiment'] = checking_df['overall'].apply(_sentiment_from_rating)

# generating word cloud
def generate_wordcloud(text, title):
    """Render a word cloud of ``text`` and show it under the heading ``title``.

    The cloud is generated at 800x400 on a white background and displayed in
    a 10x5 inch figure with the axes hidden.
    """
    cloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(cloud, interpolation='bilinear')
    ax.set_title(title)
    ax.axis('off')
    plt.show()

# word cloud for each sentiment group
# Concatenate the cleaned reviews of each sentiment group into one long string.
sentiment_texts = {
    label: ' '.join(checking_df.loc[checking_df['sentiment'] == label, 'processed_reviewText'])
    for label in ('positive', 'neutral', 'negative')
}
positive_text = sentiment_texts['positive']
neutral_text = sentiment_texts['neutral']
negative_text = sentiment_texts['negative']

for cloud_text, cloud_title in (
    (positive_text, 'Positive Reviews Word Cloud'),
    (neutral_text, 'Neutral Reviews Word Cloud'),
    (negative_text, 'Negative Reviews Word Cloud'),
):
    generate_wordcloud(cloud_text, cloud_title)

# analyze common words
# Word frequencies per group; only the vocabularies (the keys) are compared below.
positive_words = Counter(positive_text.split())
negative_words = Counter(negative_text.split())

common_words = set(positive_words.keys()) & set(negative_words.keys())
print(f"Common words between positive and negative reviews: {common_words}")

**Interpretation of common words**:

It is natural for some words to appear in both negative and positive reviews, because many words do not carry any specific sentiment by themselves, for instance 'chair'. Whoever used 'chair' in a sentence could have been happy with it or not!

## Third Analysis Task

In [None]:
# Total votes received per reviewer; ID and display name together form the key.
reviewer_votes = (
    checking_df
    .groupby(['reviewerID', 'reviewerName'])['vote']
    .sum()
    .reset_index()
)

# Keep the ten reviewers with the highest vote totals.
top_reviewers = reviewer_votes.sort_values(by='vote', ascending=False).iloc[:10]

# top 10 reviewers
print("Top 10 Reviewers with Most Useful Comments:")
print(top_reviewers[['reviewerName', 'vote']])

# results in another style
for reviewer_name, vote_total in zip(top_reviewers['reviewerName'], top_reviewers['vote']):
    print(f"{reviewer_name}: {vote_total} votes")

## Fourth Analysis Task

In [None]:
# length of each review (number of characters)
checking_df['review_length'] = checking_df['reviewText'].map(len)


def _plot_length_histogram(lengths, title, color):
    """Show a 50-bin histogram of review lengths with the given title and bar colour."""
    plt.figure(figsize=(12, 6))
    plt.hist(lengths, bins=50, color=color, edgecolor='black')
    plt.title(title)
    plt.xlabel('Review Length (number of characters)')
    plt.ylabel('Frequency')
    plt.show()


# histogram of the original review lengths
_plot_length_histogram(checking_df['review_length'], 'Histogram of Review Lengths (Original)', 'blue')

# filtering by considering reviews with length less than the 95th percentile
threshold = checking_df['review_length'].quantile(0.95)
filtered_df = checking_df.loc[checking_df['review_length'] <= threshold]

# histogram of the filtered review lengths
_plot_length_histogram(filtered_df['review_length'], 'Histogram of Review Lengths (Filtered)', 'green')

# analysis of the number of characters
length_column = checking_df['review_length']
mean_length = length_column.mean()
max_length = length_column.max()
# The 95th percentile (truncated to an integer) doubles as the suggested limit.
suggested_limit = int(threshold)
print(f"Mean review length: {mean_length}")
print(f"Max review length: {max_length}")
print(f"Suggested limit for the number of characters: {suggested_limit}")

* The histogram of the original review lengths shows a wide range of lengths, with some very long reviews.
* After filtering out outliers (above the 95th percentile), the histogram focuses on more typical review lengths.
* Also it is good for modeling to limit the number of characters as done here. 
* This helps in reducing the computational cost without losing significant information.

## Fifth Analysis Task 


In [None]:
product_details = pd.read_csv('title_brand.csv')
product_details.head()

In [None]:
# Reviews that gave the product the full five stars.
five_star_reviews = checking_df.loc[checking_df['overall'] == 5]

# five stars
# Number of five-star reviews per product (asin).
counts = (
    five_star_reviews
    .groupby('asin')
    .size()
    .reset_index(name='five_star_count')
)

# get the top 10
ranked_counts = counts.sort_values(by='five_star_count', ascending=False)
top_products = ranked_counts.drop_duplicates().head(10)

# merge the top products with product details
top_products_details = top_products.merge(product_details, on='asin')
top_products_details = top_products_details[['brand', 'title', 'five_star_count']]
# dropping duplicates
top_products_details = top_products_details.drop_duplicates().reset_index(drop=True)
# displaying the top ten
top_products_details

## Sixth Analysis Task

In [None]:
# Keep one details row per product before merging. If product_details lists an
# asin more than once, a plain merge repeats every review of that product once
# per duplicate row, which inflates the per-brand comment counts and skews the
# per-brand average rating computed below.
unique_product_details = product_details.drop_duplicates(subset='asin')
df = pd.merge(checking_df, unique_product_details, on='asin')

# count the number of comments for each brand
brand_comments = df.groupby('brand').size().sort_values(ascending=False).head(10)

# Calculating average score for each brand
brand_avg_score = df.groupby('brand')['overall'].mean()

# average scores for the top 10 brands
top_brands_avg_score = brand_avg_score[brand_comments.index]

# display
result = pd.DataFrame({'Brand': top_brands_avg_score.index, 'Average Score': top_brands_avg_score.values})
# reset_index(drop=True) renumbers the rows without creating a throw-away 'index' column.
result = result.sort_values(by='Average Score', ascending=False).reset_index(drop=True)
result

## Transformer Model

In [None]:
test_df = pd.read_csv('test_data.csv')

In [None]:
test_df.head()

In [None]:
test_df.shape

In [None]:
test_df.info()

In [None]:
# taking care of the 'vote' column
# Apply the same cleaning as for the training data: strip thousands separators
# and coerce to numbers, so the median below is computed on numeric values even
# when the CSV stores votes as text such as "1,234".
vote_as_text = test_df['vote'].astype(str).str.replace(',', '', regex=False)
test_df['vote'] = pd.to_numeric(vote_as_text, errors='coerce')
median_value = test_df['vote'].median()
# Assign back rather than fillna(inplace=True) on a column selection (chained
# assignment; it does not update the frame under Copy-on-Write).
test_df['vote'] = test_df['vote'].fillna(median_value)

In [None]:
# due to few amount of values in column 'style'
test_df = test_df.drop('style', axis=1)

In [None]:
# to handle the nan values in column 'summary'
test_df = test_df.dropna()

In [None]:
test_df.describe()

In [None]:
checking_test_df = test_df.head(10000)

In [None]:
# The sentiment label must come from the star rating ('overall'), as it does
# for the training data; 'vote' is not a star rating, so mapping it through
# the 4-5 / 3 / other rule produces meaningless labels.
# NOTE(review): assumes test_data.csv contains an 'overall' column -- confirm.
if 'overall' not in checking_test_df.columns:
    raise KeyError("checking_test_df has no 'overall' column to derive the sentiment from")
# assign() returns a new frame, which avoids SettingWithCopyWarning on the
# .head() slice this frame was created from.
checking_test_df = checking_test_df.assign(
    sentiment=checking_test_df['overall'].apply(
        lambda x: 'positive' if x in [4, 5] else ('neutral' if x == 3 else 'negative')
    )
)

In [None]:
checking_test_df = checking_test_df[['reviewText', 'sentiment']]

In [None]:
checking_test_df

In [None]:
checking_df.head()

In [None]:
checking_df2 = checking_df[['reviewText', 'sentiment']]

In [None]:
checking_df2

In [None]:
# Positional split: the first 16000 rows train the model, the rest validate it.
checking_train_df = checking_df2.iloc[:16000]
checking_val_df = checking_df2.iloc[16000:]

In [None]:
checking_train_df.shape, checking_val_df.shape

In [None]:
BATCH_SIZE=64

In [None]:
checking_train_df

In [None]:
# # encode & changing the datasets to tensorflow datas
  
# def prepare_datasets(train_df, val_df, test_df):  
#     def create_dataset(df):  
#         # Check if the DataFrame is empty  
#         if df.empty:  
#             raise ValueError("The provided DataFrame is empty.")  

#         # Check for NaN values in the 'sentiment' column  
#         if df['sentiment'].isnull().sum() > 0:  
#             print("Dropping NaN values from the sentiment column.")  
#             df = df.dropna(subset=['sentiment'])  
        
#         # Check if the DataFrame is empty after dropping NaNs  
#         if df.empty:  
#             raise ValueError("The DataFrame is empty after dropping NaN values.")  

#         # Encode the sentiment labels using .loc to avoid SettingWithCopyWarning  
#         label_encoder = LabelEncoder()  
#         df.loc[:, 'sentiment_encoded'] = label_encoder.fit_transform(df['sentiment'])  

#         # Create tensors  
#         text_tensor = tf.convert_to_tensor(df['reviewText'].values, dtype=tf.string)  
#         sentiment_tensor = tf.convert_to_tensor(df['sentiment_encoded'].values, dtype=tf.int64)  

#         # Create Dataset  
#         dataset = tf.data.Dataset.from_tensor_slices((text_tensor, sentiment_tensor))  

#         # Check the size of the dataset before shuffling  
#         if len(df) > 0:  
#             dataset = dataset.shuffle(buffer_size=len(df))  

#         return dataset, label_encoder  

#     # Prepare datasets for training, validation, and testing  
#     train_dataset, label_encoder = create_dataset(train_df)  
#     val_dataset, _ = create_dataset(val_df)  
#     test_dataset, _ = create_dataset(test_df)  

#     return train_dataset, val_dataset, test_dataset, label_encoder 

In [None]:
#train_df, val_df, test_df, label_encoder = prepare_datasets(checking_train_df, checking_val_df, checking_test_df)

In [None]:
#train_df

In [None]:
# for reviewText,sentiment in train_df.take(2):
#   print(reviewText)
#   print(sentiment)

In [None]:
# def standardization(input_data):
#     '''
#     Input: raw reviews
#     output: standardized reviews
#     '''
#     lowercase=tf.strings.lower(input_data)
#     no_tag=tf.strings.regex_replace(lowercase,"<[^>]+>","")
#     output=tf.strings.regex_replace(no_tag,"[%s]"%re.escape(string.punctuation),"")

#     return output

In [None]:
# VOCAB_SIZE=10000
# SEQUENCE_LENGTH=250
# EMBEDDING_DIM=300

In [None]:
# vectorize_layer=TextVectorization(
#     standardize=standardization,
#     max_tokens=VOCAB_SIZE,
#     output_mode='int',
#     output_sequence_length=SEQUENCE_LENGTH
# )

In [None]:
#type(train_df)

In [None]:
# training_data = train_df.map(lambda x, y: x)  
# if not training_data:  
#     print("Training data is empty, please check your input DataFrame.")  

# vectorize_layer.adapt(training_data)

In [None]:
#len(vectorize_layer.get_vocabulary())

In [None]:
# def vectorizer(review,label):
#     return vectorize_layer(review),label

In [None]:
# train_dataset=train_df.map(vectorizer)
# val_dataset=val_df.map(vectorizer)

In [None]:
# train_dataset=train_dataset.batch(BATCH_SIZE).prefetch(buffer_size=tf.data.AUTOTUNE)
# val_dataset=val_dataset.batch(BATCH_SIZE).prefetch(buffer_size=tf.data.AUTOTUNE)

### Embeddings

In [None]:
# def positional_encoding(model_size, SEQUENCE_LENGTH):  
#     output = []  
#     for pos in range(SEQUENCE_LENGTH):  
#         PE = np.zeros((model_size,))  
#         for i in range(model_size):  
#             if i % 2 == 0:  
#                 PE[i] = np.sin(pos / (10000 ** (i / model_size)))  
#             else:  
#                 PE[i] = np.cos(pos / (10000 ** ((i - 1) / model_size)))  
#         output.append(PE)  
#     return np.array(output) 

In [None]:
# class Embeddings(Layer):
#   def __init__(self, sequence_length, vocab_size, embed_dim,):
#     super(Embeddings, self).__init__()
#     self.token_embeddings=Embedding(
#         input_dim=vocab_size, output_dim=embed_dim)
#     self.sequence_length = sequence_length
#     self.vocab_size = vocab_size
#     self.embed_dim = embed_dim

#   def call(self, inputs):
#     embedded_tokens = self.token_embeddings(inputs)
#     embedded_positions=positional_encoding(
#         self.embed_dim,self.sequence_length)
#     return embedded_tokens + embedded_positions
    
#   def compute_mask(self, inputs, mask=None):
#     return tf.math.not_equal(inputs, 0)
    
#   def get_config(self): 
#       config = super().get_config()
#       config.update({
#         "sequence_length": self.sequence_length,
#         "vocab_size": self.vocab_size,
#         "embed_dim": self.embed_dim,
#       })
#       return config

In [None]:
# import numpy as np

# test_input=tf.constant([[  2, 112,   10,   12,  5,   0,   0,   0,]])

# emb=Embeddings(8,20000,256)
# emb_out=emb(test_input)
# print(emb_out.shape)

### Encoder

In [None]:
# class TransformerEncoder(Layer):
#     def __init__(self, embed_dim, dense_dim, num_heads,):
#         super(TransformerEncoder, self).__init__()
#         self.embed_dim = embed_dim
#         self.dense_dim = dense_dim
#         self.num_heads = num_heads
#         self.attention = MultiHeadAttention(
#             num_heads=num_heads, key_dim=embed_dim,
#         )
#         self.dense_proj=tf.keras.Sequential(
#             [Dense(dense_dim, activation="relu"),Dense(embed_dim),]
#         )
#         self.layernorm_1 = LayerNormalization()
#         self.layernorm_2 = LayerNormalization()
#         self.supports_masking = True

#     def call(self, inputs, mask=None):
#       if mask is not None:
#         mask1 = mask[:, :, tf.newaxis]
#         mask2 = mask[:,tf.newaxis, :]
#         padding_mask = tf.cast(mask1&mask2, dtype="int32")

#       attention_output = self.attention(
#           query=inputs, key=inputs,value=inputs,attention_mask=padding_mask
#       )
      
#       proj_input = self.layernorm_1(inputs + attention_output)
#       proj_output = self.dense_proj(proj_input)
#       return self.layernorm_2(proj_input + proj_output)
      
#     def get_config(self): 
#       config = super().get_config()
#       config.update({
#         "embed_dim": self.embed_dim,
#         "num_heads": self.num_heads,
#         "dense_dim": self.dense_dim,
#       })
#       return config

In [None]:
# encoder_outputs = TransformerEncoder(256,2048,2)(emb_out)
# print(encoder_outputs.shape)

In [None]:
# class Embeddings2(Layer):  
#     def __init__(self, sequence_length, vocab_size, embed_dim):  
#         super(Embeddings, self).__init__()  # Ensure the class name matches  
#         self.token_embeddings = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)  
#         self.sequence_length = sequence_length  

#     def call(self, inputs, mask=None):  
#         embedded_tokens = self.token_embeddings(inputs)  
#         # Apply your positional encoding logic here as needed  
#         embedded_positions = self.positional_encoding(self.token_embeddings.output_dim, self.sequence_length)  
#         return embedded_tokens + embedded_positions  

#     def compute_mask(self, inputs, mask=None):  
#         # Create a mask based on the input values  
#         return tf.math.not_equal(inputs, 0)  

#     def positional_encoding(self, model_size, sequence_length):  
#         output = np.zeros((sequence_length, model_size))  
#         for pos in range(sequence_length):  
#             for i in range(model_size):  
#                 if i % 2 == 0:  
#                     output[pos, i] = np.sin(pos / (10000 ** (i / model_size)))  
#                 else:  
#                     output[pos, i] = np.cos(pos / (10000 ** ((i - 1) / model_size)))  
#         return output  

In [None]:
# class Embeddings3(Layer):
#     def __init__(self, sequence_length, vocab_size, embed_dim,):
#         super(Embeddings, self).__init__()
#         self.token_embeddings=Embedding(
#             input_dim=vocab_size, output_dim=embed_dim)
#         self.sequence_length = sequence_length
#         self.vocab_size = vocab_size
#         self.embed_dim = embed_dim

#     def call(self, inputs):
#         embedded_tokens = self.token_embeddings(inputs)
#         embedded_positions = positional_encoding(
#             self.embed_dim, self.sequence_length)
#         return embedded_tokens + embedded_positions

#     def compute_mask(self, inputs, mask=None):
#         # Use Lambda layer to wrap TensorFlow operation
#         return layers.Lambda(lambda x: tf.math.not_equal(x, 0))(inputs)

#     def get_config(self): 
#         config = super().get_config()
#         config.update({
#             "sequence_length": self.sequence_length,
#             "vocab_size": self.vocab_size,
#             "embed_dim": self.embed_dim,
#         })
#         return config

### Transformer Model

In [None]:
# EMBEDDING_DIM=128
# D_FF=1024
# NUM_HEADS=8
# NUM_LAYERS=1
# NUM_EPOCHS=20

In [None]:
# encoder_input=Input(shape=(None,), dtype="int64", name="input")
# x = Embeddings3(SEQUENCE_LENGTH,VOCAB_SIZE,EMBEDDING_DIM)(encoder_input)

# for _ in range(NUM_LAYERS):
#   x=TransformerEncoder(EMBEDDING_DIM,D_FF,NUM_HEADS)(x)

# x = Flatten()(x) 
# output=Dense(1, activation="sigmoid")(x)

# transformer = tf.keras.Model(
#     encoder_input, output, name="transformer"
# )
# transformer.summary()

### Training

In [None]:
# transformer.compile(loss=tf.keras.losses.BinaryCrossentropy(),
#               optimizer=tf.keras.optimizers.Adam(1e-4),
#               )

In [None]:
# history=transformer.fit(
#     train_dataset,
#     validation_data=val_dataset,
#     epochs=10)

In [None]:
# plt.plot(history.history['loss'])
# plt.plot(history.history['val_loss'])
# plt.title('model_loss')
# plt.ylabel('loss')
# plt.xlabel('epoch')
# plt.legend(['train', 'val'], loc='upper left')
# plt.show()

### Evaluation

In [None]:
# test_dataset=test_df.map(vectorizer)
# test_dataset=test_dataset.batch(BATCH_SIZE)
# transformer.evaluate(test_dataset)

### Testing

In [None]:
# test_data=tf.data.Dataset.from_tensor_slices([["this movie looks very interesting, i love the fact that the actors do a great job in showing how people lived in the 18th century, which wasn't very good at all. But atleast this movie recreates this scenes! "],
#                                               ["very good start, but movie started becoming uninteresting at some point though initially i thought it would have been much more fun. There was too much background noise, so in all i didn't like this movie "],])


In [None]:
# def vectorizer_test(review):
#     return vectorize_layer(review)
# test_dataset=test_data.map(vectorizer_test)

In [None]:
# transformer.predict(test_dataset)

## Bert & Roberta Model

In [None]:
BATCH_SIZE=8

### Bert

In [None]:
# constants
NUM_EPOCHS = 3
MODEL_ID_BERT = "bert-base-uncased"
MODEL_ID_ROBERTA = "roberta-base"

In [None]:
# load datasets
train_data = pd.read_csv('train_data.csv')
test_data = pd.read_csv('test_data.csv')

In [None]:
# initialize tokenizer for BERT
tokenizer = BertTokenizerFast.from_pretrained(MODEL_ID_BERT)

In [None]:
# preprocess function
label_encoder = LabelEncoder()
def preprocess_function(examples):  
    examples.loc[:, 'sentiment_encoded'] = label_encoder.fit_transform(examples['sentiment']) 
    return tokenizer(examples["reviewText"], padding=True, truncation=True, max_length=128)

In [None]:
# tokenize datasets
train_data = train_data[['reviewText', 'sentiment_encoded']]
test_data = test_data[['reviewText', 'sentiment_encoded']]

train_data = train_data.rename(columns={"reviewText": "reviewText", "sentiment_encoded": "sentiment_encoded"})
test_data = test_data.rename(columns={"reviewText": "reviewText", "sentiment_encoded": "sentiment_encoded"})

In [None]:
# tokenize the datasets
train_data_tokenized = train_data.apply(preprocess_function, axis=1)
test_data_tokenized = test_data.apply(preprocess_function, axis=1)

In [None]:
# convert to TensorFlow datasets
tf_train_dataset = tf.data.Dataset.from_tensor_slices((
    dict(input_ids=train_data_tokenized['input_ids'].tolist(),
         token_type_ids=train_data_tokenized['token_type_ids'].tolist(),
         attention_mask=train_data_tokenized['attention_mask'].tolist()),
    train_data['sentiment_encoded'].tolist()
)).batch(BATCH_SIZE).shuffle(buffer_size=100)

tf_val_dataset = tf.data.Dataset.from_tensor_slices((
    dict(input_ids=test_data_tokenized['input_ids'].tolist(),
         token_type_ids=test_data_tokenized['token_type_ids'].tolist(),
         attention_mask=test_data_tokenized['attention_mask'].tolist()),
    test_data['sentiment_encoded'].tolist()
)).batch(BATCH_SIZE).shuffle(buffer_size=100)

In [None]:
# model definition
# The sentiment label used in this notebook has three classes
# (negative / neutral / positive), so the classification head needs three
# outputs; with num_labels=2 the third class id falls outside the logits.
NUM_LABELS = 3
model = TFBertForSequenceClassification.from_pretrained(MODEL_ID_BERT, num_labels=NUM_LABELS)
model.summary()

In [None]:
# compile model
# Total optimizer steps = full batches per epoch times the number of epochs.
steps_per_epoch = len(train_data) // BATCH_SIZE
total_train_steps = steps_per_epoch * NUM_EPOCHS
optimizer, schedule = create_optimizer(
    init_lr=2e-5,
    num_warmup_steps=0,
    num_train_steps=total_train_steps,
)
# No loss is passed here -- presumably the model's built-in loss is used; confirm.
model.compile(optimizer=optimizer, metrics=['accuracy'])

In [None]:
# train model
history = model.fit(tf_train_dataset, validation_data=tf_val_dataset, epochs=NUM_EPOCHS)

In [None]:
# plot loss
# Training and validation loss recorded by fit(), one point per epoch.
fig, ax = plt.subplots()
for history_key in ('loss', 'val_loss'):
    ax.plot(history.history[history_key])
ax.set_title('Model Loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(['Train', 'Validation'], loc='upper left')
plt.show()

In [None]:
# plot accuracy
# Training and validation accuracy recorded by fit(), one point per epoch.
fig, ax = plt.subplots()
for history_key in ('accuracy', 'val_accuracy'):
    ax.plot(history.history[history_key])
ax.set_title('Model Accuracy')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
ax.legend(['Train', 'Validation'], loc='upper left')
plt.show()

In [None]:
# testing with inputs
# Two hand-written reviews are tokenized together; padding=True pads them to a
# common length and return_tensors="tf" yields TensorFlow tensors that can be
# passed straight to the model.
test_inputs = tokenizer([
    "This movie looks very interesting, I love the fact that the actors do a great job in showing how people lived in the 18th century, which wasn't very good at all. But at least this movie recreates these scenes!",
    "Very good start, but the movie started becoming uninteresting at some point though initially I thought it would have been much more fun. There was too much background noise, but later on towards the middle of the movie, my favorite character got in and he did a great job, so overall."
], padding=True, return_tensors="tf")

In [None]:
# get logits
# Run the model on the sample inputs and print its raw (unnormalised) class scores.
model_output = model(**test_inputs)
logits = model_output.logits
print(logits)