In [20]:
import pandas as pd
import tensorflow_hub as hub
import tensorflow as tf
import tensorflow_text as tf_text
import numpy as np
import os
import tensorflow_data_validation as tfdv

In [4]:
# Location of the exported model; the same directory is reused below to
# find the label vocabulary under 'assets/tags'.
model_dir = "gs://metadata-bucket-sky/new_model/serving_test"

# Restore the Keras model from that directory.
loaded_model = tf.keras.models.load_model(filepath=model_dir)


Two checkpoint references resolved to different objects (<tensorflow.python.keras.saving.saved_model.load.TensorFlowTransform>TransformFeaturesLayer object at 0x7f8d88d60450> and <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x7f8d88d60910>).


In [237]:
# Two evaluation tables (series and movies) read from the bucket.
test_data = pd.read_parquet(path='gs://metadata-bucket-sky/series_data.parquet')
test_data_movies = pd.read_parquet(path='gs://metadata-bucket-sky/movie_data.parquet')

# Label vocabulary stored next to the model; the file has no header row.
label_vocab = pd.read_csv(
    filepath_or_buffer=os.path.join(model_dir, 'assets/tags'),
    header=None,
)

In [225]:
def display_random_prediction(test_data, model, label_vocab, idx=None):
    """Print one example and show the model's five highest-scoring labels for it.

    Prints the row's title and long synopsis, then the name of the first
    column (from position 3 onward) whose value is greater than zero, and
    finally displays the five largest prediction scores.

    Args:
        test_data: DataFrame with 'program_title' and 'program_longsynopsis'
            columns. Every column from position 3 onward is treated as a
            label indicator (assumed numeric -- TODO confirm against the
            parquet schema).
        model: Object exposing ``predict(list_of_str)``; ``predicted[0]`` is
            used as the score vector, one score per entry of ``label_vocab``.
        label_vocab: DataFrame whose values, flattened, name the scores.
        idx: Positional row index. When None, a row is chosen at random.
    """
    if idx is None:
        idx = np.random.choice(len(test_data))

    actual = test_data.iloc[idx]

    print('TITLE: ', actual['program_title'])
    print('SYNOPSIS: ', actual['program_longsynopsis'], '\n')

    # Explicit positional slice: everything after the first three columns.
    label_flags = actual.iloc[3:]
    labels = label_flags[label_flags > 0]
    # A row may carry no positive label at all; indexing [0] unconditionally
    # raised IndexError in that case, so fall back to None.
    first_label = labels.index[0] if len(labels) > 0 else None
    print('LABEL: ', first_label, '\n')

    predicted = model.predict([actual['program_longsynopsis']])
    scores = pd.Series(predicted[0], index=list(np.hstack(label_vocab.values)))
    # `display` is the notebook-provided rich display hook.
    display(scores.sort_values(ascending=False)[:5])
    

In [246]:
# Random row from the series table, with the model's top predictions.
display_random_prediction(
    test_data=test_data,
    model=loaded_model,
    label_vocab=label_vocab,
)

TITLE:  IOC Session
SYNOPSIS:  Signing of the candidate city contract and joint press conference of the IOC and the elected host city for the Olympic Winter Games 2026. Announcement show for the election of the candidate city for the Olympic Winter Games 2026. Includes press conferences and news announcements from the IOC. Candidate cities present the pros of hosting the 2026 Oympics. 

LABEL:  Sports 



Sports              0.956708
Olympics            0.925684
Sports non-event    0.916772
News                0.037099
Local               0.034975
dtype: float32

In [244]:
# Random row from the movies table, with the model's top predictions.
display_random_prediction(
    test_data=test_data_movies,
    model=loaded_model,
    label_vocab=label_vocab,
)

TITLE:  Mystery Road
SYNOPSIS:  An aboriginal detective returns to the Outback to investigate the murder of a teenage girl. 

LABEL:  Crime 



Mystery               0.363583
Thriller              0.352428
Drama                 0.294179
Action & Adventure    0.262419
Horror                0.192390
dtype: float32

---

# Alternative Test Data

In [229]:
# Alternative evaluation table read from the same bucket.
alt_test_data = pd.read_parquet(path='gs://metadata-bucket-sky/test_data.parquet')

In [230]:
def display_random_prediction_alt(test_data, model, label_vocab, idx=None):
    """Print one example from the alternative table and show its top-5 predictions.

    Prints the first 512 characters of the row's 'synopsis', its 'labels'
    and its 'tags', then displays the five largest scores returned by
    ``model.predict`` for the full synopsis.

    Args:
        test_data: DataFrame with 'synopsis', 'labels' and 'tags' columns.
        model: Object exposing ``predict(list_of_str)``; ``predicted[0]`` is
            used as the score vector.
        label_vocab: DataFrame whose values, flattened, name the scores.
        idx: Positional row index. When None, a row is chosen at random.
    """
    row_pos = np.random.choice(len(test_data)) if idx is None else idx
    actual = test_data.iloc[row_pos]
    synopsis = actual['synopsis']

    # Only the printed synopsis is truncated; the model sees the full text.
    print('SYNOPSIS: ', synopsis[:512], '\n')
    print('LABELS: ', list(actual['labels']))
    print('TAGS: ', list(actual['tags']), '\n')

    scores = model.predict([synopsis])[0]
    tag_names = list(np.hstack(label_vocab.values))
    ranked = pd.Series(scores, index=tag_names).sort_values(ascending=False)
    display(ranked[:5])

In [235]:
# Random row from the alternative table, with the model's top predictions.
display_random_prediction_alt(
    test_data=alt_test_data,
    model=loaded_model,
    label_vocab=label_vocab,
)

SYNOPSIS:  Frasier is devastated when he discovers that his learned mentor has been dating Carla. When a suspicious spot appears on Norm's chest X-ray, he decides to fulfill a dream of sailing around the world. The Cheers gang takes on a rival bar in a war of outrageous pranks; the gang becomes paranoid of all strangers entering the bar, including Boston Red Sox third baseman Wade Boggs. The Cheers gang takes on a rival bar in a war of outrageous pranks; the gang becomes paranoid of all strangers entering the bar, inc 

LABELS:  ['teens (ages 13-14)', 'Comedy', 'Drama', 'Holiday', 'Romance']
TAGS:  ['Bartender', 'Waitress', 'teens (ages 13-14)', 'Boston', 'Witty', 'Comedy', 'Amusing', 'Bar', 'Sitcom', 'Patron', '1990s', 'TV', '1980s', 'Drama', "Valentine's Day", 'Romantic comedy', 'Single life', 'Holiday', 'Romance', 'Halloween', 'Workplace romance', 'Opposites attract', '20th century'] 



Sitcom                    0.995261
Comedy                    0.985245
Teens                     0.584792
teens (ages 13-14)        0.269389
older teens (ages 15+)    0.164545
dtype: float32

---

# Free-form input: type any synopsis text below to see the model's predictions

In [224]:
# Score a hand-written synopsis instead of a row from the test tables.
predicted = loaded_model.predict(
    ['Improving education in California. Barry Munitz details the goals and results of Governor Gray Davis special legislative session on education. Improving public education in California.']
)

# Pair each score with its label name and show the five largest.
display(
    pd.Series(predicted[0], index=list(np.hstack(label_vocab.values)))
    .sort_values(ascending=False)
    .head(5)
)

Local             0.928351
Public Affairs    0.802196
Educational       0.032287
News              0.029285
Special           0.028663
dtype: float32