# Exercises

In [2]:
import gc
from pathlib import Path

import tensorflow as tf
from tensorflow import data, keras

# All paths are derived from the parent of the current working directory.
ROOT_DIR = Path.cwd().parent
MLRUNS_DIR = ROOT_DIR.parent.parent / "mlruns"
DATA_DIR = ROOT_DIR.joinpath("dataset")
PROTO_DIR = ROOT_DIR.joinpath("protobufs")
TFR_DIR = DATA_DIR.joinpath("tfrecords")
IMDB_DIR = DATA_DIR.joinpath("large_movie_review")

# Make sure the output directories exist (no-op when they already do).
TFR_DIR.mkdir(parents=True, exist_ok=True)
PROTO_DIR.mkdir(parents=True, exist_ok=True)

print(f"{MLRUNS_DIR}\n{DATA_DIR}")

/home/twogoodap/Coding_Playground/Machine_Learning/Hands_on_Machine_Learning/handson-ml/mlruns
/home/twogoodap/Coding_Playground/Machine_Learning/Hands_on_Machine_Learning/handson-ml/handson_ml/chapter_13/dataset


In [3]:
import mlflow

# SQLite cannot create its database file inside a missing directory, so make
# sure the tracking directory exists before pointing MLflow at it.
MLRUNS_DIR.mkdir(parents=True, exist_ok=True)
mlflow.set_tracking_uri(f"sqlite:///{MLRUNS_DIR}/mlflow.db")
mlflow.set_experiment("tf_data_api")

<Experiment: artifact_location='/home/twogoodap/Coding_Playground/Machine_Learning/Hands_on_Machine_Learning/handson-ml/handson_ml/chapter_13/mlruns/2', creation_time=1699089661167, experiment_id='2', last_update_time=1699089661167, lifecycle_stage='active', name='tf_data_api', tags={}>

## 9

```proto
syntax = "proto3";

message BytesList { repeated bytes value = 1; }
message FloatList { repeated float value = 1 [packed = true]; }
message Int64List { repeated int64 value = 1 [packed = true]; }
message Feature {
    oneof kind {
        BytesList bytes_list = 1;
        FloatList float_list = 2;
        Int64List int64_list = 3;
    }
};
message Features { map<string, Feature> feature = 1; };
message Example { Features features = 1; };
```

In [314]:
from contextlib import ExitStack

import numpy as np
from tensorflow.train import BytesList, Example, Feature, Features, Int64List


def fashion_mnist_to_tfrecord(
    dataset: data.Dataset,
    name: str,
    record_dir: Path = TFR_DIR / "fashion_mnist",
    n_shards: int = 10,
    seed: int = 42,
) -> list[str]:
    """Write a dataset of ``(image, label)`` pairs to sharded TFRecord files.

    The shards are created as ``<record_dir>/<name>/000.tfrecord``,
    ``001.tfrecord``, ... and the elements of ``dataset`` are distributed over
    them round-robin, in iteration order.

    Args:
        dataset: Dataset yielding ``(image, label)`` pairs.
        name: Sub-directory of ``record_dir`` that receives the shards.
        record_dir: Parent directory for the shards.
        n_shards: Number of TFRecord files to write.
        seed: Accepted for interface compatibility; not used by this function.

    Returns:
        The shard file paths as strings, ordered by shard number.
    """

    def _serialize_pair(image: np.ndarray | tf.Tensor, label: str) -> bytes:
        """Encode one ``(image, label)`` pair as a serialized ``Example``."""
        image_feature = Feature(
            bytes_list=BytesList(value=[tf.io.serialize_tensor(image).numpy()])
        )
        label_feature = Feature(int64_list=Int64List(value=[int(label)]))
        example = Example(
            features=Features(
                feature={"image": image_feature, "label": label_feature}
            )
        )
        return example.SerializeToString()

    shard_dir = record_dir / name
    shard_dir.mkdir(parents=True, exist_ok=True)
    paths = [str(shard_dir / f"{shard:03}.tfrecord") for shard in range(n_shards)]

    # ExitStack closes every writer on exit, even if writing fails part-way.
    with ExitStack() as stack:
        writers = []
        for path in paths:
            writers.append(stack.enter_context(tf.io.TFRecordWriter(path)))

        for position, (image, label) in dataset.enumerate():
            shard_writer = writers[position % n_shards]
            shard_writer.write(_serialize_pair(image, label))

    return paths

In [315]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

BUFFER_SIZE = 10_000
SEED = 42

# Fetch Fashion-MNIST from OpenML as plain arrays (no DataFrame).
fmnist = fetch_openml(name="Fashion-MNIST", as_frame=False, parser="auto")
images, targets = fmnist.data, fmnist.target

# First split: hold out 20 % as the test set, stratified by class.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    images.reshape(-1, 28, 28).astype("uint8"),
    targets,
    test_size=0.2,
    random_state=SEED,
    stratify=targets,
)
# Second split: carve the validation set out of the remaining training data
# (train_test_split's default test size applies).
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, random_state=SEED, stratify=y_train_full
)
del X_train_full, y_train_full

train_set, val_set, test_set = (
    # Seed the shuffle as well, so the order in which examples reach the
    # TFRecord shards is reproducible like the splits above.
    data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(
        buffer_size=BUFFER_SIZE, seed=SEED
    ),
    data.Dataset.from_tensor_slices((X_val, y_val)),
    data.Dataset.from_tensor_slices((X_test, y_test)),
)

# Drop the NumPy copies now that the datasets have been built.
del X_train, y_train, X_val, y_val, X_test, y_test
gc.collect()

96135

In [316]:
# Serialize every split into its own sub-directory of sharded TFRecord files.
train_paths, val_paths, test_paths = (
    fashion_mnist_to_tfrecord(split, name=split_name)
    for split, split_name in (
        (train_set, "train"),
        (val_set, "validation"),
        (test_set, "test"),
    )
)

# The in-memory datasets are rebuilt from the TFRecord files further down.
del train_set, val_set, test_set
gc.collect()

2024-02-17 17:23:15.705397: I tensorflow/core/grappler/optimizers/data/replicate_on_split.cc:32] Running replicate on split optimization
2024-02-17 17:23:25.743780: I tensorflow/core/grappler/optimizers/data/replicate_on_split.cc:32] Running replicate on split optimization
2024-02-17 17:23:28.697177: I tensorflow/core/grappler/optimizers/data/replicate_on_split.cc:32] Running replicate on split optimization


4133

In [317]:
from glob import glob

from tensorflow.io import FixedLenFeature
from tensorflow.keras import layers

# Shuffle-buffer size passed to create_tfrecord_dataset for the training set.
BUFFER_SIZE = 10_000
# Default parallelism for the tf.data reads/maps below; AUTOTUNE lets tf.data
# pick the level dynamically.
N_THREADS = data.AUTOTUNE


def create_tfrecord_dataset(
    record_paths: list[str],
    batch_size: int = 128,
    n_threads: int | None = N_THREADS,
    cache: bool = False,
    shuffle_buf_size: int | None = None,
    seed: int = 42,
) -> data.Dataset:
    """Build a batched input pipeline from Fashion-MNIST TFRecord shards.

    Each record is parsed into a ``(28, 28)`` ``uint8`` image and a ``uint8``
    label. The pipeline then optionally caches and shuffles, and finally
    batches (dropping the last incomplete batch) and prefetches.

    Args:
        record_paths: Paths of the TFRecord files to read.
        batch_size: Number of examples per batch.
        n_threads: Parallelism used for reading, parsing and batching; also
            passed to ``prefetch`` as the buffer size.
        cache: Whether to cache the parsed records (before shuffling).
        shuffle_buf_size: Shuffle-buffer size; ``None`` disables shuffling.
        seed: Random seed for the shuffle.

    Returns:
        A dataset yielding ``(images, labels)`` batches.
    """

    def __parse_fashion_mnist_tfrecord(
        record: tf.Tensor,
    ) -> tuple[tf.Tensor, tf.Tensor]:
        """Decode one serialized ``Example`` into an ``(image, label)`` pair."""
        parsed = tf.io.parse_example(
            record,
            features={
                "image": FixedLenFeature(shape=(), dtype=tf.string, default_value=""),
                "label": FixedLenFeature(shape=(), dtype=tf.int64),
            },
        )

        # parse_tensor loses the static shape; restore it so downstream
        # layers can be built from element_spec.
        image = tf.ensure_shape(
            tf.io.parse_tensor(parsed["image"], out_type=tf.uint8), shape=(28, 28)
        )
        label = tf.cast(parsed["label"], dtype=tf.uint8)
        return image, label

    dataset = data.TFRecordDataset(record_paths, num_parallel_reads=n_threads).map(
        __parse_fashion_mnist_tfrecord, num_parallel_calls=n_threads
    )
    if cache:
        dataset = dataset.cache()
    if shuffle_buf_size is not None:
        # Use the caller-supplied seed (the original read the global SEED and
        # silently ignored this parameter).
        dataset = dataset.shuffle(
            buffer_size=shuffle_buf_size, seed=seed, reshuffle_each_iteration=True
        )

    return dataset.batch(
        batch_size, drop_remainder=True, num_parallel_calls=n_threads
    ).prefetch(n_threads)


# Training pipeline: shuffled batches read back from the training shards.
train_set = create_tfrecord_dataset(
    record_paths=glob(str(TFR_DIR / "fashion_mnist" / "train" / "*.tfrecord")),
    shuffle_buf_size=BUFFER_SIZE,
)

# Fit the normalization statistics on the training images only.
norm = layers.Normalization(input_shape=train_set.element_spec[0].shape[1:])
norm.adapt(
    train_set.map(lambda images, _labels: images, num_parallel_calls=N_THREADS)
)

train_set = train_set.map(
    lambda images, labels: (norm(images), labels), num_parallel_calls=N_THREADS
)

# Validation batches are cached and normalized with the same layer.
val_set = create_tfrecord_dataset(
    record_paths=glob(str(TFR_DIR / "fashion_mnist" / "validation" / "*.tfrecord")),
    cache=True,
).map(lambda images, labels: (norm(images), labels), num_parallel_calls=N_THREADS)

# The test batches are cached but not normalized in this cell.
test_set = create_tfrecord_dataset(
    record_paths=glob(str(TFR_DIR / "fashion_mnist" / "test" / "*.tfrecord")),
    cache=True,
)

2024-02-17 17:23:32.581657: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 5261512097476009598


In [318]:
# Print the first batch of every split as a quick sanity check.
for trs in train_set.take(1):
    print("Train:", trs, sep="\n")

for vs in val_set.take(1):
    print("\nVal:", vs, sep="\n")

for tes in test_set.take(1):
    print("\nTest:", tes, sep="\n")

Train:
(<tf.Tensor: shape=(128, 28, 28), dtype=float32, numpy=
array([[[-0.1670863 , -0.23698466, -0.2851459 , ..., -0.46923783,
         -0.3833782 , -0.2408609 ],
        [-0.1670863 , -0.23698466, -0.2851459 , ..., -0.46923783,
         -0.3833782 , -0.2408609 ],
        [-0.1670863 , -0.23698466, -0.2851459 , ..., -0.46923783,
         -0.3833782 , -0.2408609 ],
        ...,
        [-0.1670863 , -0.23698466, -0.2851459 , ..., -0.46923783,
         -0.3833782 , -0.2408609 ],
        [-0.1670863 , -0.23698466, -0.2851459 , ..., -0.46923783,
         -0.3833782 , -0.2408609 ],
        [-0.1670863 , -0.23698466, -0.2851459 , ..., -0.46923783,
         -0.3833782 , -0.2408609 ]],

       [[-0.1670863 , -0.23698466, -0.2851459 , ..., -0.46923783,
         -0.3833782 , -0.2408609 ],
        [-0.1670863 , -0.23698466, -0.2851459 , ..., -0.46923783,
         -0.3833782 , -0.2408609 ],
        [-0.1670863 , -0.23698466, -0.2851459 , ..., -0.46923783,
         -0.3833782 , -0.2408609 ],
    

2024-02-17 17:23:32.782728: W tensorflow/core/kernels/data/cache_dataset_ops.cc:854] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
2024-02-17 17:23:32.799287: W tensorflow/core/kernels/data/cache_dataset_ops.cc:854] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


In [319]:
# Dense classifier: flatten the image, then three ReLU blocks (100, 50, 50
# units), each followed by batch normalization, and a 10-way softmax output.
model = keras.Sequential(
    [
        layers.Flatten(input_shape=train_set.element_spec[0].shape[1:]),
        *(
            layer
            for n_units in (100, 50, 50)
            for layer in (
                layers.Dense(n_units, activation="relu"),
                layers.BatchNormalization(),
            )
        ),
        layers.Dense(10, activation="softmax"),
    ]
)
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="nadam",
    metrics=["accuracy"],
    jit_compile=True,
)
model.summary()

Model: "sequential_86"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_13 (Flatten)        (None, 784)               0         
                                                                 
 dense_98 (Dense)            (None, 100)               78500     
                                                                 
 batch_normalization_59 (Ba  (None, 100)               400       
 tchNormalization)                                               
                                                                 
 dense_99 (Dense)            (None, 50)                5050      
                                                                 
 batch_normalization_60 (Ba  (None, 50)                200       
 tchNormalization)                                               
                                                                 
 dense_100 (Dense)           (None, 50)              

In [320]:
from tensorflow.keras.callbacks import EarlyStopping

# Let MLflow record the training run that follows.
mlflow.tensorflow.autolog()

# `epochs` is only an upper bound: early stopping ends training once the
# validation loss has not improved for 20 epochs and restores the best weights.
history = model.fit(
    train_set,
    validation_data=val_set,
    epochs=1000,
    callbacks=[
        EarlyStopping(monitor="val_loss", patience=20, restore_best_weights=True)
    ],
)

2024/02/17 17:23:32 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'b6a8ddb40ab24d31a1d6cf80cc25e6c9', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/1000




Epoch 2/1000
 42/328 [==>...........................] - ETA: 0s - loss: 0.3706 - accuracy: 0.8664 

2024-02-17 17:23:36.872517: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 11953096714435921924
2024-02-17 17:23:36.872606: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 37581899012378094


Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
INFO:tensorflow:Assets written to: /tmp/nix-shell.WhEkng/tmpmj6itgjw/model/data/model/assets


INFO:tensorflow:Assets written to: /tmp/nix-shell.WhEkng/tmpmj6itgjw/model/data/model/assets


In [321]:
# Chain the adapted normalization layer and the trained classifier so the
# combined model can be fed the un-normalized test batches.
final_model = keras.Sequential()
final_model.add(norm)
final_model.add(model)

# Reuse the training configuration; the first entry of `model.metrics` is
# skipped (presumably the loss tracker, which evaluate() reports anyway).
final_model.compile(
    loss=model.loss,
    optimizer=model.optimizer,
    metrics=model.metrics[1:],
)

In [322]:
# Evaluate the combined (normalization + classifier) model on the test batches
# and unpack the two reported values.
test_loss, test_accuracy = final_model.evaluate(test_set)



2024-02-17 17:24:05.280093: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 14677975496398457193
2024-02-17 17:24:05.280123: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 6394035354870006688


## 10

In [None]:
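# One-off download and extraction of the IMDB review archive into IMDB_DIR.
# Kept commented out; uncomment to fetch the data when IMDB_DIR is empty.
# import tarfile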
# from io import BytesIO

# import requests

# if not IMDB_DIR.is_dir():
#     IMDB_DIR.mkdir(parents=True, exist_ok=True)

# with BytesIO(
#     initial_bytes=requests.get(
#         "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
#         allow_redirects=True,
#     ).content
# ) as archive:
#     tar = tarfile.open(fileobj=archive, mode="r:gz")
#     tar.extractall(IMDB_DIR)

### For Val & Test Sets

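The validation and test sets are both taken from the archive's `test` directory:

- 7,500 files from each of `pos` and `neg` (15,000 in total) form the **Val** set
- the remaining 5,000 files from each of `pos` and `neg` (10,000 in total) form the **Test** set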

In [5]:
from glob import glob

import numpy as np
import tensorflow as tf
from tensorflow import data

# Shuffle-buffer size used for the IMDB training set.
SHUF_BUF_SIZE = 25_000
# Default number of files interleaved concurrently by create_imdb_dataset.
CYCLE_LENGTH = 1000
# Default parallelism for the tf.data calls in this section.
# NOTE: rebinds the N_THREADS = data.AUTOTUNE defined earlier in the notebook.
N_THREADS = 8
SEED = 42
# Number of files taken from each of test/pos and test/neg for validation.
VAL_SIZE = 7500

# Train and test directories inside the extracted archive.
TRAIN_DIR = IMDB_DIR / "aclImdb" / "train"
TEST_DIR = IMDB_DIR / "aclImdb" / "test"


def create_imdb_dataset(
    pos_paths: list[str],
    neg_paths: list[str],
    cycle_len: int = CYCLE_LENGTH,
    n_threads: int = N_THREADS,
    shuf_buf_size: int | None = None,
    batch_size: int = 64,
) -> data.Dataset:
    """Build a batched ``(text, label)`` dataset from review text files.

    Every line of every file in ``pos_paths`` is paired with label ``1`` and
    every line of every file in ``neg_paths`` with label ``0`` (both
    ``uint8``). The positive examples are followed by the negative ones; the
    result is optionally shuffled, then batched and prefetched.

    Args:
        pos_paths: Paths (or file patterns) of the positive reviews.
        neg_paths: Paths (or file patterns) of the negative reviews.
        cycle_len: Number of files read concurrently by each interleave.
        n_threads: Parallelism for reading, mapping and batching.
        shuf_buf_size: Shuffle-buffer size; ``None`` disables shuffling.
        batch_size: Number of examples per batch.

    Returns:
        A dataset yielding ``(texts, labels)`` batches.
    """

    def _labelled_reviews(paths: list[str], label: int) -> data.Dataset:
        """Read every line of the files in ``paths`` and attach ``label``."""
        return data.Dataset.list_files(paths).interleave(
            lambda filename: data.TextLineDataset(
                filename, num_parallel_reads=n_threads
            ).map(
                lambda line: (line, tf.constant(label, dtype=tf.uint8)),
                num_parallel_calls=n_threads,
            ),
            # Both classes honour `cycle_len`; the positive branch previously
            # read the global CYCLE_LENGTH and ignored this parameter.
            cycle_length=cycle_len,
            num_parallel_calls=n_threads,
        )

    dataset = _labelled_reviews(pos_paths, label=1).concatenate(
        _labelled_reviews(neg_paths, label=0)
    )
    if shuf_buf_size is not None:
        dataset = dataset.shuffle(
            buffer_size=shuf_buf_size, reshuffle_each_iteration=True
        )

    return dataset.batch(batch_size, num_parallel_calls=n_threads).prefetch(
        data.AUTOTUNE
    )


# glob() returns files in arbitrary order, so sort them first: together with
# the seeded permutation below this makes the val/test split reproducible.
val_test_pos_paths, val_test_neg_paths = (
    np.array(sorted(glob(str(TEST_DIR / "pos" / "*.txt"))), dtype="object"),
    np.array(sorted(glob(str(TEST_DIR / "neg" / "*.txt"))), dtype="object"),
)

# One permutation indexes both classes, which is only valid when they hold
# the same number of files.
assert len(val_test_pos_paths) == len(
    val_test_neg_paths
), "pos and neg test directories must contain the same number of files"

shuf_idx = np.random.default_rng(SEED).permutation(len(val_test_pos_paths))
val_pos_paths, val_neg_paths = (
    val_test_pos_paths[shuf_idx[:VAL_SIZE]],
    val_test_neg_paths[shuf_idx[:VAL_SIZE]],
)
test_pos_paths, test_neg_paths = (
    val_test_pos_paths[shuf_idx[VAL_SIZE:]],
    val_test_neg_paths[shuf_idx[VAL_SIZE:]],
)

del val_test_pos_paths, val_test_neg_paths

# Only the training set is shuffled; validation and test keep a fixed order.
train_set = create_imdb_dataset(
    pos_paths=glob(str(TRAIN_DIR / "pos" / "*.txt")),
    neg_paths=glob(str(TRAIN_DIR / "neg" / "*.txt")),
    shuf_buf_size=SHUF_BUF_SIZE,
)
val_set = create_imdb_dataset(pos_paths=val_pos_paths, neg_paths=val_neg_paths)
test_set = create_imdb_dataset(pos_paths=test_pos_paths, neg_paths=test_neg_paths)

del val_pos_paths, val_neg_paths, test_pos_paths, test_neg_paths
gc.collect()

2278

In [6]:
from tensorflow.keras import layers

# Learn the vocabulary from (at most 5000 batches of) the training texts.
text_vec = layers.TextVectorization()
text_vec.adapt(
    train_set.take(5000).map(lambda text, _label: text, num_parallel_calls=N_THREADS)
)

# Vectorize the training and validation texts; the test set keeps raw strings.
train_set, val_set = (
    split.map(
        lambda text, label: (text_vec(text), label), num_parallel_calls=N_THREADS
    )
    for split in (train_set, val_set)
)

2024-02-19 16:54:36.525954: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 16703509877230826048
2024-02-19 16:54:36.526086: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 7508873984737559684
2024-02-19 16:54:36.526142: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 15874770449440171858


In [7]:
# Dropout rate applied after each hidden block of the sentiment model.
DROPOUT_RATE = 0.5
# Output dimension of the word-embedding layer.
EMBED_OUT = 50


def compute_sentence_embeddings(word_embeds: tf.Tensor, sum_axis: int = 1) -> tf.Tensor:
    """Collapse word embeddings into one vector per sentence.

    A word slot counts as present when its embedding has at least one
    non-zero component. The embeddings are summed over ``sum_axis`` and the
    sum is divided by the square root of the number of present words.

    A sentence without any non-zero embedding is treated as having one word,
    so its result is a zero vector rather than ``0 / 0 = NaN``.

    Args:
        word_embeds: Word embeddings; the word count is always taken over the
            last two axes (presumably ``(batch, words, embed_dim)`` -- confirm
            against the caller).
        sum_axis: Axis over which the embeddings are summed.

    Returns:
        The scaled sum of the word embeddings for every sentence.
    """
    n_words = tf.math.count_nonzero(
        tf.math.count_nonzero(word_embeds, axis=-1), axis=-1, keepdims=True
    )
    # Clamp to at least one word so all-zero sentences do not divide by zero.
    safe_n_words = tf.maximum(n_words, tf.ones_like(n_words))

    return tf.reduce_sum(word_embeds, axis=sum_axis) / tf.sqrt(
        tf.cast(safe_n_words, dtype=word_embeds.dtype)
    )


# Sentiment classifier: embed the token ids, reduce them to one vector per
# review, then two Dense -> BatchNorm -> Dropout blocks and a sigmoid output.
sentiment_model = keras.models.Sequential(
    [
        layers.Embedding(input_dim=text_vec.vocabulary_size(), output_dim=EMBED_OUT),
        layers.Lambda(compute_sentence_embeddings),
        *(
            layer
            for n_units in (100, 50)
            for layer in (
                layers.Dense(n_units, activation="relu"),
                layers.BatchNormalization(),
                layers.Dropout(rate=DROPOUT_RATE),
            )
        ),
        layers.Dense(1, activation="sigmoid"),
    ]
)
sentiment_model.compile(
    loss="binary_crossentropy",
    optimizer="nadam",
    metrics=["accuracy", "Precision", "Recall"],
    jit_compile=False,
)

In [7]:
# Hand-made batch of two "sentences" with three word slots of three components
# each; the all-zero rows stand in for absent words.
another_example = tf.constant(
    value=[
        [[1.0, 2.0, 3.0], [4.0, 5.0, 0.0], [0.0, 0.0, 0.0]],
        [[6.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]],
    ],
    dtype=tf.float32,
)

compute_sentence_embeddings(word_embeds=another_example)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[3.535534 , 4.9497476, 2.1213205],
       [6.       , 0.       , 0.       ]], dtype=float32)>

In [8]:
from tensorflow.keras.callbacks import EarlyStopping

# Let MLflow record the training run that follows.
mlflow.tensorflow.autolog()

history = sentiment_model.fit(
    train_set,
    epochs=20,
    validation_data=val_set,
    # Keras documents `callbacks` as a list of callback instances, so pass a
    # list (as the Fashion-MNIST training cell does) rather than a bare one.
    callbacks=[EarlyStopping(patience=10, restore_best_weights=True)],
)

2024/02/19 16:54:41 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '1b6c93b23e0d432880724c82215decbd', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/20


2024-02-19 16:54:46.432064: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2024-02-19 16:54:46.496735: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7ffd6dbfbfb0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-02-19 16:54:46.496764: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Ti, Compute Capability 8.9
2024-02-19 16:54:46.502730: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-02-19 16:54:46.509799: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8800
2024-02-19 16:54:46.657751: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of th

    391/Unknown - 68s 159ms/step - loss: 0.5072 - accuracy: 0.7577 - precision: 0.7569 - recall: 0.7593

2024-02-19 16:55:49.560964: I tensorflow/core/framework/local_rendezvous.cc:409] Local rendezvous send item cancelled. Key hash: 15307283253652429124
2024-02-19 16:55:49.560991: I tensorflow/core/framework/local_rendezvous.cc:409] Local rendezvous send item cancelled. Key hash: 15384726380896506838
2024-02-19 16:55:49.561000: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 2394626338979982826
2024-02-19 16:55:49.561012: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 1692950827254816456




2024-02-19 16:55:52.138965: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 5456875936774510658
2024-02-19 16:55:52.139052: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 10505180421857578688


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20




In [9]:
# End-to-end model: scalar string input -> text vectorization -> classifier.
final_sentiment_model = keras.models.Sequential()
final_sentiment_model.add(layers.Input(shape=(), dtype=tf.string))
final_sentiment_model.add(text_vec)
final_sentiment_model.add(sentiment_model)

# Reuse the training configuration; the first entry of the trained model's
# metrics is skipped (presumably the loss tracker).
final_sentiment_model.compile(
    loss=sentiment_model.loss,
    optimizer=sentiment_model.optimizer,
    metrics=sentiment_model.metrics[1:],
)

In [10]:
# Evaluate the end-to-end model on the test batches, which still hold raw
# (un-vectorized) review strings.
test_metrics = final_sentiment_model.evaluate(test_set)



2024-02-19 17:00:08.804686: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 15665579406873082381
2024-02-19 17:00:08.804716: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 14268400143922188807
2024-02-19 17:00:08.804726: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 16955261489581513509
2024-02-19 17:00:08.804733: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 3626514809993161463
2024-02-19 17:00:08.804741: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous recv item cancelled. Key hash: 1298354914411440750


### Using TFDS

In [15]:
import tensorflow_datasets as tfds

# Load the IMDB reviews from TensorFlow Datasets as (text, label) pairs: the
# full train split, plus the test split cut 75 % / 25 % into val and test.
train, val, test = tfds.load(
    "imdb_reviews",
    split=["train", "test[:75%]", "test[75%:]"],
    shuffle_files=True,
    as_supervised=True,
)

In [20]:
# Show the first (review, label) pair of each split, separated by blank lines.
for position, split in enumerate((train, val, test)):
    if position > 0:
        print()
    for X, y in split.take(1):
        print(X, y)

tf.Tensor(b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it.", shape=(), dtype=string) tf.Tensor(0, shape=(), dtype=int64)

tf.Tensor(b"There are films that make careers. For George Romero, it was NIGHT OF THE LIVING DEAD; for Kevin Smith, CLERKS; for Robert Rodriguez, EL MARIACHI. Add to that list Onur Tukel's absolutely amazing DING-A-

2024-02-19 17:09:39.783428: W tensorflow/core/kernels/data/cache_dataset_ops.cc:854] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
2024-02-19 17:09:39.816266: W tensorflow/core/kernels/data/cache_dataset_ops.cc:854] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
